# Analysis of monthly return data

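Description: Exploratory look at fund holdings (which securities are held most often, with security names attached via the stock map) and at monthly fund returns grouped by CRSP objective code.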


In [7]:
import feather
import numpy as np
import pandas as pd

from scipy import sparse

import matplotlib.pyplot as plt

In [47]:
# Load the raw security/company info table and discard rows that are exact duplicates.
path = '../data/raw/holdings_co_info.feather'
holdings_co_info = feather.read_dataframe(path).drop_duplicates()
holdings_co_info.shape

(1621421, 5)

In [48]:
# Load the processed holdings summary table.
# Its recorded row count (52540) equals the row count of the `holdings` matrix,
# so presumably there is one summary row per holdings row — TODO confirm.
path = '../data/processed/holdings_summary_s_s.feather'
summary = feather.read_dataframe(path)
summary.shape

(52540, 9)

In [72]:
# Load the stock map. A later cell keeps its first two columns under the names
# 'crsp_company_key' and 'nr'.
# Its recorded row count (60533) equals the column count of the `holdings` matrix,
# so presumably there is one stock_map row per holdings column — TODO confirm.
path = '../data/processed/stock_map.feather'
stock_map = feather.read_dataframe(path)
stock_map.shape

(60533, 3)

In [50]:
# Load the holdings matrix stored in scipy's sparse .npz format.
# Recorded shape: (52540, 60533).
path = '../data/processed/holdings_b_b.npz'
holdings = sparse.load_npz(path)
holdings.shape

(52540, 60533)

## Most common stocks

In [51]:
# Sum the holdings matrix down its rows: one 'total' value per column of `holdings`.
holdings_summary = pd.DataFrame(holdings.sum(axis=0).T, columns=['total'])
holdings_summary.shape

(60533, 1)

In [68]:
# Keep only the last 5 rows of every portfolio (rows grouped by summary['port_no']).
#
# The rows are selected on the sparse matrix *before* densifying: converting all
# of `holdings` (52540 x 60533) to a dense array needs tens of GB of memory,
# while only a small subset of rows is kept afterwards.
#
# `row_ids` carries the same default 0..n-1 index a DataFrame built from
# `holdings.toarray()` would have, so grouping it by summary['port_no'] picks
# exactly the rows the dense groupby(...).tail(5) picked, in the same order.
row_ids = pd.Series(np.arange(holdings.shape[0]))
last_rows = row_ids.groupby(by=summary['port_no']).tail(5).to_numpy()

# Row labels stay the original row positions, as they did with the dense version.
holdings_df = pd.DataFrame(holdings.tocsr()[last_rows].toarray(), index=last_rows)

In [70]:
# Check how many rows remain after keeping the last 5 rows per portfolio.
holdings_df.shape

(12915, 60533)

## Merge `stock_map` and `holdings_co_info` to get the name of each security

In [73]:
# Keep the first two columns of stock_map and name them; the third column is unused.
#
# Written so the cell can be re-run safely: renaming three columns and then
# dropping the third fails on a second execution (length mismatch), because
# only two columns are left by then. Selecting by position first is idempotent
# and gives the same result on the first run.
stock_map = stock_map.iloc[:, :2].copy()
stock_map.columns = ['crsp_company_key', 'nr']

In [74]:
# Show one random row to see which columns the company-info table carries.
holdings_co_info.sample(n=1)

Unnamed: 0,crsp_company_key,security_name,cusip,permno,permco
1257079,4537054.0,CALIFORNIA HIGHRIDGE APARTMENTS RANCHO PALOS V...,,,


In [75]:
# Left join: keep every stock_map row and attach company info where the key matches.
stock_map_merged = stock_map.merge(
    holdings_co_info,
    on='crsp_company_key',
    how='left',
)

In [76]:
# Summary statistics of the numeric columns; the 'count' row shows how many
# stock_map rows found a non-null permno/permco in the company-info table.
stock_map_merged.describe()

Unnamed: 0,crsp_company_key,nr,permno,permco
count,60533.0,60533.0,6023.0,6023.0
mean,4262814.0,447921.6,74379.897725,25601.904035
std,1365795.0,505161.6,23509.745818,15762.935972
min,3000001.0,0.0,4477.0,29.0
25%,3110521.0,48778.0,75710.0,13096.0
50%,4011241.0,296298.0,84180.0,20979.0
75%,4348222.0,685686.0,89150.5,41383.5
max,8780035.0,2379846.0,98965.0,56588.0


In [77]:
# Put the per-column holdings totals next to the security info.
# The two frames are matched on their index labels, not on a key column;
# presumably row i of stock_map describes column i of `holdings` — TODO confirm.
stock_map_merged = pd.concat(
    [holdings_summary, stock_map_merged],
    axis='columns',
)

In [86]:
# Inspect a small slice of rows, selected by index label.
stock_map_merged.loc[9650:9660]

Unnamed: 0,total,crsp_company_key,nr,security_name,cusip,permno,permco
9650,0.0,4123740,23209,FEDERAL NATIONAL MORTGAGE ASSOCIATION 01-FEB-2...,31412NYH,,
9651,4.0,4192325,23214,FEDERAL NATIONAL MORTGAGE ASSOCIATION 01-MAY-2...,31417UFC,,
9652,7495.0,3945041,23215,APPLE VY MINN ECONOMIC DEV AUTH HEALTH CARE RE...,03788AAU,,
9653,0.0,4076607,23216,PROMONTORIA 264 6.250% 15-AUG-2023,,,
9654,1.0,4185381,23217,,,,
9655,0.0,4192820,23225,,,,
9656,0.0,4018869,23226,CARDTRONICS PLC EQUITY SWAP,,,
9657,1.0,4193562,23228,,,,
9658,1.0,4206165,23229,FEDERAL NATIONAL MORTGAGE ASSOCIATION 01-JUL-2...,31416WL6,,
9659,8.0,4084769,23230,LEANDER TEX INDPT SCH DIST 0.000% 15-AUG-2041,521841VB,,


In [82]:
# Rows whose security name contains the substring 'APPLE'.
# Names are cast to str first so missing names do not break the string match.
stock_map_merged[
    stock_map_merged['security_name'].astype(str).str.contains('APPLE')
]

Unnamed: 0,total,crsp_company_key,nr,security_name,cusip,permno,permco
1329,5.0,3007318,1350,NICHOLAS APPLEGATE CONV & INC FD,65370F10,89707.0,43985.0
2840,0.0,3000780,3625,APPLEBEES INTERNATIONAL INC,03789910,78717.0,28879.0
3896,62.0,3015687,4686,NICHOLAS APPLEGATE GROWTH EQ,65369820,,
7508,15126.0,4076247,15297,APPLE INC DISC COML PAPER 144A YRS 3&4 0.000% ...,03785ENN,,
8383,54.0,3007320,18054,NICHOLAS APPLEGATE EQ & CONV INC,65370K10,91757.0,51794.0
9147,15.0,3007319,21889,NICHOLAS APPLEGATE CON INC FD II,65370G10,89784.0,44187.0
9652,7495.0,3945041,23215,APPLE VY MINN ECONOMIC DEV AUTH HEALTH CARE RE...,03788AAU,,
10017,6.0,3007317,24941,NICHOLAS APPLEGATE INT & PREM ST,65370C10,90652.0,46548.0
21581,10.0,3023019,80046,MAUI LAND & PINEAPPLE CO,2572604,,
21796,0.0,3945042,82425,APPLE VY MINN ECONOMIC DEV AUTH HEALTH CARE RE...,03788AAW,,


In [79]:
# Distribution of the 'total' column across all securities.
stock_map_merged['total'].describe()

count    60533.000000
mean       158.718253
std        767.643389
min          0.000000
25%          0.000000
50%          0.000000
75%         10.000000
max      19701.000000
Name: total, dtype: float64

In [81]:
# The 100 securities with the largest 'total', biggest first.
stock_map_merged.sort_values('total', ascending=False).iloc[:100]

Unnamed: 0,total,crsp_company_key,nr,security_name,cusip,permno,permco
684,19701.0,3001062,685,B C E INC,05534B10,29647.0,20245.0
590,18392.0,3008890,591,REWARDS NETWORK INC,76155710,10404.0,8321.0
7508,15126.0,4076247,15297,APPLE INC DISC COML PAPER 144A YRS 3&4 0.000% ...,03785ENN,,
629,14663.0,3007520,630,NYER MEDICAL GROUP INC,67071110,77673.0,11568.0
720,14181.0,3014616,721,INDIAN RAYON & INDS INC,45435210,,
673,13909.0,3003276,674,E X X INC,26928220,81029.0,30485.0
773,13631.0,3000677,774,AMKOR TECHNOLOGY INC,03165210,86047.0,16080.0
9803,13252.0,6819469,23776,,,,
745,13163.0,3002370,746,COHEN & STEERS QLTY INCOME RLTY,19247L10,89298.0,42770.0
821,12901.0,3014867,822,JPMORGAN TR I,4812A278,,


## Distribution by date

In [None]:
# Bar chart of the number of return observations per calendar year.
# NOTE(review): `returns` is not loaded in any visible cell of this notebook —
# confirm where it is defined before running on a fresh kernel.
(returns
    .groupby([returns['caldt'].dt.year])['crsp_fundno']
    .count()
    .plot.bar(color='b', figsize=(20, 10)))

## Observations per Fund

In [None]:
# Step 1: number of observations per fund.
# Step 2: for each observation count, how many funds have exactly that many,
#         accumulated from the smallest count upwards.
a = (
    returns['crsp_fundno']
    .pipe(lambda fund_ids: fund_ids.groupby(fund_ids).count())
    .pipe(lambda obs_counts: obs_counts.groupby(obs_counts).count())
    .cumsum()
)

In [None]:
# Cumulative number of funds (y) against observations per fund (x).
a.plot.line(color='b', figsize=(20, 10))

In [None]:
# Monthly returns ('mret') of one example fund, crsp_fundno 27650.
is_example_fund = returns['crsp_fundno'].eq(27650)
returns.loc[is_example_fund, 'mret'].plot.bar(color='b', figsize=(20, 10))

## Map objective codes to returns

In [None]:
# Column dtypes of the summary table (checked before merging on crsp_fundno).
summary.dtypes

In [None]:
# Column dtypes of the returns table (checked before merging on crsp_fundno).
returns.dtypes

In [None]:
# Attach the summary columns to every return row, then drop rows that have a
# missing value in any column.
# NOTE(review): if `summary` holds several rows per crsp_fundno, this left join
# repeats each return row once per matching summary row — confirm that is intended.
returns_merged = (
    returns
    .merge(summary, on='crsp_fundno', how='left')
    .dropna()
)

In [None]:
# Spot-check ten random rows of the merged table (no seed set, so the rows vary per run).
returns_merged.sample(n=10)

## Aggregate return

In [None]:
# Spot-check five random rows of the merged table (no seed set, so the rows vary per run).
returns_merged.sample(n=5)

In [None]:
# Restrict the merged returns to a single objective code, 'EFSH'.
return_class = returns_merged.loc[returns_merged['crsp_obj_cd'].eq('EFSH')]

In [None]:
# Mean monthly return and number of observations per (objective code, date).
# Note: this overwrites `return_class` with the aggregated table, so the cell
# cannot be re-run without first re-running the filter cell before it.
return_class = (
    return_class
    .groupby(['crsp_obj_cd', 'caldt'])['mret']
    .agg(['mean', 'count'])
)

In [None]:
# Compound the mean returns over time: running product of (1 + mean),
# with missing means treated as a 0 return.
return_class['cumulative'] = return_class['mean'].fillna(0).add(1).cumprod()

Cumulative return over time

In [None]:
# Plot the compounded return held in the 'cumulative' column.
# The cell used to plot the per-period 'mean' column, which is not the
# cumulative return this section is about and left 'cumulative' unused.
return_class['cumulative'].plot(kind='line',
                                color='b',
                                figsize=(16, 10))

Is the mean monthly return different from zero?

In [None]:
# Box plot of the per-period mean returns.
return_class[['mean']].boxplot()

## Objective codes

In [None]:
# Group the 'cum' column of the merged returns by objective code.
# NOTE(review): no visible cell creates a 'cum' column on returns_merged —
# confirm it comes from `returns` or `summary`, otherwise this raises a KeyError.
returns_grouped = returns_merged.groupby(['crsp_obj_cd'])['cum']

In [None]:
# Replace the groupby object with its descriptive statistics, one row per objective code.
# Note: the name is reused for a different kind of object, so this cell cannot
# be re-run without first re-running the groupby cell before it.
returns_grouped = returns_grouped.describe()

In [None]:
# Bar chart of the per-objective-code mean, sorted from lowest to highest.
returns_grouped['mean'].sort_values().plot.bar(color='b', figsize=(16, 10))