<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Table-1-/-whole-sample-over-the-years" data-toc-modified-id="Table-1-/-whole-sample-over-the-years-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Table 1 / whole sample over the years</a></span></li><li><span><a href="#Table-2-/-Over-the-years" data-toc-modified-id="Table-2-/-Over-the-years-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Table 2 / Over the years</a></span></li><li><span><a href="#Distribution-per-year" data-toc-modified-id="Distribution-per-year-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Distribution per year</a></span></li><li><span><a href="#Average-retun-per-year" data-toc-modified-id="Average-retun-per-year-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Average retun per year</a></span></li><li><span><a href="#Average-std-per-year" data-toc-modified-id="Average-std-per-year-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Average std per year</a></span></li><li><span><a href="#Old" data-toc-modified-id="Old-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Old</a></span></li></ul></div>

# Analysis of selected funds

## Setup

In [None]:
import feather
import pickle
import pandas as pd
import numpy as np
from scipy import sparse

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the pre-processed data bundle. The object is indexed both by year
# (e.g. 2018) and by the string keys 'full' / 'full_returns' further below.
path = '../data/processed/full.pickle'
# SECURITY NOTE: unpickling can execute arbitrary code -- only load files
# produced by this project's own processing pipeline.
# The context manager closes the file handle once the bundle is read.
with open(path, "rb") as pickle_off:
    dict_all_years = pickle.load(pickle_off)
# Entry for 2018; not referenced again in the cells shown in this section.
dict_year = dict_all_years[2018]

In [None]:
# Pull the two full-sample tables out of the loaded bundle:
#   row_info -- read below through the columns crsp_fundno, lipper_class,
#               report_dt and tna_latest
#   returns  -- read below through the columns crsp_fundno, mret, year and
#               the *_class columns
row_info, returns = dict_all_years['full'], dict_all_years['full_returns']

In [None]:
# Number of distinct fund identifiers (`crsp_fundno`) present in `row_info`.
row_info['crsp_fundno'].nunique()

In [None]:
# Number of distinct fund identifiers (`crsp_fundno`) present in `returns`.
returns['crsp_fundno'].nunique()

## Table 1 / whole sample over the years

In [None]:
# Average number of stored holdings per row, grouped by Lipper class.
#
# NOTE(review): `holdings` is not assigned in any cell shown above; it has to
# exist in the kernel already (presumably a sparse matrix with one row per row
# of `row_info` -- confirm) or this cell fails on Restart & Run All.

# Indicator copy of the holdings matrix: every stored entry becomes 1.
holdings_b = sparse.csr_matrix(holdings, copy=True)
holdings_b.data = np.ones(len(holdings.data))

# Row sums of the indicator matrix = number of stored entries per row.
# Summing the sparse matrix directly avoids materialising the full dense
# array, which `toarray()` would do; the resulting float vector is identical.
holdings_per_row = np.asarray(holdings_b.sum(axis=1)).ravel()
row_info['boolean_sum'] = holdings_per_row  # adds a column to row_info in place

boolean_sum = row_info.groupby('lipper_class')[['boolean_sum']].mean()

boolean_sum.index = boolean_sum.index.astype(str)
# 'Total' is the unweighted mean of the per-class means.
boolean_sum.loc['Total',:] = boolean_sum.mean(axis=0)
boolean_sum = round(boolean_sum)

In [None]:
# Total net assets per Lipper class: sum `tna_latest` within every
# (report date, class) pair, then average those sums over the report dates.
sum_tna = (
    row_info
    .groupby(['report_dt', 'lipper_class'])
    .agg({'tna_latest': ['sum']})
    .groupby(['lipper_class'])
    .mean()
)

# String labels so that a 'Total' row can sit next to the class labels.
sum_tna.index = sum_tna.index.astype(str)

# 'Total' adds up the per-class averages.
sum_tna.loc['Total', :] = sum_tna.sum(axis=0)
sum_tna = sum_tna.round()

In [None]:
# Table 1 -- summary statistics of `mret` for the whole sample: one row per
# Lipper class plus a 'Total' row. Reads `returns` and the `sum_tna` frame
# built in the preceding cell.
columns = [
    'Number of funds', 'Percent of funds',
    'Return', 'Standard deviation',
    'Min', 'Max', 'Skewness', 'Kurtosis',
]

# Empty frame with the table layout (not used again inside this cell).
table_1 = pd.DataFrame(columns=columns)

unique_funds = returns['crsp_fundno'].nunique()


def percent(x):
    """Distinct values in the group as a percentage of all distinct funds."""
    return x.nunique() / unique_funds * 100


def annual_ret(x):
    """Mean scaled by 12, expressed in percent."""
    return x.mean() * 12 * 100


def annual_std(x):
    """Standard deviation scaled by sqrt(12), expressed in percent."""
    return x.std() * np.sqrt(12) * 100


def quant_05(x):
    """5% quantile (defined for convenience; not part of the table)."""
    return x.quantile([0.05])


def quant_95(x):
    """95% quantile (defined for convenience; not part of the table)."""
    return x.quantile([0.95])


def kurt(x):
    """Kurtosis as returned by the pandas ``kurt`` method."""
    return x.kurt()


# Same aggregation for the per-class rows and for the 'Total' row; the order
# of the statistics matches `columns`.
agg_spec = {
    'crsp_fundno': ['nunique', percent],
    'mret': [annual_ret, annual_std, 'min', 'max', 'skew', kurt],
}

# One row per Lipper class.
temp = returns.groupby([returns['lipper_class']]).agg(agg_spec)
temp.index.name = 'Lipper class'
temp.columns = columns
temp.index = temp.index.astype(str)

# A constant grouping key puts every row into a single group.
temp_all = returns.groupby(lambda _: True).aggregate(agg_spec)
temp_all.columns = temp.columns
temp_all.index = ['Total']

temp = pd.concat([temp, temp_all])
temp['Sum TNA'] = sum_tna
# The average number of holdings (`boolean_sum`) is currently left out:
#temp['No. holdings'] = boolean_sum
temp.index.name = 'Lipper class'

# Put 'Sum TNA' right after the two fund-count columns.
temp = temp[columns[:2] + ['Sum TNA'] + columns[2:]]
temp = temp.round(2)
temp.T

In [None]:
# Print the LaTeX source of the table currently held in `temp`
# (row labels included, index name and bold row labels switched off).
latex_options = dict(index=True, index_names=False, bold_rows=False)
print(temp.to_latex(**latex_options))

## Table 2 / Over the years

In [None]:
# Table 2 -- the same summary statistics of `mret`, one row per value of
# `returns['year']` plus a 'Total' row.
# NOTE: this cell rebinds `sum_tna` (per-year values). The Table 1 cell reads
# `sum_tna` too, so re-running Table 1 after this cell uses the per-year frame.
columns = [
    'Number of funds', 'Percent of funds',
    'Return', 'Standard deviation',
    'Min', 'Max', 'Skewness', 'Kurtosis',
]

# Empty frame with the table layout (not used again inside this cell).
table_1 = pd.DataFrame(columns=columns)

unique_funds = returns['crsp_fundno'].nunique()


def percent(x):
    """Distinct values in the group as a percentage of all distinct funds."""
    return x.nunique() / unique_funds * 100


def annual_ret(x):
    """Mean scaled by 12, expressed in percent."""
    return x.mean() * 12 * 100


def annual_std(x):
    """Standard deviation scaled by sqrt(12), expressed in percent."""
    return x.std() * np.sqrt(12) * 100


def quant_05(x):
    """5% quantile (defined for convenience; not part of the table)."""
    return x.quantile([0.05])


def quant_95(x):
    """95% quantile (defined for convenience; not part of the table)."""
    return x.quantile([0.95])


def kurt(x):
    """Kurtosis as returned by the pandas ``kurt`` method."""
    return x.kurt()


# Same aggregation for the per-year rows and for the 'Total' row; the order
# of the statistics matches `columns`.
agg_spec = {
    'crsp_fundno': ['nunique', percent],
    'mret': [annual_ret, annual_std, 'min', 'max', 'skew', kurt],
}

# One row per year.
temp = returns.groupby([returns['year']]).agg(agg_spec)
temp.index.name = 'Year'
temp.columns = columns
temp.index = temp.index.astype(str)

# A constant grouping key puts every row into a single group.
temp_all = returns.groupby(lambda _: True).aggregate(agg_spec)
temp_all.columns = temp.columns
temp_all.index = ['Total']

temp = pd.concat([temp, temp_all])
temp.index.name = 'Year'
temp = temp.round(2)

# Summed `tna_latest` per year; its 'Total' row is the mean over the years.
sum_tna = row_info.groupby(['year']).agg({'tna_latest': ['sum']})
sum_tna.index = sum_tna.index.astype(str)
sum_tna.loc['Total', :] = sum_tna.mean(axis=0)
sum_tna = sum_tna.round()

temp['Sum TNA'] = sum_tna

# Put 'Sum TNA' right after the two fund-count columns.
temp = temp[columns[:2] + ['Sum TNA'] + columns[2:]]

In [None]:
# Display the per-year table assembled in the cell above.
temp

In [None]:
# Print the LaTeX source of the table currently held in `temp`
# (row labels included, index name and bold row labels switched off).
latex_options = dict(index=True, index_names=False, bold_rows=False)
print(temp.to_latex(**latex_options))

## Distribution per year

In [None]:
# Helper: percentage of distinct portfolios per category, by report year

def percent_per_year(row_info,column):
    """Percentage of distinct portfolios per category of ``column``, by report year.

    Parameters
    ----------
    row_info : pandas.DataFrame
        Must provide ``crsp_portno``, ``report_dt`` (datetime-like, accessed
        through ``.dt.year``) and the column named by ``column``.
    column : str
        Column whose values become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by report year. One column per value of ``column`` (labels
        cast to ``str``) plus a ``'total'`` column, all in percent and rounded
        to two decimals. ``row_info`` itself is not modified.
    """
    report_year = row_info['report_dt'].dt.year

    # Distinct portfolio numbers for every (year, category) pair, reshaped to
    # years in the rows and categories in the columns.
    counts = (
        row_info['crsp_portno']
        .groupby([report_year, row_info[column]])
        .nunique()
        .unstack(column)
    )
    counts.columns = counts.columns.astype(str)

    # Row total across categories; dividing by it also turns the 'total'
    # column itself into 100.
    counts['total'] = counts.sum(axis=1)
    shares = counts.div(counts['total'], axis=0) * 100

    return shares.round(2)

PAPER

In [None]:
# Percentage of distinct portfolios per `lipper_class`, by report year.
percent_per_year(row_info,'lipper_class')

## Average return per year

In [None]:
# Helper: compounded average of `mret` per year and category

def average_return_per_year(returns,column):
    """Compounded average return per year, split by the categories of ``column``.

    The mean of ``mret`` is taken per (year, category) and per year (column
    ``'all'``), compounded over 12 periods as ``(1 + mean) ** 12 - 1`` and
    reported in percent.

    Parameters
    ----------
    returns : pandas.DataFrame
        Must provide ``year``, ``mret`` and the column named by ``column``.
        Any other columns are ignored.
    column : str
        Column whose values become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year``; one column per value of ``column`` (labels cast
        to ``str``) plus ``'all'``. Values are rounded to four decimals before
        being multiplied by 100. Combinations without data are NaN.
    """
    # Aggregate `mret` only. Averaging the whole frame raises on non-numeric
    # columns in recent pandas, and the 'all' assignment below used to work
    # only if exactly one column survived dropping `crsp_fundno`.
    temp = (returns.groupby(['year', column])['mret']
                .mean()
                .unstack(column)
            )

    temp.columns = temp.columns.astype(str)

    # Average over all categories within the year.
    temp['all'] = returns.groupby('year')['mret'].mean()

    # Vectorised compounding (replaces the element-wise, deprecated applymap).
    temp = (temp + 1) ** 12 - 1

    return temp.round(4) * 100

In [None]:
# Compounded average of `mret` per year (in percent), split by `lipper_class`.
average_return_per_year(returns,'lipper_class')

In [None]:
# Compounded average of `mret` per year (in percent), split by `cap_class`.
average_return_per_year(returns,'cap_class')

In [None]:
# Compounded average of `mret` per year (in percent), split by `style_class`.
average_return_per_year(returns,'style_class')

## Average std per year

In [None]:
# Helper: standard deviation of `mret` per year and category

def average_std_per_year(returns,column):
    """Annualised standard deviation of ``mret`` per year, split by ``column``.

    The standard deviation of ``mret`` is taken per (year, category) and per
    year (column ``'all'``), scaled by ``sqrt(12)`` and reported in percent.
    This is the same scaling the ``annual_std`` aggregator of the summary
    tables applies (``std * sqrt(12) * 100``).

    Parameters
    ----------
    returns : pandas.DataFrame
        Must provide ``year``, ``mret`` and the column named by ``column``.
        Any other columns are ignored.
    column : str
        Column whose values become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year``; one column per value of ``column`` (labels cast
        to ``str``) plus ``'all'``. Values are rounded to four decimals before
        being multiplied by 100. Groups with fewer than two observations
        are NaN.
    """
    # Aggregate `mret` only. Calling std() on the whole frame raises on
    # non-numeric columns in recent pandas, and the 'all' assignment below used
    # to work only if exactly one column survived dropping `crsp_fundno`.
    temp = (returns.groupby(['year', column])['mret']
                .std()
                .unstack(column)
            )

    temp.columns = temp.columns.astype(str)

    # Dispersion across all categories within the year.
    temp['all'] = returns.groupby('year')['mret'].std()

    # A standard deviation is annualised by scaling with sqrt(12); compounding
    # it like a return, (1 + x) ** sqrt(12) - 1, overstates the figure.
    temp = temp * np.sqrt(12)

    return temp.round(4) * 100

In [None]:
# Per-year standard deviation table of `mret`, split by `lipper_class`.
average_std_per_year(returns,'lipper_class')

In [None]:
# Per-year standard deviation table of `mret`, split by `cap_class`.
average_std_per_year(returns,'cap_class')

In [None]:
# Per-year standard deviation table of `mret`, split by `style_class`.
average_std_per_year(returns,'style_class')

## Old