<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Table-1-/-whole-sample-over-the-years" data-toc-modified-id="Table-1-/-whole-sample-over-the-years-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Table 1 / whole sample over the years</a></span></li><li><span><a href="#Table-2-/-Over-the-years" data-toc-modified-id="Table-2-/-Over-the-years-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Table 2 / Over the years</a></span></li><li><span><a href="#Distribution-per-year" data-toc-modified-id="Distribution-per-year-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Distribution per year</a></span></li><li><span><a href="#Average-retun-per-year" data-toc-modified-id="Average-retun-per-year-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Average retun per year</a></span></li><li><span><a href="#Average-std-per-year" data-toc-modified-id="Average-std-per-year-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Average std per year</a></span></li></ul></div>

# Analysis of selected funds

In [None]:
import feather
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Sample window (both ends inclusive), kept as ISO date strings
begin_date, end_date = '2010-01-01', '2019-12-31'

In [None]:
# Read the two pre-processed feather files into data frames.
info_path = '../data/processed/row_info.feather'
returns_path = '../data/processed/returns.feather'

info_df = feather.read_dataframe(info_path)
returns = feather.read_dataframe(returns_path)

In [None]:
# Restrict both frames to the [begin_date, end_date] window (inclusive) ...
info_df = info_df.query('report_dt >= @begin_date and report_dt <= @end_date')

# ... and tag every remaining return observation with its calendar year.
returns = (
    returns
    .query('caldt >= @begin_date and caldt <= @end_date')
    .assign(year=lambda frame: frame['caldt'].dt.year)
)

## Table 1 / whole sample over the years

In [None]:
# Quick look at the kurtosis of the `mret` column over the filtered sample.
returns.loc[:, 'mret'].kurt()

In [None]:
# Table 1: summary statistics of `mret` per Lipper class, followed by one
# row computed on the pooled sample.
columns = ['Number of funds', 'Percent of funds',
           'Return', 'Standard deviation',
           'Min', 'Max', 'Skewness', 'Kurtosis']

# Denominator of the 'Percent of funds' column.
unique_funds = returns['crsp_fundno'].nunique()


# Named functions instead of lambdas: several anonymous functions aggregated
# on the same column are rejected by older pandas releases and produce
# opaque `<lambda_N>` labels in newer ones.
def percent(x):
    """Distinct values in `x` as a percentage of all distinct funds."""
    return x.nunique() / unique_funds * 100


def annual_ret(x):
    """Mean of `x` scaled by 12, expressed in percent."""
    return x.mean() * 12 * 100


def annual_std(x):
    """Standard deviation of `x` scaled by sqrt(12), expressed in percent."""
    return x.std() * np.sqrt(12) * 100


def kurt(x):
    """Kurtosis of `x`; thin wrapper so it can be passed to `agg` as a callable."""
    return x.kurt()


# One spec for both aggregations so their column layout cannot drift apart.
# The order of the statistics must match `columns`.
agg_spec = {'crsp_fundno': ['nunique', percent],
            'mret': [annual_ret, annual_std,
                     'min', 'max',
                     'skew', kurt]}

temp = returns.groupby('lipper_class').agg(agg_spec)
temp.columns = columns
temp.index = temp.index.astype(str)

# Grouping on a constant key puts every row into a single group, which yields
# the same statistics for the whole sample. The row is labelled 'Average' but
# is computed on the pooled data, not as a mean of the rows above.
temp_all = returns.groupby(lambda _: True).agg(agg_spec)
temp_all.columns = columns
temp_all.index = ['Average']

temp = pd.concat([temp, temp_all])
temp.index.name = 'Lipper class'

temp = temp.round(2)

In [None]:
# Render the summary table currently held in `temp`.
temp

In [None]:
# Emit the table as LaTeX source: row labels are kept and set in bold,
# the index name is left out of the header.
print(temp.to_latex(index=True, index_names=False, bold_rows=True))

## Table 2 / Over the years

In [None]:
# Table 2: summary statistics of `mret` per calendar year, followed by one
# row computed on the pooled sample.
columns = ['Number of funds', 'Percent of funds',
           'Return', 'Standard deviation',
           'Min', 'Max', 'Skewness', 'Kurtosis']

# Denominator of the 'Percent of funds' column.
unique_funds = returns['crsp_fundno'].nunique()


# Named functions instead of lambdas: several anonymous functions aggregated
# on the same column are rejected by older pandas releases and produce
# opaque `<lambda_N>` labels in newer ones.
def percent(x):
    """Distinct values in `x` as a percentage of all distinct funds."""
    return x.nunique() / unique_funds * 100


def annual_ret(x):
    """Mean of `x` scaled by 12, expressed in percent."""
    return x.mean() * 12 * 100


def annual_std(x):
    """Standard deviation of `x` scaled by sqrt(12), expressed in percent."""
    return x.std() * np.sqrt(12) * 100


def kurt(x):
    """Kurtosis of `x`; thin wrapper so it can be passed to `agg` as a callable."""
    return x.kurt()


# One spec for both aggregations so their column layout cannot drift apart.
# The order of the statistics must match `columns`.
agg_spec = {'crsp_fundno': ['nunique', percent],
            'mret': [annual_ret, annual_std,
                     'min', 'max',
                     'skew', kurt]}

temp = returns.groupby('year').agg(agg_spec)
temp.columns = columns
temp.index = temp.index.astype(str)

# Grouping on a constant key puts every row into a single group, which yields
# the same statistics for the whole sample. The row is labelled 'Average' but
# is computed on the pooled data, not as a mean of the yearly rows.
temp_all = returns.groupby(lambda _: True).agg(agg_spec)
temp_all.columns = columns
temp_all.index = ['Average']

temp = pd.concat([temp, temp_all])
temp.index.name = 'Year'

temp = temp.round(2)

In [None]:
# Render the summary table currently held in `temp`.
temp

In [None]:
# Emit the table as LaTeX source: row labels are kept and set in bold,
# the index name is left out of the header.
print(temp.to_latex(index=True, index_names=False, bold_rows=True))

## Distribution per year

In [None]:
# Yearly composition of the sample by class

def percent_per_year(info_df, column):
    """Percentage of distinct portfolios per class value, by report year.

    Parameters
    ----------
    info_df : DataFrame
        Must contain `report_dt` (datetime-like), `crsp_portno` and `column`.
    column : str
        Name of the classification column to split by.

    Returns
    -------
    DataFrame
        Indexed by report year, one column per class value (labels cast to
        str) plus a 'total' column; values are percentages of the row total,
        rounded to two decimals.
    """
    report_year = info_df['report_dt'].dt.year

    # Distinct portfolio count for every (year, class) pair, classes as columns.
    counts = (
        info_df['crsp_portno']
        .groupby([report_year, info_df[column]])
        .nunique()
        .unstack(column)
    )
    counts.columns = counts.columns.astype(str)

    # Row total is the sum over classes; after the division it reads 100.
    counts['total'] = counts.sum(axis=1)
    shares = counts.div(counts['total'], axis=0) * 100

    return shares.round(2)

In [None]:
# Yearly share of distinct portfolios, split by `lipper_class`.
percent_per_year(info_df, column='lipper_class')

In [None]:
# Yearly share of distinct portfolios, split by `cap_class`.
percent_per_year(info_df, column='cap_class')

In [None]:
# Yearly share of distinct portfolios, split by `style_class`.
percent_per_year(info_df, column='style_class')

## Average return per year

In [None]:
# Annualised mean return per year and class

def average_return_per_year(returns, column):
    """Annualised mean of `mret` per year, split by `column`.

    Parameters
    ----------
    returns : DataFrame
        Must contain `year`, `mret` and `column`. Any other columns
        (numeric or not) are ignored.
    column : str
        Name of the classification column to split by.

    Returns
    -------
    DataFrame
        Indexed by year, one column per class value (labels cast to str)
        plus 'all' for the whole sample. Values are the mean `mret`
        compounded over 12 periods, in percent (rounded to 4 decimals
        before the conversion to percent).
    """
    # Select `mret` before aggregating: averaging the whole frame fails on
    # non-numeric columns in recent pandas and is wasted work otherwise.
    mean_ret = (
        returns
        .groupby(['year', column])['mret']
        .mean()
        .unstack(column)
    )
    mean_ret.columns = mean_ret.columns.astype(str)

    # A Series aligned on year; does not depend on which other columns exist.
    mean_ret['all'] = returns.groupby('year')['mret'].mean()

    # Compound the mean return over 12 periods.
    annualised = (mean_ret + 1) ** 12 - 1

    return annualised.round(4) * 100

In [None]:
# Annualised mean return per year, split by `lipper_class`.
average_return_per_year(returns, column='lipper_class')

In [None]:
# Annualised mean return per year, split by `cap_class`.
average_return_per_year(returns, column='cap_class')

In [None]:
# Annualised mean return per year, split by `style_class`.
average_return_per_year(returns, column='style_class')

## Average std per year

In [None]:
# Annualised return volatility per year and class

def average_std_per_year(returns, column):
    """Annualised standard deviation of `mret` per year, split by `column`.

    Parameters
    ----------
    returns : DataFrame
        Must contain `year`, `mret` and `column`. Any other columns
        (numeric or not) are ignored.
    column : str
        Name of the classification column to split by.

    Returns
    -------
    DataFrame
        Indexed by year, one column per class value (labels cast to str)
        plus 'all' for the whole sample. Values are the sample standard
        deviation of `mret` scaled by sqrt(12), in percent (rounded to
        4 decimals before the conversion to percent). Groups with a single
        observation yield NaN.
    """
    # Select `mret` before aggregating: taking the std of the whole frame
    # fails on non-numeric columns in recent pandas.
    std_ret = (
        returns
        .groupby(['year', column])['mret']
        .std()
        .unstack(column)
    )
    std_ret.columns = std_ret.columns.astype(str)

    # A Series aligned on year; does not depend on which other columns exist.
    std_ret['all'] = returns.groupby('year')['mret'].std()

    # Volatility scales with the square root of the horizon, so multiply by
    # sqrt(12). Compounding it like a return, (1 + s) ** sqrt(12) - 1, is not
    # a valid scaling and disagrees with `annual_std` in the summary tables.
    annualised = std_ret * np.sqrt(12)

    return annualised.round(4) * 100

In [None]:
# Per-year standard deviation table (in percent), split by `lipper_class`.
average_std_per_year(returns, column='lipper_class')

In [None]:
# Per-year standard deviation table (in percent), split by `cap_class`.
average_std_per_year(returns, column='cap_class')

In [None]:
# Per-year standard deviation table (in percent), split by `style_class`.
average_std_per_year(returns, column='style_class')