<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Table-1-/-whole-sample-over-the-years" data-toc-modified-id="Table-1-/-whole-sample-over-the-years-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Table 1 / whole sample over the years</a></span></li><li><span><a href="#Table-2-/-Over-the-years" data-toc-modified-id="Table-2-/-Over-the-years-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Table 2 / Over the years</a></span></li><li><span><a href="#Distribution-per-year" data-toc-modified-id="Distribution-per-year-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Distribution per year</a></span></li><li><span><a href="#Average-retun-per-year" data-toc-modified-id="Average-retun-per-year-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Average retun per year</a></span></li><li><span><a href="#Average-std-per-year" data-toc-modified-id="Average-std-per-year-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Average std per year</a></span></li></ul></div>

# Analysis of selected funds

In [1]:
import feather
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Convert to date format
begin_date = '2010-01-01' 
end_date   = '2019-12-31'

In [3]:
path = '../data/processed/row_info.feather'
info_df = feather.read_dataframe(path)

path = '../data/processed/returns.feather'
returns = feather.read_dataframe(path)

In [4]:
info_df = info_df.query('report_dt >= @begin_date and report_dt <= @end_date')
returns = returns.query('caldt >= @begin_date and caldt <= @end_date')
returns = returns.assign(year = returns['caldt'].dt.year)

## Table 1 / whole sample over the years

In [5]:
returns['mret'].kurt()

4.238621791691928

In [6]:
columns = ['Number of funds', 'Percent of funds', 
           'Return', 'Standard deviation', 
           'Min','Max','Skewness','Kurtosis']

table_1 = pd.DataFrame(columns=columns)

unique_funds = returns['crsp_fundno'].nunique()
percent = lambda x: x.nunique() / unique_funds * 100
annual_ret = lambda x: x.mean() * 12 * 100
annual_std = lambda x: x.std() * np.sqrt(12) * 100
quant_05 = lambda x: x.quantile([0.05])
quant_95 = lambda x: x.quantile([0.95])
kurt = lambda x: x.kurt()


temp = (returns
    .groupby([
         returns['lipper_class']
     ])
     .agg({'crsp_fundno' : ['nunique',percent],
           'mret' : [annual_ret, annual_std,
                     'min', 'max',
                     'skew', kurt]})
)

temp.index.name = 'Lipper class'
temp.columns = columns

temp.index = temp.index.astype(str)

temp_all = (returns
            .groupby(lambda _ : True)
            .aggregate(
                {'crsp_fundno' : ['nunique',percent],
                 'mret'        : [annual_ret, annual_std,
                            'min', 'max',
                            'skew', kurt]})
)

temp_all.columns = temp.columns
temp_all.index = ['Average']

temp = pd.concat([temp,temp_all])
temp.index.name = 'Lipper class'

temp = round(temp,2)

In [7]:
temp

Unnamed: 0_level_0,Number of funds,Percent of funds,Return,Standard deviation,Min,Max,Skewness,Kurtosis
Lipper class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
EIEI,0,,,,,,,
LCVE,335,11.32,9.53,13.18,-0.13,0.15,-0.27,0.6
LCCE,589,19.91,10.51,12.81,-0.2,0.17,-0.28,0.72
LCGE,463,15.65,11.85,14.29,-0.16,0.17,-0.21,0.51
MLVE,271,9.16,9.19,14.08,-0.27,0.21,-0.33,1.38
MLCE,542,18.32,9.57,13.59,-0.32,0.26,-0.3,1.34
MLGE,415,14.03,11.04,14.62,-0.9,0.19,-0.75,11.07
MCVE,160,5.41,10.27,14.98,-0.17,0.19,-0.21,1.12
MCCE,283,9.56,9.89,14.85,-0.17,0.87,0.35,12.63
MCGE,278,9.4,11.29,15.56,-0.2,1.04,0.42,16.64


In [209]:
print(temp.to_latex(index = True,
                    index_names = False,
                    bold_rows = True)
     )

\begin{tabular}{lrrrrrrrr}
\toprule
{} &  Number of funds &  Percent of funds &  Return &  Standard deviation &   Min &   Max &  Skewness &  Kurtosis \\
\midrule
\textbf{EIEI   } &              221 &              6.69 &    8.67 &               11.56 & -0.22 &  0.16 &     -0.32 &      1.06 \\
\textbf{LCVE   } &              340 &             10.29 &    9.55 &               13.17 & -0.12 &  0.15 &     -0.28 &      0.61 \\
\textbf{LCCE   } &              607 &             18.37 &   10.50 &               12.78 & -0.20 &  0.17 &     -0.28 &      0.73 \\
\textbf{LCGE   } &              470 &             14.23 &   11.84 &               14.30 & -0.16 &  0.17 &     -0.21 &      0.51 \\
\textbf{MLVE   } &              280 &              8.47 &    9.11 &               14.14 & -0.27 &  0.21 &     -0.32 &      1.37 \\
\textbf{MLCE   } &              639 &             19.34 &    9.25 &               13.31 & -0.32 &  0.26 &     -0.30 &      1.34 \\
\textbf{MLGE   } &              435 &             13

## Table 2 / Over the years

In [9]:
columns = ['Number of funds', 'Percent of funds', 
           'Return', 'Standard deviation', 
           'Min','Max','Skewness','Kurtosis']

table_1 = pd.DataFrame(columns=columns)

unique_funds = returns['crsp_fundno'].nunique()
percent = lambda x: x.nunique() / unique_funds * 100
annual_ret = lambda x: x.mean() * 12 * 100
annual_std = lambda x: x.std() * np.sqrt(12) * 100
quant_05 = lambda x: x.quantile([0.05])
quant_95 = lambda x: x.quantile([0.95])
kurt = lambda x: x.kurt()


temp = (returns
    .groupby([
         returns['year']
     ])
     .agg({'crsp_fundno' : ['nunique',percent],
           'mret' : [annual_ret, annual_std,
                     'min', 'max',
                     'skew', kurt]})
)

temp.index.name = 'Year'
temp.columns = columns

temp.index = temp.index.astype(str)

temp_all = (returns
            .groupby(lambda _ : True)
            .aggregate(
                {'crsp_fundno' : ['nunique',percent],
                 'mret'        : [annual_ret, annual_std,
                            'min', 'max',
                            'skew', kurt]})
)

temp_all.columns = temp.columns
temp_all.index = ['Average']

temp = pd.concat([temp,temp_all])
temp.index.name = 'Year'

temp = round(temp,2)

In [10]:
temp

Unnamed: 0_level_0,Number of funds,Percent of funds,Return,Standard deviation,Min,Max,Skewness,Kurtosis
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010,2443,82.56,19.73,20.16,-0.16,0.22,-0.24,-1.03
2011,2491,84.18,-1.13,19.11,-0.2,0.26,0.5,1.21
2012,2437,82.36,14.53,12.22,-0.19,0.21,-0.9,1.17
2013,2382,80.5,30.43,9.47,-0.18,0.3,-0.33,0.26
2014,2337,78.98,8.13,11.17,-0.16,0.12,-0.27,-0.44
2015,2330,78.74,-1.02,13.53,-0.27,0.12,0.32,-0.2
2016,2300,77.73,11.38,14.06,-0.19,1.04,0.51,16.67
2017,2218,74.96,17.99,6.97,-0.9,0.22,-5.83,235.53
2018,2138,72.25,-6.73,16.85,-0.27,0.87,-0.49,4.54
Average,2959,100.0,10.5,14.75,-0.9,1.04,-0.22,4.24


In [8]:
print(temp.to_latex(index = True,
                    index_names = False,
                    bold_rows = True)
     )

\begin{tabular}{lrrrrrrrr}
\toprule
{} &  Number of funds &  Percent of funds &  Return &  Standard deviation &   Min &   Max &  Skewness &  Kurtosis \\
\midrule
\textbf{2010   } &             2443 &             82.56 &   19.73 &               20.16 & -0.16 &  0.22 &     -0.24 &     -1.03 \\
\textbf{2011   } &             2491 &             84.18 &   -1.13 &               19.11 & -0.20 &  0.26 &      0.50 &      1.21 \\
\textbf{2012   } &             2437 &             82.36 &   14.53 &               12.22 & -0.19 &  0.21 &     -0.90 &      1.17 \\
\textbf{2013   } &             2382 &             80.50 &   30.43 &                9.47 & -0.18 &  0.30 &     -0.33 &      0.26 \\
\textbf{2014   } &             2337 &             78.98 &    8.13 &               11.17 & -0.16 &  0.12 &     -0.27 &     -0.44 \\
\textbf{2015   } &             2330 &             78.74 &   -1.02 &               13.53 & -0.27 &  0.12 &      0.32 &     -0.20 \\
\textbf{2016   } &             2300 &             77

## Distribution per year

In [6]:
# Analysis of selected funds

def percent_per_year(info_df,column):
    
    info_df = info_df
    
    temp = (info_df['crsp_portno']
        .groupby([
             info_df['report_dt'].dt.year,
             info_df[column]
         ])
         .nunique()
         .reset_index()
         .pivot(columns=column,values='crsp_portno',index='report_dt')
    )

    temp.columns = temp.columns.astype(str)
    temp['total'] = np.sum(temp,axis=1)
    temp = temp.apply(lambda x: x / temp['total'] * 100)

    return(round(temp,2))

In [7]:
percent_per_year(info_df,'lipper_class')

lipper_class,EIEI,LCVE,LCCE,LCGE,MLVE,MLCE,MLGE,MCVE,MCCE,MCGE,SCVE,SCCE,SCGE,total
report_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010,4.34,6.87,13.96,13.03,4.34,13.24,7.63,2.7,5.86,7.13,3.37,9.91,7.59,100.0
2011,4.4,6.97,15.21,12.07,4.44,12.51,7.97,3.36,4.4,7.02,4.36,9.59,7.71,100.0
2012,4.99,7.05,14.62,11.69,4.08,12.12,7.91,2.84,5.2,6.96,4.34,9.8,8.38,100.0
2013,6.12,7.04,13.95,11.23,3.87,12.25,7.73,2.67,5.39,7.14,4.42,9.94,8.24,100.0
2014,6.66,7.44,12.78,11.27,4.43,11.05,8.12,2.42,5.02,7.35,4.61,10.41,8.44,100.0
2015,7.11,7.36,13.19,11.36,4.2,9.63,9.04,2.42,5.58,6.67,4.69,9.88,8.89,100.0
2016,6.8,7.27,12.79,11.31,4.52,9.89,8.65,2.47,5.47,6.65,4.61,10.79,8.79,100.0
2017,7.37,7.42,12.69,11.25,4.68,9.46,8.36,2.44,5.38,6.12,4.63,12.05,8.16,100.0
2018,7.28,7.87,10.45,11.09,5.64,9.75,7.97,2.82,5.79,6.19,4.21,12.52,8.42,100.0


In [217]:
percent_per_year(info_df,'cap_class')

cap_class,SC,MC,ML,EI,LC,total
report_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,20.88,15.69,25.22,4.34,33.87,100.0
2011,21.66,14.77,24.92,4.4,34.25,100.0
2012,22.53,15.0,24.12,4.99,33.36,100.0
2013,22.61,15.19,23.85,6.12,32.23,100.0
2014,23.46,14.79,23.6,6.66,31.49,100.0
2015,23.46,14.67,22.86,7.11,31.9,100.0
2016,24.19,14.59,23.05,6.8,31.37,100.0
2017,24.84,13.94,22.5,7.37,31.36,100.0
2018,25.15,14.8,23.37,7.28,29.41,100.0


In [215]:
percent_per_year(info_df,'style_class')

style_class,E,V,C,G,total
report_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,4.34,17.29,42.98,35.39,100.0
2011,4.4,19.13,41.7,34.77,100.0
2012,4.99,18.31,41.75,34.95,100.0
2013,6.12,18.0,41.53,34.35,100.0
2014,6.66,18.9,39.25,35.19,100.0
2015,7.11,18.67,38.27,35.95,100.0
2016,6.8,18.87,38.93,35.41,100.0
2017,7.37,19.16,39.57,33.9,100.0
2018,7.28,20.54,38.51,33.66,100.0


## Average retun per year

In [216]:
# Analysis of selected funds

def average_return_per_year(returns,column):
    
    returns = returns
    
    temp = (returns.groupby(['year',column])
                .mean()
                .reset_index()
                .pivot(columns=column,values='mret',index='year')
            )
    
    temp.columns = temp.columns.astype(str)

    temp['all'] = (returns.groupby(['year'])
                .mean()
                .drop(columns='crsp_fundno')
            )

    temp = temp.applymap(lambda x: (x+1) ** 12 - 1)
        
    return(round(temp,4) * 100)

In [217]:
average_return_per_year(returns,'lipper_class')

lipper_class,EIEI,G,LCVE,LCCE,LCGE,MLVE,MLCE,MLGE,MCVE,MCCE,MCGE,SCVE,SCCE,SCGE,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010,16.59,,15.0,14.9,17.31,18.31,18.16,21.47,24.83,25.43,28.51,28.32,28.43,30.7,21.14
2011,3.1,,0.48,0.46,-0.88,-1.05,-1.52,-2.42,-1.02,-2.84,-2.63,-3.45,-1.45,-1.27,-1.05
2012,12.59,,16.05,14.93,16.49,15.79,15.46,14.92,16.96,15.45,14.85,16.23,15.87,14.4,15.37
2013,27.33,,33.05,31.48,34.35,33.26,30.93,34.08,35.69,35.28,35.89,36.38,37.41,43.07,34.14
2014,9.52,,10.75,11.72,10.47,9.77,8.26,9.58,9.52,8.43,7.4,3.19,4.68,3.42,8.37
2015,-3.29,,-3.22,0.02,6.31,-4.45,-1.71,2.87,-4.69,-4.02,-0.09,-6.74,-4.59,-0.83,-1.2
2016,14.03,,15.18,10.33,2.55,16.55,10.19,3.73,19.78,15.17,6.93,27.97,21.96,11.12,12.02
2017,15.18,,15.46,20.13,29.83,15.4,18.64,26.19,12.55,15.45,24.96,8.41,13.29,22.96,19.24
2018,-7.05,,-8.36,-3.91,0.55,-12.48,-7.62,-1.6,-15.62,-9.51,-4.02,-14.15,-11.81,-2.45,-6.58


In [218]:
average_return_per_year(returns,'cap_class')

cap_class,SC,MC,ML,EI,LC,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,29.21,26.71,19.15,16.59,15.81,21.14
2011,-1.76,-2.4,-1.71,3.1,-0.03,-1.05
2012,15.43,15.52,15.35,12.59,15.71,15.37
2013,39.18,35.64,32.33,27.33,32.8,34.14
2014,3.94,8.17,8.94,9.52,11.08,8.37
2015,-3.72,-2.32,-0.75,-3.29,1.38,-1.2
2016,19.21,12.21,9.03,14.03,8.69,12.02
2017,15.58,18.96,20.55,15.18,22.27,19.24
2018,-9.18,-8.34,-6.57,-7.05,-3.46,-6.58


In [219]:
average_return_per_year(returns,'style_class')

style_class,E,V,C,G,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,16.59,20.07,20.34,23.34,21.14
2011,3.1,-1.03,-1.0,-1.65,-1.05
2012,12.59,16.18,15.37,15.34,15.37
2013,27.33,34.31,33.15,36.58,34.14
2014,9.52,8.43,8.53,7.93,8.37
2015,-3.29,-4.6,-2.19,2.42,-1.2
2016,14.03,19.25,13.76,5.72,12.02
2017,15.18,13.32,17.19,26.27,19.24
2018,-7.05,-11.76,-8.0,-1.57,-6.58


## Average std per year

In [220]:
# Analysis of selected funds

def average_std_per_year(returns,column):
    
    returns = returns
    
    temp = (returns.groupby(['year',column])
                .std()
                .reset_index()
                .pivot(columns=column,values='mret',index='year')
            )
        
    temp.columns = temp.columns.astype(str)

    temp['all'] = (returns.groupby(['year'])
                .std()
                .drop(columns='crsp_fundno')
            )
    
    temp = temp.applymap(lambda x: (x+1) ** np.sqrt(12) - 1)
    
    return(round(temp,4) * 100)

In [221]:
average_std_per_year(returns,'lipper_class')

lipper_class,EIEI,G,LCVE,LCCE,LCGE,MLVE,MLCE,MLGE,MCVE,MCCE,MCGE,SCVE,SCCE,SCGE,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010,18.18,,19.89,19.77,21.46,20.96,20.05,21.51,21.46,21.43,21.9,24.6,23.55,23.79,21.4
2011,15.82,,18.12,17.26,17.97,20.15,18.68,19.65,22.55,21.17,21.71,24.26,24.1,24.24,20.16
2012,9.5,,11.44,11.19,13.8,12.68,11.66,13.6,12.05,12.26,13.68,13.16,13.05,14.48,12.57
2013,8.85,,9.08,8.83,8.93,9.84,9.12,9.18,10.48,10.17,9.44,11.49,11.57,10.56,9.72
2014,8.65,,8.4,8.61,10.62,9.44,9.47,11.31,11.15,11.66,13.22,14.3,14.05,16.1,11.37
2015,12.65,,14.32,13.72,14.66,13.51,13.09,13.83,13.03,13.16,13.66,14.37,14.47,16.24,14.03
2016,10.45,,12.34,10.99,12.64,13.88,12.25,13.61,15.13,14.58,17.82,18.68,17.23,18.47,14.42
2017,5.36,,5.81,4.53,5.6,6.26,5.93,8.86,6.92,6.0,5.51,11.58,8.23,8.08,6.99
2018,13.97,,15.01,14.94,18.07,16.59,15.22,18.42,16.97,18.29,19.11,18.43,18.83,22.75,17.51


In [222]:
average_std_per_year(returns,'cap_class')

cap_class,SC,MC,ML,EI,LC,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,23.82,21.65,20.64,18.18,20.43,21.4
2011,24.18,21.68,19.24,15.82,17.7,20.16
2012,13.59,12.88,12.47,9.5,12.22,12.57
2013,11.22,9.91,9.27,8.85,8.92,9.72
2014,14.84,12.3,10.09,8.65,9.3,11.37
2015,15.11,13.39,13.42,12.65,14.24,14.03
2016,18.06,16.23,13.08,10.45,11.98,14.42
2017,9.01,6.11,7.18,5.36,5.41,6.99
2018,20.22,18.47,16.63,13.97,16.14,17.51


In [223]:
average_std_per_year(returns,'style_class')

style_class,E,V,C,G,all
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,18.18,21.51,21.06,22.15,21.4
2011,15.82,20.81,19.92,20.61,20.16
2012,9.5,12.26,11.9,13.89,12.57
2013,8.85,10.1,9.79,9.52,9.72
2014,8.65,10.78,10.79,12.79,11.37
2015,12.65,13.98,13.65,14.69,14.03
2016,10.45,14.92,13.68,15.49,14.42
2017,5.36,7.85,6.33,7.23,6.99
2018,13.97,16.55,16.66,19.6,17.51
