<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Number-of-unique-port_nos-and-fund_nos" data-toc-modified-id="Number-of-unique-port_nos-and-fund_nos-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Number of unique port_nos and fund_nos</a></span></li><li><span><a href="#Number-of-unique-funds-per-year" data-toc-modified-id="Number-of-unique-funds-per-year-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Number of unique funds per year</a></span></li><li><span><a href="#Distribution-by-number-of-observations-per-fund" data-toc-modified-id="Distribution-by-number-of-observations-per-fund-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Distribution by number of observations per fund</a></span></li><li><span><a href="#Distribution-by-lipper_class" data-toc-modified-id="Distribution-by-lipper_class-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Distribution by lipper_class</a></span></li><li><span><a href="#How-often-do-lipper_classes-change?" data-toc-modified-id="How-often-do-lipper_classes-change?-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>How often do lipper_classes change?</a></span><ul class="toc-item"><li><span><a href="#Fund-names" data-toc-modified-id="Fund-names-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Fund names</a></span></li></ul></li></ul></div>

# Analysis of selected funds

## Setup

In [None]:
import feather
import pickle
import pandas as pd
import numpy as np
from scipy import sparse

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the pre-processed data dictionary from disk.
# NOTE: pickle.load can execute arbitrary code, so only point `path` at a
# file produced by this project's own pipeline.
path = '../data/processed/full.pickle'
# The context manager closes the file handle as soon as loading is done.
with open(path, "rb") as pickle_off:
    dict_all_years = pickle.load(pickle_off)
# Entry stored under the integer key 2018.
dict_year = dict_all_years[2018]

In [None]:
# Entry stored under the 'full' key; the cells below use it as a DataFrame
# with columns such as crsp_portno, crsp_fundno, report_dt and lipper_class.
# NOTE(review): presumably one row per fund report -- confirm against the
# preprocessing step.
row_info = dict_all_years['full']

## Number of unique port_nos and fund_nos

In [None]:
# Summary statistics of the report dates.
report_dates = row_info['report_dt']
report_dates.describe()

In [None]:
# The section reports distinct portfolio numbers AND fund numbers, so count
# both identifier columns; the result is a small labelled Series.
row_info[['crsp_portno', 'crsp_fundno']].nunique()

## Number of unique funds per year

In [None]:
# Apply seaborn's default styling to this and every later figure.
sns.set()

# Distinct crsp_portno values per (year, month) of report_dt, one bar per
# period.
report_year = row_info["report_dt"].dt.year
report_month = row_info["report_dt"].dt.month
funds_per_period = (
    row_info["crsp_portno"]
    .groupby([report_year, report_month])
    .nunique()
)
funds_per_period.plot.bar(color='b', figsize=(10, 5))

## Distribution by number of observations per fund 

In [None]:
# Non-null rows recorded for each crsp_portno, shown as a 10-bin histogram
# across portfolios.
portno = row_info["crsp_portno"]
obs_per_portno = portno.groupby([portno]).count()
obs_per_portno.plot.hist(bins=10, color='b', figsize=(10, 5))

## Distribution by lipper_class

In [None]:
# Number of rows (non-null crsp_portno) in each lipper_class, as horizontal bars.
rows_per_lipper_class = (
    row_info['crsp_portno']
    .groupby([row_info["lipper_class"]])
    .count()
)
rows_per_lipper_class.plot.barh(color='b', figsize=(20, 5))

In [None]:
# Number of rows (non-null crsp_portno) in each cap_class, as horizontal bars.
rows_per_cap_class = (
    row_info['crsp_portno']
    .groupby([row_info['cap_class']])
    .count()
)
rows_per_cap_class.plot.barh(color='b', figsize=(10, 5))

In [None]:
# Number of rows (non-null crsp_portno) in each style_class, as horizontal bars.
rows_per_style_class = (
    row_info['crsp_portno']
    .groupby([row_info['style_class']])
    .count()
)
rows_per_style_class.plot.barh(color='b', figsize=(10, 5))

## How often do lipper_classes change?


In [None]:
# Spot check: draw one random row, take its crsp_fundno and list every row
# that belongs to that fund. Re-run the cell to look at another fund.
random_row = row_info.sample(1)
x = random_row['crsp_fundno'].values[0]

# `x` is referenced by name inside the query string.
row_info.query('crsp_fundno == @x')

In [None]:
# Inspect one specific fund: crsp_fundno 6642 is hard-coded here. The random
# draw that used to precede this assignment was overwritten immediately, so
# it has been removed.
x = 6642
# `x` is referenced by name inside the query string.
row_info.query('crsp_fundno == @x')

In [None]:
# For every crsp_fundno, count how many distinct lipper_class values it
# carries, then plot how many funds fall into each count.
# The frame keeps its existing name `obj_per_portno`, but the grouping key is
# crsp_fundno, so the plot title names crsp_fundno.
# NOTE(review): if a portfolio-level (crsp_portno) count was intended, change
# the grouping key rather than the title.
obj_per_portno = row_info[['crsp_fundno','lipper_class']]
ax = (obj_per_portno['lipper_class']
    .groupby([
        obj_per_portno['crsp_fundno']
    ])
    .nunique()
    .value_counts()
    .sort_values()
    .plot(kind='barh',
          color = 'b',
          figsize=(18,5),
          title='Number of unique objective codes per crsp_fundno')
)

In [None]:
# Pair every observation with the same fund's previous lipper_class.
# Rows are ordered by fund and report date so "previous" means the preceding
# report of that fund.
lagged_class = (
    row_info[['crsp_fundno', 'report_dt', 'lipper_class']]
    .sort_values(by=['crsp_fundno', 'report_dt'])
    .reset_index(drop=True)
)

# Shift within each fund so a lag never comes from a different fund; the first
# report of every fund gets NaN and is removed by dropna() below.
# A frame-wide shift() followed by dropna() and "drop the first remaining row
# per fund" would also throw away a genuine transition whenever a fund's first
# row had already been removed by dropna() (always true for the first fund).
lagged_class = lagged_class.assign(
    lag_lipper_class=lagged_class.groupby('crsp_fundno')['lipper_class'].shift()
)
lagged_class = lagged_class.dropna()

In [None]:
# Cross-tabulate current against previous lipper_class, normalised per column
# (previous class) and expressed in percent with two decimals.
transition_share = pd.crosstab(
    lagged_class.lipper_class,
    lagged_class.lag_lipper_class,
    normalize='columns',
)
switch = (transition_share * 100).round(2)
switch

In [None]:
# Pair every observation with the same fund's previous style_class.
# Rows are ordered by fund and report date so "previous" means the preceding
# report of that fund.
lagged_class = (
    row_info[['crsp_fundno', 'report_dt', 'style_class']]
    .sort_values(by=['crsp_fundno', 'report_dt'])
    .reset_index(drop=True)
)

# Shift within each fund so a lag never comes from a different fund; the first
# report of every fund gets NaN and is removed by dropna() below.
# A frame-wide shift() followed by dropna() and "drop the first remaining row
# per fund" would also throw away a genuine transition whenever a fund's first
# row had already been removed by dropna() (always true for the first fund).
lagged_class = lagged_class.assign(
    lag_style_class=lagged_class.groupby('crsp_fundno')['style_class'].shift()
)
lagged_class = lagged_class.dropna()

# Transition counts: rows are the current style_class, columns the previous one.
lag_style = pd.crosstab(lagged_class.style_class, lagged_class.lag_style_class)
lag_style

In [None]:
# Render the style_class transition table as LaTeX source (row labels in bold,
# index names omitted) and print it so it can be copied verbatim.
lag_style_latex = lag_style.to_latex(
    index=True,
    index_names=False,
    bold_rows=True,
)
print(lag_style_latex)

In [None]:
# Pair every observation with the same fund's previous cap_class.
# Rows are ordered by fund and report date so "previous" means the preceding
# report of that fund.
lagged_class = (
    row_info[['crsp_fundno', 'report_dt', 'cap_class']]
    .sort_values(by=['crsp_fundno', 'report_dt'])
    .reset_index(drop=True)
)

# Shift within each fund so a lag never comes from a different fund; the first
# report of every fund gets NaN and is removed by dropna() below.
# A frame-wide shift() followed by dropna() and "drop the first remaining row
# per fund" would also throw away a genuine transition whenever a fund's first
# row had already been removed by dropna() (always true for the first fund).
lagged_class = lagged_class.assign(
    lag_cap_class=lagged_class.groupby('crsp_fundno')['cap_class'].shift()
)
lagged_class = lagged_class.dropna()

# Transition counts: rows are the current cap_class, columns the previous one.
lag_cap = pd.crosstab(lagged_class.cap_class, lagged_class.lag_cap_class)
lag_cap

In [None]:
# Render the cap_class transition table as LaTeX source (plain row labels,
# index names omitted) and print it so it can be copied verbatim.
lag_cap_latex = lag_cap.to_latex(
    index=True,
    index_names=False,
    bold_rows=False,
)
print(lag_cap_latex)

### Fund names