# Analysis of monthly return data

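Description: Loads processed monthly fund returns and factor data (`ff_factors`), merges them by month, runs a rolling factor regression per fund (`crsp_fundno`) to predict each month's return, and saves the factor-adjusted returns (`mret_adj`) to `../data/processed/returns_ff.feather`.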


## Data loading

In [None]:
import feather
import numpy as np
import pandas as pd

import datetime as dt
from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt

In [None]:
# Load the processed returns table.
path = '../data/processed/returns.feather'
returns = feather.read_dataframe(path)
# Keep only rows whose caldt is strictly after 2007-01-01 (filtered in place).
returns.query(''' caldt > '2007-01-01' ''', inplace=True)

In [None]:
# Load the row_info table.
# NOTE(review): row_info is not referenced again in the visible cells of this
# notebook -- confirm it is still needed.
path = '../data/processed/row_info.feather'
row_info = feather.read_dataframe(path)

In [None]:
# Load the factor table that is later merged onto the returns.
# Presumably holds the 'MKT-RF', 'SMB', 'HML', 'MOM' and 'RF' columns used
# further down -- verify against the file.
path = '../data/processed/ff_factors.feather'
ff_all = feather.read_dataframe(path)

## Merge data

In [None]:
# Build a month-begin 'Date' column on the returns so they can be joined to
# the factor table.
# NOTE(review): rollforward leaves dates that already are a month start
# unchanged and moves every other date to the first day of the FOLLOWING
# month -- confirm this matches the date convention used in ff_all.
offset = pd.offsets.MonthBegin()
returns['Date'] = returns['caldt'].apply(offset.rollforward)

# Gap between the original date and the rolled date, kept for sanity checks.
returns['diff'] = returns['caldt'] - returns['Date']

# Left join keeps every return row. No `on=` is given, so pandas joins on all
# columns the two frames share (presumably just 'Date' -- verify).
returns_m = pd.merge(returns, ff_all, how='left')

In [None]:
# Divide the factor columns by 100 (presumably percent -> decimal so they are
# in the same units as mret -- TODO confirm).
# NOTE: this cell is not idempotent; re-running it divides the factors again.
factors = ['MKT-RF', 'SMB', 'HML', 'MOM', 'RF']
returns_m[factors] = returns_m[factors].div(100)
# Check that the 'diff' column is small: a non-zero gap is expected because
# the CRSP dates are end of month while the FF dates are the first day of the month.

In [None]:
# Display summary statistics of the merged returns/factors frame.
returns_m.describe()

## Rolling regression

In [None]:
def regress(group_1):
    '''Fit a factor regression on all but the last row and predict the last row.

    Parameters
    ----------
    group_1 : pandas.DataFrame
        Rows of one estimation window. Must contain the regressor columns
        'MKT-RF', 'SMB', 'HML', 'MOM' and the target column 'mret'.
        The final row is held out and only used for the prediction.
        The input is not modified.

    Returns
    -------
    numpy.ndarray
        One-element array with the predicted 'mret' for the last row.
    '''
    factor_cols = ['MKT-RF', 'SMB', 'HML', 'MOM']

    # Train on every row except the last one; the last row is the single
    # out-of-sample observation. The double brackets keep it a DataFrame.
    data_train = group_1.iloc[:-1]
    data_test = group_1.iloc[[-1], :]

    reg = LinearRegression()
    reg.fit(data_train[factor_cols], data_train['mret'])

    return reg.predict(data_test[factor_cols])

# Rolling-window wrapper: applies regress to each window of rows within one group
def rolling_regress(data, window):
    '''Fill the 'prediction' column with rolling one-step-ahead forecasts.

    For every row position that has at least `window` rows before it, the
    `window` preceding rows plus the row itself are passed to `regress`,
    which fits on the preceding rows and predicts the row itself. Rows are
    used in their existing order.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single group. Must contain the columns `regress` needs.
        An existing 'prediction' column is kept for rows that cannot be
        predicted; if the column is missing it is created and filled with NaN.
    window : int
        Number of rows used to fit each regression.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the 'prediction' column filled in. The first
        `window` rows keep their previous 'prediction' values.
    '''
    group_1 = data.copy()
    if 'prediction' not in group_1.columns:
        group_1['prediction'] = np.nan
    predictions = group_1['prediction'].copy()

    n_rows = group_1.shape[0]

    # `end` is the exclusive upper bound of the current window (window fit
    # rows + 1 row to predict), so the predicted row sits at position end - 1.
    for end in range(window + 1, n_rows + 1):
        # Slice the window directly instead of copying the whole frame on
        # every iteration.
        window_rows = group_1.iloc[end - window - 1:end]
        # regress returns a one-element array; store it as a plain scalar.
        predictions.iloc[end - 1] = np.ravel(regress(window_rows))[0]

    group_1['prediction'] = predictions

    return group_1

def rolling_grouped_regress(data, window, verbose=True):
    '''Run `rolling_regress` separately for every 'crsp_fundno' group.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'crsp_fundno' column plus everything
        `rolling_regress` needs.
    window : int
        Number of rows used to fit each regression; passed through to
        `rolling_regress`.
    verbose : bool, optional
        If True (default), print the percentage of groups processed after
        every 200 groups.

    Returns
    -------
    pandas.DataFrame
        The per-group results concatenated in groupby iteration order.
    '''
    # TODO: this loop is slow and could be parallelised across groups.

    # Total number of groups, used only for the progress message.
    n_uniqued_groups = data['crsp_fundno'].nunique()

    results_list = []
    for count, (_, group) in enumerate(data.groupby('crsp_fundno'), start=1):
        results_list.append(rolling_regress(group, window))

        if verbose and count % 200 == 0:
            perc = count / n_uniqued_groups * 100
            print('Percentage complete: {:>5.2f}%'.format(perc))

    return pd.concat(results_list)

In [None]:
# Run the rolling regression for all groups and report the elapsed wall time.
start_time = dt.datetime.now()

# rolling_regress copies the existing 'prediction' column, so start with all NaN.
returns_m['prediction'] = np.nan
returns_mf = rolling_grouped_regress(returns_m, window=24)

end_time = dt.datetime.now()
print(end_time - start_time)

In [None]:
# Adjusted return: realised monthly return minus the rolling-regression prediction.
returns_mf.loc[:,'mret_adj'] = returns_mf.loc[:,'mret'] - returns_mf.loc[:,'prediction']

# Mean of every numeric column per lipper_class, scaled by 100.
# numeric_only=True is required on pandas >= 2.0, where the mean no longer
# silently drops non-numeric columns (strings, dates) and raises instead.
returns_mf.groupby('lipper_class').mean(numeric_only=True) * 100

In [None]:
# Keep only the columns that are written to disk afterwards.
output_columns = [
    'crsp_fundno',
    'caldt',
    'mret',
    'lipper_class',
    'style_class',
    'cap_class',
    'mret_adj',
]
returns_mf = returns_mf[output_columns]

In [None]:
# Write the reduced frame with the adjusted returns to a feather file.
path = '../data/processed/returns_ff.feather'
feather.write_dataframe(returns_mf,path)

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Data-loading" data-toc-modified-id="Data-loading-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Data loading</a></span></li><li><span><a href="#Merge-data" data-toc-modified-id="Merge-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Merge data</a></span></li><li><span><a href="#Rolling-regression" data-toc-modified-id="Rolling-regression-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Rolling regression</a></span></li></ul></div>