## 1. Import Relevant Packages

In [3]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

import statsmodels.api as sm

from statsmodels.regression.rolling import RollingOLS

from functools import reduce

## 2. Importing and preprocessing data

### CRSP data

In [4]:
# OBS: Change file to "LSTM file" when momentum strategy code is discussed 
crsp_m = pd.read_csv('NYSE_m.csv', low_memory=False)

In [5]:
# Make Python datetime format
crsp_m['date'] =  pd.to_datetime(crsp_m['date'])

In [6]:
# Make all column names lower case
crsp_m.columns = crsp_m.columns.str.lower()

In [7]:
# Change variable format to int
crsp_m[['permno','shrcd','exchcd']]=\
    crsp_m[['permno','shrcd','exchcd']].astype(int)

In [8]:
# DISCUSS below! ---> is this eligable?

# forcing errors to become NaN to convert column to int
crsp_m['ret'] = pd.to_numeric(crsp_m['ret'], errors='coerce') 
crsp_m['dlret'] = pd.to_numeric(crsp_m['dlret'], errors='coerce') 

# Fill-in delisting return as last return
crsp_m['ret'] = crsp_m['ret'].fillna(value=crsp_m['dlret'])

# fill in missing return with 0
crsp_m['ret'] = crsp_m['ret'].fillna(0)

In [9]:
sorted_crsp = crsp_m[['permno', 'date', 'ret']].sort_values(['permno','date']).set_index('date')

In [10]:
sorted_crsp.head(10)

Unnamed: 0_level_0,permno,ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1963-01-31,10006,0.047002
1963-02-28,10006,0.0387
1963-03-29,10006,-0.009009
1963-04-30,10006,0.084848
1963-05-31,10006,0.09162
1963-06-28,10006,-0.001289
1963-07-31,10006,-0.045161
1963-08-30,10006,0.121081
1963-09-30,10006,-0.07767
1963-10-31,10006,0.057895


In [11]:
# To aling with Fama French data we set index to month 
sorted_crsp.index = sorted_crsp.index.to_period('M')

### Fama French data

In [12]:
# Loading Fama French 3 Factors (only include rows with factors)
ff_3f = pd.read_csv('FF_3.csv', skiprows = 3, nrows=1134, index_col = 0)

In [13]:
# Get in decimal format 
ff_3f = ff_3f / 100

In [14]:
# Checking
ff_3f.head()

Unnamed: 0,Mkt-RF,SMB,HML,RF
192607,0.0296,-0.023,-0.0287,0.0022
192608,0.0264,-0.014,0.0419,0.0025
192609,0.0036,-0.0132,0.0001,0.0023
192610,-0.0324,0.0004,0.0051,0.0032
192611,0.0253,-0.002,-0.0035,0.0031


In [15]:
# Setting datetime format (in months)
ff_3f.index = pd.to_datetime(ff_3f.index, format= '%Y%m')
ff_3f.index = ff_3f.index.to_period('M')

In [16]:
# Ensuring no errors 
ff_3f['RF'] = pd.to_numeric(ff_3f['RF'], errors='coerce') 
ff_3f['Mkt-RF'] = pd.to_numeric(ff_3f['Mkt-RF'], errors='coerce') 

## 3. Merging data

In [19]:
# Merging stock data and 3 factors  
merged = pd.merge(sorted_crsp, ff_3f, how='inner', left_index=True, right_index=True)

In [20]:
# Calculating market return
merged['Mkt'] = merged['Mkt-RF'] + merged['RF']

In [21]:
# Calculating log and sum of log returns (cumulative market return) over 12 months
merged['Mkt_logret'] = np.log(1+merged['Mkt'])
mkt_cumret = merged.groupby(['permno'])['Mkt_logret'].rolling(12, min_periods=12).sum().reset_index().set_index('level_1')
mkt_cumret = mkt_cumret.rename(columns={'Mkt_logret':'log12M_MA'})

In [22]:
# Get non log
mkt_cumret['Mkt_12M_MA']=np.exp(mkt_cumret['log12M_MA'])-1

In [23]:
merged_ff = pd.merge(sorted_crsp, mkt_cumret[['permno','Mkt_12M_MA']], how='inner', on='permno')mkt_cumret.head(12)

Unnamed: 0_level_0,permno,log12M_MA,Mkt_12M_MA
level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1963-01,10006,,
1963-02,10006,,
1963-03,10006,,
1963-04,10006,,
1963-05,10006,,
1963-06,10006,,
1963-07,10006,,
1963-08,10006,,
1963-09,10006,,
1963-10,10006,,


In [None]:
merged = pd.merge(sorted_crsp, ff_3f, how='inner', left_index=True, right_index=True)
merged_ff = pd.merge(merged, mkt_cumret[['permno','Mkt_12M_MA']], how='inner', on='permno')

In [None]:
merged_ff

In [None]:
# Make into pivot format
# OBS: Possibly switch level 1 and 2
sorted_crsp_p = merged_ff.pivot(columns='permno', values=['ret', 'Mkt-RF','RF', 'Mkt_12M_MA'])

In [530]:
# OBS: Discuss
#sorted_crsp_p['ret'] = sorted_crsp_p['ret'].fillna(0)
#sorted_crsp_p['Mkt-RF'] = sorted_crsp_p['Mkt-RF'].fillna(0)
#sorted_crsp_p['RF'] = sorted_crsp_p['RF'].fillna(0)

## 4. Calculating alpha and beta

In [575]:
# Calculating y and x variable for CAPM regression

excess_return = sorted_crsp_p.ret - sorted_crsp_p.RF.values
market_return = sorted_crsp_p['Mkt-RF']

In [589]:
# Estimating alpha and beta

CAPM_dict = {}

for stock in excess_return.columns:
    y=excess_return[stock]
    X=sm.add_constant(market_return[stock])
    rols = RollingOLS(y, X, window=12, missing='drop') #don't include NaN values
    rres = rols.fit()
    
    CAPM_dict[stock] = rres.params

  s2 = ssr / (nobs - tot_params)
  llf = -np.log(ssr) * nobs2  # concentrated likelihood
  s2 = ssr / (nobs - tot_params)


In [624]:
alpha_beta = pd.DataFrame.from_dict({(i,j): CAPM_dict[i][j] 
                           for i in CAPM_dict.keys() 
                           for j in CAPM_dict[i].keys()})

In [626]:
#OBS: Fix format

alpha_beta.head(10)

Unnamed: 0_level_0,10006,10006,10007,10007,10012,10012,10014,10014,10028,10028,...,93418,93418,93420,93420,93422,93422,93423,93423,93426,93426
Unnamed: 0_level_1,const,10006,const,10007,const,10012,const,10014,const,10028,...,const,93418,const,93420,const,93422,const,93423,const,93426
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
1963-06,0.031713,0.477729,,,,,-0.02664,0.797554,,,...,,,,,,,,,,
1963-07,0.017939,0.820379,,,,,-0.023178,0.5131,,,...,,,,,,,,,,
1963-08,0.003425,1.719351,,,,,-0.022838,0.761662,,,...,,,,,,,,,,
1963-09,-0.00225,2.328302,,,,,-0.023363,0.794942,,,...,,,,,,,,,,
1963-10,-0.000853,2.52031,,,,,-0.03353,0.789679,,,...,,,,,,,,,,


## 5. Calculating standard deviation

In [631]:
#Calculating 6M rolling stddev
std_6M = sorted_crsp_p.rolling(n, min_periods=n).std().rename(columns={'ret':'6M_std'})
std_6M

Unnamed: 0_level_0,6M_std,6M_std,6M_std,6M_std,6M_std,6M_std,6M_std,6M_std,6M_std,6M_std,...,RF,RF,RF,RF,RF,RF,RF,RF,RF,RF
permno,10006,10007,10012,10014,10028,10030,10051,10057,10058,10064,...,93374,93382,93384,93387,93398,93418,93420,93422,93423,93426
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,,0.182537,,,,...,4.589844e-04,,,,,,,4.589844e-04,4.589844e-04,4.589844e-04
2020-09,,,,,,,0.141961,,,,...,4.082483e-05,,,,,,,4.082483e-05,4.082483e-05,4.082483e-05
2020-10,,,,,,,0.126922,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11
2020-11,,,,,,,0.176436,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11


In [632]:
#Calculating 12M rolling stddev
std_12M = sorted_crsp_p.rolling(n, min_periods=n).std().rename(columns={'ret':'12M_std'})
std_12M

Unnamed: 0_level_0,12M_std,12M_std,12M_std,12M_std,12M_std,12M_std,12M_std,12M_std,12M_std,12M_std,...,RF,RF,RF,RF,RF,RF,RF,RF,RF,RF
permno,10006,10007,10012,10014,10028,10030,10051,10057,10058,10064,...,93374,93382,93384,93387,93398,93418,93420,93422,93423,93426
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,,0.182537,,,,...,4.589844e-04,,,,,,,4.589844e-04,4.589844e-04,4.589844e-04
2020-09,,,,,,,0.141961,,,,...,4.082483e-05,,,,,,,4.082483e-05,4.082483e-05,4.082483e-05
2020-10,,,,,,,0.126922,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11
2020-11,,,,,,,0.176436,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11


In [633]:
#Calculating 24M rolling stddev
std_24M = sorted_crsp_p.rolling(n, min_periods=n).std().rename(columns={'ret':'24M_std'})
std_24M

Unnamed: 0_level_0,24M_std,24M_std,24M_std,24M_std,24M_std,24M_std,24M_std,24M_std,24M_std,24M_std,...,RF,RF,RF,RF,RF,RF,RF,RF,RF,RF
permno,10006,10007,10012,10014,10028,10030,10051,10057,10058,10064,...,93374,93382,93384,93387,93398,93418,93420,93422,93423,93426
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,,0.182537,,,,...,4.589844e-04,,,,,,,4.589844e-04,4.589844e-04,4.589844e-04
2020-09,,,,,,,0.141961,,,,...,4.082483e-05,,,,,,,4.082483e-05,4.082483e-05,4.082483e-05
2020-10,,,,,,,0.126922,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11
2020-11,,,,,,,0.176436,,,,...,3.830258e-11,,,,,,,3.830258e-11,3.830258e-11,3.830258e-11


In [634]:
data_frames = [sorted_crsp_p, std_6M, std_12M, std_24M, beta]


df_merged = reduce(lambda  left,right: pd.merge(left,right,left_index=True, right_index=True,
                                            how='inner'), data_frames)

In [635]:
df_merged

Unnamed: 0_level_0,ret,ret,ret,ret,ret,ret,ret,ret,ret,ret,...,93418,93418,93420,93420,93422,93422,93423,93423,93426,93426
permno,10006,10007,10012,10014,10028,10030,10051,10057,10058,10064,...,const,93418,const,93420,const,93422,const,93423,const,93426
1963-01,0.047002,,,0.034483,,0.056122,,0.057471,,,...,,,,,,,,,,
1963-02,0.038700,,,-0.033333,,0.024155,,0.017544,,,...,,,,,,,,,,
1963-03,-0.009009,,,0.000000,,0.053774,,0.081897,,,...,,,,,,,,,,
1963-04,0.084848,,,-0.034483,,-0.015837,,0.047809,,,...,,,,,,,,,,
1963-05,0.091620,,,0.035714,,-0.032184,,0.049430,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,,0.131730,,,,...,,,,,-0.035407,7.908492,-0.096613,3.772006,-0.060233,1.467597
2020-09,,,,,,,-0.199393,,,,...,,,,,-0.218947,10.668097,-0.102004,4.070591,0.008845,0.606261
2020-10,,,,,,,0.104298,,,,...,,,,,-0.017234,1.241350,-0.014933,1.447547,0.000332,0.215372
2020-11,,,,,,,0.298798,,,,...,,,,,0.000436,4.478622,-0.028210,2.566443,-0.002837,1.130923
