In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm

In [11]:
crsp_data = pd.read_csv('crsp_1926_2020.csv')

crsp_data_cleaned = crsp_data[(crsp_data['SHRCD'] == 10) | (crsp_data['SHRCD'] == 11)]
crsp_data_cleaned = crsp_data_cleaned[(crsp_data_cleaned['EXCHCD'] == 1) |
                                      (crsp_data_cleaned['EXCHCD'] == 2) |
                                      (crsp_data_cleaned['EXCHCD'] == 3)]
crsp_data_cleaned['PRC'] = crsp_data_cleaned['PRC'].apply(lambda x: x if x >= 0 else np.nan)
crsp_data_cleaned

Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,PRC,RET,SHROUT
1,10000,1986-01-31,10.0,3.0,,C,3680.0
2,10000,1986-02-28,10.0,3.0,,-0.257143,3680.0
3,10000,1986-03-31,10.0,3.0,,0.365385,3680.0
4,10000,1986-04-30,10.0,3.0,,-0.098592,3793.0
5,10000,1986-05-30,10.0,3.0,,-0.222656,3793.0
...,...,...,...,...,...,...,...
4705164,93436,2020-08-31,11.0,3.0,498.32001,0.741452,931809.0
4705165,93436,2020-09-30,11.0,3.0,429.01001,-0.139087,948000.0
4705166,93436,2020-10-30,11.0,3.0,388.04001,-0.095499,947901.0
4705167,93436,2020-11-30,11.0,3.0,567.59998,0.462736,947901.0


In [12]:
crsp_data_cleaned['date2'] = pd.to_datetime(crsp_data_cleaned['date'], format='%Y-%m-%d')
crsp_data_cleaned['year_month'] = crsp_data_cleaned['date2'].dt.to_period('M')
crsp_data_cleaned['date'] = np.floor(crsp_data_cleaned['date'].str.replace('-','').astype(float)/100).astype(int)
monthly_firms_count = crsp_data_cleaned.groupby('year_month')['PERMNO'].nunique()



In [13]:
# Market equity for each stock
crsp_data_cleaned['ME'] = (crsp_data_cleaned['PRC'] * crsp_data_cleaned['SHROUT'])/10 # check later

# Make decile
sortdf = crsp_data_cleaned.drop(['SHRCD', 'EXCHCD', 'PRC', 'SHROUT'], axis=1)
sortdf['rank'] = sortdf.groupby('date')['ME'].rank(pct=True)
sortdf['decile'] = np.ceil(sortdf['rank']*10)

def calc_weights(group):
    group['weights_eq'] = 1 / float(group['decile'].count())
    group['TME'] = group['ME'].sum()
    group['weights_val'] = group['ME'] / group['TME']
    return group

sortdf = sortdf.groupby(['date', 'decile']).apply(calc_weights)

sortdf['decile_lag'] = sortdf.groupby('PERMNO')['decile'].shift(1)
sortdf['weights_val_lag'] = sortdf.groupby('PERMNO')['weights_val'].shift(1)
sortdf['weights_eq_lag'] = sortdf.groupby('PERMNO')['weights_eq'].shift(1)

sortdf


  sortdf = sortdf.groupby(['date', 'decile']).apply(calc_weights)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PERMNO,date,RET,date2,year_month,ME,rank,decile,weights_eq,TME,weights_val,decile_lag,weights_val_lag,weights_eq_lag
date,decile,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
192512,1.0,13420,10073,192512,C,1925-12-31,1925-12,2.415000e+02,0.078947,1.0,0.024390,6.558950e+03,0.036820,,,
192512,1.0,74683,10444,192512,C,1925-12-31,1925-12,1.584000e+02,0.050239,1.0,0.024390,6.558950e+03,0.024150,,,
192512,1.0,104498,10639,192512,C,1925-12-31,1925-12,1.615000e+02,0.052632,1.0,0.024390,6.558950e+03,0.024623,,,
192512,1.0,107522,10655,192512,C,1925-12-31,1925-12,7.500000e+00,0.004785,1.0,0.024390,6.558950e+03,0.001143,,,
192512,1.0,129016,10794,192512,C,1925-12-31,1925-12,1.562500e+02,0.045455,1.0,0.024390,6.558950e+03,0.023822,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202012,10.0,4672714,93089,202012,0.048157,2020-12-31,2020-12,3.379939e+06,0.944131,10.0,0.002681,3.200056e+09,0.001056,10.0,0.001051,0.002717
202012,10.0,4673418,93096,202012,-0.037881,2020-12-31,2020-12,5.152371e+06,0.964545,10.0,0.002681,3.200056e+09,0.001610,10.0,0.001774,0.002717
202012,10.0,4677412,93132,202012,0.205307,2020-12-31,2020-12,2.413612e+06,0.925060,10.0,0.002681,3.200056e+09,0.000754,10.0,0.000652,0.002717
202012,10.0,4693918,93312,202012,0.056031,2020-12-31,2020-12,1.874040e+06,0.909213,10.0,0.002681,3.200056e+09,0.000586,10.0,0.000575,0.002717


In [14]:
sortdf.reset_index(drop = True, inplace = True)
sortdf['RET'] = pd.to_numeric(sortdf['RET'], errors='coerce')

sortdf['weighted_val_ret'] = sortdf['weights_val_lag'] * sortdf['RET']
sortdf['weighted_eq_ret'] = sortdf['weights_eq_lag'] * sortdf['RET']

# Sum up portfolio returns
eqports = sortdf.groupby(['date', 'decile_lag'])['weighted_eq_ret'].sum()
eqports = eqports.unstack()
# Missing accounting data in early years
eqports = eqports.dropna(axis=0)
# Match data format of FF factors
eqports = eqports * 100
eqports = eqports.reset_index()

valports = sortdf.groupby(['date', 'decile_lag'])['weighted_val_ret'].sum()
valports = valports.unstack()
valports = valports.dropna(axis=0)
valports = valports * 100
valports = valports.reset_index()

mean_monthly_returns_eq = eqports.mean(axis=0)
mean_monthly_returns_val = valports.mean(axis=0)

monotonic_eq = (mean_monthly_returns_eq.is_monotonic_increasing or
                mean_monthly_returns_eq.is_monotonic_decreasing)

monotonic_val = (mean_monthly_returns_val.is_monotonic_increasing or
                 mean_monthly_returns_val.is_monotonic_decreasing)

print("Mean monthly returns for each decile (Equal-weighted):")
print(mean_monthly_returns_eq)
print(f"Equal-weighted returns are monotonic: {monotonic_eq}\n")

print("Mean monthly returns for each decile (Value-weighted):")
print(mean_monthly_returns_val)
print(f"Value-weighted returns are monotonic: {monotonic_val}")

eqports

Mean monthly returns for each decile (Equal-weighted):
decile_lag
date    197306.500000
1.0          3.117020
2.0          1.502364
3.0          1.330793
4.0          1.273380
5.0          1.262522
6.0          1.221263
7.0          1.161920
8.0          1.091310
9.0          1.073214
10.0         0.920555
dtype: float64
Equal-weighted returns are monotonic: True

Mean monthly returns for each decile (Value-weighted):
decile_lag
date    197306.500000
1.0          2.478629
2.0          1.479960
3.0          1.336647
4.0          1.268084
5.0          1.258483
6.0          1.223613
7.0          1.159447
8.0          1.081011
9.0          1.067123
10.0         0.910811
dtype: float64
Value-weighted returns are monotonic: True


decile_lag,date,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
0,192601,11.782215,1.830621,1.372321,3.038145,-0.275138,0.665188,-0.346502,0.457743,-0.197943,-0.193838
1,192602,-7.953088,-5.468666,-9.648219,-6.952050,-6.707273,-6.873791,-5.925456,-6.218172,-3.216784,-2.266515
2,192603,-17.366330,-14.768564,-13.770322,-10.571645,-10.750652,-10.258181,-12.519652,-5.036840,-7.028057,-6.620769
3,192604,0.934766,1.574623,0.309830,4.592213,4.434697,2.758254,5.305059,2.897505,3.691636,3.286036
4,192605,-3.108273,-0.567706,2.505172,0.644207,-1.212357,2.046563,1.193787,0.845630,0.955320,1.810399
...,...,...,...,...,...,...,...,...,...,...,...
1135,202008,-0.485998,1.913876,6.096398,7.985668,6.533205,6.202248,5.482641,5.480806,5.010431,4.332389
1136,202009,-1.173155,-0.764958,-2.175105,-2.597382,-4.539896,-3.893649,-3.622940,-3.931072,-1.355803,-1.832027
1137,202010,0.943012,-1.614138,-0.961324,0.661733,1.554059,2.092947,3.643933,2.556817,0.912082,-1.139210
1138,202011,27.361794,22.952993,23.495268,24.257942,21.846842,21.050520,19.772252,17.144183,15.526824,13.012835


In [16]:
eqports['1-10'] = eqports[1.0] - eqports[10.0]
valports['1-10'] = valports[1.0] - valports[10.0] 

mean_long_short_eq = eqports['1-10'].mean()
mean_long_short_val = valports['1-10'].mean()

vol_long_short_eq = eqports['1-10'].std()
vol_long_short_val = valports['1-10'].std()

sharpe_ratio_eq = mean_long_short_eq / vol_long_short_eq
sharpe_ratio_val = mean_long_short_val / vol_long_short_val

print(f"Equal-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_eq}")
print(f"Volatility: {vol_long_short_eq}")
print(f"Sharpe Ratio: {sharpe_ratio_eq}\n")

print(f"Value-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_val}")
print(f"Volatility: {vol_long_short_val}")
print(f"Sharpe Ratio: {sharpe_ratio_val}\n")


Equal-Weighted Long-Short Portfolio:
Mean Return: 2.196465045619146
Volatility: 15.065437159855007
Sharpe Ratio: 0.14579497576559441

Value-Weighted Long-Short Portfolio:
Mean Return: 1.5678189607302107
Volatility: 12.93038603178765
Sharpe Ratio: 0.12125074664251588



In [17]:
ff3 = pd.read_csv('ff3_factors.csv')
ff3_merged_eq = pd.merge(eqports, ff3, on='date')
ff3_merged_val = pd.merge(valports, ff3, on='date')

# Estimate CAPM - equally weighted
model1=sm.OLS(ff3_merged_eq['1-10'],
              sm.add_constant(ff3_merged_eq[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.170
Model:                            OLS   Adj. R-squared:                  0.169
Method:                 Least Squares   F-statistic:                     231.4
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           1.08e-47
Time:                        23:12:41   Log-Likelihood:                -4581.1
No. Observations:                1134   AIC:                             9166.
Df Residuals:                    1132   BIC:                             9176.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.4285      0.412      3.468      0.0

In [18]:
# Estimate CAPM - value weighted
model1=sm.OLS(ff3_merged_val['1-10'],
              sm.add_constant(ff3_merged_val[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.195
Model:                            OLS   Adj. R-squared:                  0.194
Method:                 Least Squares   F-statistic:                     274.1
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           2.66e-55
Time:                        23:12:44   Log-Likelihood:                -4390.0
No. Observations:                1134   AIC:                             8784.
Df Residuals:                    1132   BIC:                             8794.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.8520      0.348      2.448      0.0

In [19]:
# Estimate FF3 - equally weighted
model1=sm.OLS(ff3_merged_eq['1-10'],
              sm.add_constant(ff3_merged_eq[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.585
Model:                            OLS   Adj. R-squared:                  0.584
Method:                 Least Squares   F-statistic:                     531.0
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          3.09e-215
Time:                        23:12:46   Log-Likelihood:                -4187.8
No. Observations:                1134   AIC:                             8384.
Df Residuals:                    1130   BIC:                             8404.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.8861      0.292      3.034      0.0

In [20]:
# Estimate FF3 - value weighted
model1=sm.OLS(ff3_merged_val['1-10'],
              sm.add_constant(ff3_merged_val[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.647
Model:                            OLS   Adj. R-squared:                  0.646
Method:                 Least Squares   F-statistic:                     691.1
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          4.59e-255
Time:                        23:12:48   Log-Likelihood:                -3922.2
No. Observations:                1134   AIC:                             7852.
Df Residuals:                    1130   BIC:                             7873.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3769      0.231      1.631      0.1

In [21]:
ff1992 = ff3_merged_eq[ff3_merged_eq['date']> 199200].copy()
model1=sm.OLS(ff1992['1-10'], sm.add_constant(ff1992[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.011
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                     3.882
Date:                Sun, 21 Apr 2024   Prob (F-statistic):             0.0496
Time:                        23:12:50   Log-Likelihood:                -1198.6
No. Observations:                 348   AIC:                             2401.
Df Residuals:                     346   BIC:                             2409.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.6375      0.413      3.964      0.0

In [22]:
ff1992 = ff3_merged_val[ff3_merged_val['date']> 199200].copy()
model1=sm.OLS(ff1992['1-10'], sm.add_constant(ff1992[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.016
Method:                 Least Squares   F-statistic:                     6.464
Date:                Sun, 21 Apr 2024   Prob (F-statistic):             0.0114
Time:                        23:12:52   Log-Likelihood:                -1174.1
No. Observations:                 348   AIC:                             2352.
Df Residuals:                     346   BIC:                             2360.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.7940      0.385      2.063      0.0

In [25]:
# Convert 'RET' column to numeric, turning non-numeric values into NaN
crsp_data_cleaned['RET'] = pd.to_numeric(crsp_data_cleaned['RET'], errors='coerce')

# Now you can calculate the rolling cumulative returns
crsp_data_cleaned['cumulative_returns'] = crsp_data_cleaned.groupby('PERMNO')['RET'].rolling(window=11, min_periods=11).apply(lambda x: np.prod(1+x)-1, raw=True).shift(2).reset_index(0,drop=True)

In [26]:

sortdf = crsp_data_cleaned.drop(['SHRCD', 'EXCHCD', 'PRC', 'SHROUT'], axis=1)
sortdf['rank'] = sortdf.groupby('date')['cumulative_returns'].rank(pct=True)
sortdf['decile'] = np.ceil(sortdf['rank']*10)

In [27]:
# Form the equal- and value-weighted portfolios for the ten momentum portfolios
def calc_weights(group):
    group['weights_eq'] = 1 / float(group['decile'].count())
    group['TME'] = group['ME'].sum()
    group['weights_val'] = group['ME'] / group['TME']
    return group

sortdf = sortdf.groupby(['date', 'decile']).apply(calc_weights)

sortdf['decile_lag'] = sortdf.groupby('PERMNO')['decile'].shift(1)
sortdf['weights_val_lag'] = sortdf.groupby('PERMNO')['weights_val'].shift(1)
sortdf['weights_eq_lag'] = sortdf.groupby('PERMNO')['weights_eq'].shift(1)

  sortdf = sortdf.groupby(['date', 'decile']).apply(calc_weights)


In [121]:
sortdf.reset_index(drop = True, inplace = True)
sortdf['weighted_val_lag'] = sortdf['weights_val_lag'] * sortdf['RET']
sortdf['weighted_eq_lag'] = sortdf['weights_eq_lag'] * sortdf['RET']

# Sum up portfolio returns
eqports = sortdf.groupby(['date', 'decile_lag'])['weighted_eq_lag'].sum()
eqports = eqports.unstack()
# Missing accounting data in early years
eqports = eqports.dropna(axis=0)
# Match data format of FF factors
eqports = eqports * 100
eqports = eqports.reset_index()

valports = sortdf.groupby(['date', 'decile_lag'])['weighted_val_lag'].sum()
valports = valports.unstack()
valports = valports.dropna(axis=0)
valports = valports * 100
valports = valports.reset_index()

mean_monthly_returns_eq = eqports.mean(axis=0)
mean_monthly_returns_val = valports.mean(axis=0)

monotonic_eq = (mean_monthly_returns_eq.is_monotonic_increasing or
                mean_monthly_returns_eq.is_monotonic_decreasing)

monotonic_val = (mean_monthly_returns_val.is_monotonic_increasing or
                 mean_monthly_returns_val.is_monotonic_decreasing)

print("Mean monthly returns for each decile (Equal-weighted):")
print(mean_monthly_returns_eq)
print(f"Equal-weighted returns are monotonic: {monotonic_eq}\n")

print("Mean monthly returns for each decile (Value-weighted):")
print(mean_monthly_returns_val)
print(f"Value-weighted returns are monotonic: {monotonic_val}")

eqports

Mean monthly returns for each decile (Equal-weighted):
decile_lag
date    197352.287865
1.0          1.144305
2.0          1.064822
3.0          1.217383
4.0          1.170327
5.0          1.219944
6.0          1.303896
7.0          1.410952
8.0          1.440834
9.0          1.539089
10.0         1.744611
dtype: float64
Equal-weighted returns are monotonic: False

Mean monthly returns for each decile (Value-weighted):
decile_lag
date    197352.287865
1.0          0.228607
2.0          0.533213
3.0          0.782392
4.0          0.903838
5.0          0.927468
6.0          0.973006
7.0          1.068319
8.0          1.155728
9.0          1.235009
10.0         1.479964
dtype: float64
Value-weighted returns are monotonic: False


decile_lag,date,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
0,192601,0.001209,1.649523,0.407028,0.878536,1.416923,4.211967,-0.593649,1.557935,-0.237902,1.934547
1,192701,0.375342,-9.314924,7.988318,5.855666,1.500217,8.741004,-3.039191,0.567937,-0.213094,0.168475
2,192702,5.429484,6.045233,14.772018,26.628656,6.068283,5.879763,3.624080,5.895644,5.496386,5.259776
3,192703,-10.764280,-3.066426,2.790509,-2.690691,-6.627557,-1.865443,-15.965384,-0.490007,20.923493,3.651683
4,192704,0.708904,-9.864522,-0.692226,2.751067,-0.319615,-5.386724,1.436915,-0.911465,-2.914733,4.136957
...,...,...,...,...,...,...,...,...,...,...,...
1124,202008,3.599718,5.094813,6.928604,4.340953,6.657829,4.595906,5.090737,4.312206,3.172607,3.541746
1125,202009,-6.160866,-4.422428,-3.312075,-3.005215,-2.547669,-2.118888,-2.465467,-1.957995,-1.442655,0.558763
1126,202010,-2.315287,3.229721,4.299260,2.142781,2.828298,1.945891,1.904596,0.000267,0.726221,-2.216531
1127,202011,38.188981,23.794518,20.576148,19.699745,16.514408,16.663575,17.467311,15.855039,14.682045,20.481395


In [122]:
# Form the long-short winners-minus-losers portfolio
eqports['10-1'] = eqports[10.0] - eqports[1.0]
valports['10-1'] = valports[10.0] - valports[1.0] 

mean_long_short_eq = eqports['10-1'].mean()
mean_long_short_val = valports['10-1'].mean()

vol_long_short_eq = eqports['10-1'].std()
vol_long_short_val = valports['10-1'].std()

sharpe_ratio_eq = mean_long_short_eq / vol_long_short_eq
sharpe_ratio_val = mean_long_short_val / vol_long_short_val

print(f"Equal-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_eq}")
print(f"Volatility: {vol_long_short_eq}")
print(f"Sharpe Ratio: {sharpe_ratio_eq}\n")

print(f"Value-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_val}")
print(f"Volatility: {vol_long_short_val}")
print(f"Sharpe Ratio: {sharpe_ratio_val}\n")

Equal-Weighted Long-Short Portfolio:
Mean Return: 0.6003058776220164
Volatility: 8.071971118712762
Sharpe Ratio: 0.07436918056239865

Value-Weighted Long-Short Portfolio:
Mean Return: 1.2513567758419675
Volatility: 8.47887052551482
Sharpe Ratio: 0.14758531482186865



In [130]:
ff3 = pd.read_csv('ff3_factors.csv')
#make Date into date
ff3.rename(columns={'Date': 'date'}, inplace=True)
ff3_merged_eq = pd.merge(eqports, ff3, on='date')
ff3_merged_val = pd.merge(valports, ff3, on='date')

# Estimate CAPM - equally weighted
model1=sm.OLS(ff3_merged_eq['10-1'],
              sm.add_constant(ff3_merged_eq[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.076
Model:                            OLS   Adj. R-squared:                  0.076
Method:                 Least Squares   F-statistic:                     93.15
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           3.13e-21
Time:                        22:26:09   Log-Likelihood:                -3911.4
No. Observations:                1128   AIC:                             7827.
Df Residuals:                    1126   BIC:                             7837.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.8812      0.233      3.782      0.0

In [131]:
# Estimate CAPM - value weighted
model1=sm.OLS(ff3_merged_val['10-1'],
              sm.add_constant(ff3_merged_val[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.131
Model:                            OLS   Adj. R-squared:                  0.130
Method:                 Least Squares   F-statistic:                     169.0
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           4.16e-36
Time:                        22:26:34   Log-Likelihood:                -3932.9
No. Observations:                1128   AIC:                             7870.
Df Residuals:                    1126   BIC:                             7880.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.6393      0.237      6.903      0.0

In [132]:
# Estimate FF3 - equally weighted
model1=sm.OLS(ff3_merged_eq['10-1'],
              sm.add_constant(ff3_merged_eq[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.233
Model:                            OLS   Adj. R-squared:                  0.231
Method:                 Least Squares   F-statistic:                     113.6
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           2.93e-64
Time:                        22:26:46   Log-Likelihood:                -3806.9
No. Observations:                1128   AIC:                             7622.
Df Residuals:                    1124   BIC:                             7642.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0890      0.213      5.111      0.0

In [134]:
# Estimate FF3 - value weighted
model1=sm.OLS(ff3_merged_val['10-1'],
              sm.add_constant(ff3_merged_val[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.238
Model:                            OLS   Adj. R-squared:                  0.236
Method:                 Least Squares   F-statistic:                     117.1
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           5.41e-66
Time:                        22:26:59   Log-Likelihood:                -3858.3
No. Observations:                1128   AIC:                             7725.
Df Residuals:                    1124   BIC:                             7745.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.8253      0.223      8.186      0.0

In [135]:
# Estimate FF5 - equally weighted
model1=sm.OLS(ff5_merged_eq['10-1'],
              sm.add_constant(ff5_merged_eq[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.083
Model:                            OLS   Adj. R-squared:                  0.076
Method:                 Least Squares   F-statistic:                     12.33
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           1.81e-11
Time:                        22:27:04   Log-Likelihood:                -2281.9
No. Observations:                 690   AIC:                             4576.
Df Residuals:                     684   BIC:                             4603.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.5677      0.264      2.149      0.0

In [136]:
# Estimate FF5 - value weighted
model1=sm.OLS(ff5_merged_val['10-1'],
              sm.add_constant(ff5_merged_val[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   10-1   R-squared:                       0.105
Model:                            OLS   Adj. R-squared:                  0.098
Method:                 Least Squares   F-statistic:                     16.00
Date:                Sun, 21 Apr 2024   Prob (F-statistic):           6.32e-15
Time:                        22:27:16   Log-Likelihood:                -2374.8
No. Observations:                 690   AIC:                             4762.
Df Residuals:                     684   BIC:                             4789.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.6945      0.302      5.606      0.0

In [3]:
from statsmodels.regression.rolling import RollingOLS

# Convert 'RET' column to numeric, turning non-numeric values into NaN
crsp_data_cleaned['RET'] = pd.to_numeric(crsp_data_cleaned['RET'], errors='coerce')

# Merge ff5 and crsp
merged_df = pd.merge(crsp_data_cleaned, ff3, on='date', how='inner')

# Define a function to calculate rolling betas
def calc_rolling_beta(group):
    if len(group) >= 36:
        endog = group['RET']*100 - group['RF']
        exog = sm.add_constant(group['Mkt-RF'])  # Add a constant to the exogenous variable
        model = RollingOLS(endog, exog, window=36)
        rolling_params = model.fit().params
        group['rolling_beta'] = rolling_params['Mkt-RF'].values
    else:
        group['rolling_beta'] = np.nan
    return group

merged_df = merged_df.groupby('PERMNO').apply(calc_rolling_beta)

NameError: name 'ff3' is not defined

In [None]:
merged_df.to_csv('merged_df.csv', index=False)

# Now, to load the data back into a DataFrame
merged_df = pd.read_csv('merged_df.csv')

In [None]:
# Drop rows with NaN values in 'rolling_beta'
merged_df = merged_df.dropna(subset=['rolling_beta'])

# Make decile
sortdf = merged_df.drop(['SHRCD', 'EXCHCD', 'PRC', 'SHROUT'], axis=1)
sortdf['rank'] = sortdf.groupby('date')['rolling_beta'].rank(pct=True)
sortdf['decile'] = np.ceil(sortdf['rank']*10)

In [None]:
sortdf.reset_index(drop = True, inplace = True)

# Form the equal- and value-weighted portfolios for the ten momentum portfolios
def calc_weights(group):
    group['weights_eq'] = 1 / float(group['decile'].count())
    group['TME'] = group['ME'].sum()
    group['weights_val'] = group['ME'] / group['TME']
    return group

sortdf = sortdf.groupby(['date', 'decile']).apply(calc_weights)

In [None]:
sortdf.reset_index(drop = True, inplace = True)
sortdf['weighted_val'] = sortdf['weights_val'] * sortdf['RET']
sortdf['weighted_eq'] = sortdf['weights_eq'] * sortdf['RET']

# Sum up portfolio returns
eqports = sortdf.groupby(['date', 'decile'])['weighted_eq'].sum()
eqports = eqports.unstack()
# Missing accounting data in early years
eqports = eqports.dropna(axis=0)
# Match data format of FF factors
eqports = eqports * 100
eqports = eqports.reset_index()

valports = sortdf.groupby(['date', 'decile'])['weighted_val'].sum()
valports = valports.unstack()
valports = valports.dropna(axis=0)
valports = valports * 100
valports = valports.reset_index()

mean_monthly_returns_eq = eqports.mean(axis=0)
mean_monthly_returns_val = valports.mean(axis=0)

monotonic_eq = (mean_monthly_returns_eq.is_monotonic_increasing or
                mean_monthly_returns_eq.is_monotonic_decreasing)

monotonic_val = (mean_monthly_returns_val.is_monotonic_increasing or
                 mean_monthly_returns_val.is_monotonic_decreasing)

print("Mean monthly returns for each decile (Equal-weighted):")
print(mean_monthly_returns_eq)
print(f"Equal-weighted returns are monotonic: {monotonic_eq}\n")

print("Mean monthly returns for each decile (Value-weighted):")
print(mean_monthly_returns_val)
print(f"Value-weighted returns are monotonic: {monotonic_val}")

eqports

Mean monthly returns for each decile (Equal-weighted):
decile
date    199201.954198
1.0          1.365975
2.0          1.082790
3.0          1.155699
4.0          1.142098
5.0          1.179730
6.0          1.178783
7.0          1.226010
8.0          1.316394
9.0          1.383594
10.0         2.268856
dtype: float64
Equal-weighted returns are monotonic: False

Mean monthly returns for each decile (Value-weighted):
decile
date    199201.954198
1.0          1.388653
2.0          1.190388
3.0          1.238056
4.0          1.371167
5.0          1.408582
6.0          1.415341
7.0          1.547640
8.0          1.686813
9.0          1.915342
10.0         2.888195
dtype: float64
Value-weighted returns are monotonic: False


decile,date,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
0,196307,0.018559,0.465628,-0.065529,-1.071928,-1.392242,-0.645709,-1.526954,-2.030629,-0.086817,-0.453827
1,196308,1.821955,3.702542,3.311469,4.914353,2.217302,4.273191,6.197755,5.861856,6.179927,7.204721
2,196309,0.044787,-2.636400,-0.861407,-1.333755,-2.573279,-1.503218,-1.406932,-0.168357,-1.648150,-1.918141
3,196310,-0.154338,0.872014,-0.908139,0.123325,4.566745,1.882373,0.968740,1.133016,2.904841,1.878807
4,196311,-0.333560,-0.730368,-2.428070,-0.977331,1.024467,-0.709377,0.434578,-2.468142,-2.370242,-0.259702
...,...,...,...,...,...,...,...,...,...,...,...
650,202008,-3.021696,2.207653,1.360620,3.026394,3.770437,4.933383,6.369575,8.022842,6.982988,13.359639
651,202009,-1.427279,-2.829860,-2.122748,-2.486423,-1.700532,-2.260240,-3.619642,-3.102314,-3.668469,-5.913530
652,202010,2.364607,2.183152,2.562575,4.184212,3.266622,3.574853,1.771700,2.311382,-0.365593,-3.248063
653,202011,5.199237,9.372699,11.508917,12.988898,14.580006,18.264854,19.384348,23.846234,29.430043,54.885666


In [None]:
# Form the long-short winners-minus-losers portfolio
eqports['1-10'] = eqports[1.0] - eqports[10.0]
valports['1-10'] = valports[1.0] - valports[10.0] 

mean_long_short_eq = eqports['1-10'].mean()
mean_long_short_val = valports['1-10'].mean()

vol_long_short_eq = eqports['1-10'].std()
vol_long_short_val = valports['1-10'].std()

sharpe_ratio_eq = mean_long_short_eq / vol_long_short_eq
sharpe_ratio_val = mean_long_short_val / vol_long_short_val

print(f"Equal-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_eq}")
print(f"Volatility: {vol_long_short_eq}")
print(f"Sharpe Ratio: {sharpe_ratio_eq}\n")

print(f"Value-Weighted Long-Short Portfolio:")
print(f"Mean Return: {mean_long_short_val}")
print(f"Volatility: {vol_long_short_val}")
print(f"Sharpe Ratio: {sharpe_ratio_val}\n")

Equal-Weighted Long-Short Portfolio:
Mean Return: -0.9028815222771523
Volatility: 13.188104191719134
Sharpe Ratio: -0.06846181294534172

Value-Weighted Long-Short Portfolio:
Mean Return: -1.4995417306898877
Volatility: 11.921068173320073
Sharpe Ratio: -0.1257892085581672



In [None]:
ff5 = pd.read_csv('ff5_factors.csv')
#make Date into date
ff5.rename(columns={'Date': 'date'}, inplace=True)
ff5_merged_eq = pd.merge(eqports, ff5, on='date')
ff5_merged_val = pd.merge(valports, ff5, on='date')

# Estimate CAPM - equally weighted
model1=sm.OLS(ff5_merged_eq['1-10'],
              sm.add_constant(ff5_merged_eq[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.739
Model:                            OLS   Adj. R-squared:                  0.739
Method:                 Least Squares   F-statistic:                     1851.
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          1.00e-192
Time:                        16:42:39   Log-Likelihood:                -2178.2
No. Observations:                 655   AIC:                             4360.
Df Residuals:                     653   BIC:                             4369.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.5433      0.266      2.046      0.0

In [None]:
# Estimate CAPM - value weighted
model1=sm.OLS(ff5_merged_val['1-10'],
              sm.add_constant(ff5_merged_val[['Mkt-RF']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.688
Model:                            OLS   Adj. R-squared:                  0.687
Method:                 Least Squares   F-statistic:                     1437.
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          4.35e-167
Time:                        16:42:55   Log-Likelihood:                -2171.3
No. Observations:                 655   AIC:                             4347.
Df Residuals:                     653   BIC:                             4355.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.2388      0.263     -0.909      0.3

In [None]:
# Estimate FF3 - equally weighted
model1=sm.OLS(ff5_merged_eq['1-10'],
              sm.add_constant(ff5_merged_eq[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.763
Model:                            OLS   Adj. R-squared:                  0.762
Method:                 Least Squares   F-statistic:                     698.4
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          5.72e-203
Time:                        16:42:59   Log-Likelihood:                -2146.9
No. Observations:                 655   AIC:                             4302.
Df Residuals:                     651   BIC:                             4320.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.5703      0.255      2.233      0.0

In [None]:
# Estimate FF3 - value weighted
model1=sm.OLS(ff5_merged_val['1-10'],
              sm.add_constant(ff5_merged_val[['Mkt-RF', 'SMB', 'HML']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.727
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                     578.0
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          4.98e-183
Time:                        16:43:05   Log-Likelihood:                -2127.0
No. Observations:                 655   AIC:                             4262.
Df Residuals:                     651   BIC:                             4280.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.2718      0.248     -1.097      0.2

In [None]:
# Estimate FF5 - equally weighted
model1=sm.OLS(ff5_merged_eq['1-10'],
              sm.add_constant(ff5_merged_eq[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.783
Model:                            OLS   Adj. R-squared:                  0.781
Method:                 Least Squares   F-statistic:                     467.5
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          2.35e-212
Time:                        16:43:08   Log-Likelihood:                -2118.5
No. Observations:                 655   AIC:                             4249.
Df Residuals:                     649   BIC:                             4276.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1309      0.252      0.520      0.6

In [None]:
# Estimate FF5 - value weighted
model1=sm.OLS(ff5_merged_val['1-10'],
              sm.add_constant(ff5_merged_val[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   1-10   R-squared:                       0.751
Model:                            OLS   Adj. R-squared:                  0.749
Method:                 Least Squares   F-statistic:                     390.5
Date:                Sun, 21 Apr 2024   Prob (F-statistic):          6.03e-193
Time:                        16:43:13   Log-Likelihood:                -2097.5
No. Observations:                 655   AIC:                             4207.
Df Residuals:                     649   BIC:                             4234.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.7148      0.244     -2.931      0.0