# BEM114 Homework 4 - Risk-Weighted Portfolios
**Names:** Andrew Zabelo, Daniel Wen, Kyle McCandless  
**Student IDs:** 2176083, 2159859, 2157818

## Setup

### Imports and Helper Functions

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
from dateutil.relativedelta import relativedelta

import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
def estimate_models(df, return_col_name, factor):
    """Regress a return series on three factor models and print each summary.

    The models are, in order: CAPM (Mkt-RF), Fama-French 3 (Mkt-RF, SMB, HML)
    and Fama-French 5 plus momentum (Mkt-RF, SMB, HML, RMW, CMA, Mom). Every
    regression includes an intercept.

    Parameters
    ----------
    df : DataFrame holding the dependent column and all factor columns.
    return_col_name : name of the column in ``df`` used as the dependent variable.
    factor : label used only as a prefix in the printed section headers.

    Returns
    -------
    None. The statsmodels OLS summaries are printed to stdout.
    """
    dependent = df[return_col_name]

    # (header text, regressors) for each model, in the order they are printed.
    model_specs = [
        (f'\n\n{factor} CAPM', ['Mkt-RF']),
        (f'\n{factor} FF3', ['Mkt-RF', 'SMB', 'HML']),
        (f'\n{factor} FF5+Mom', ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Mom']),
    ]

    for header, regressors in model_specs:
        print(header)
        # add_constant supplies the intercept, so the 'const' row is the alpha.
        print(sm.OLS(dependent, sm.add_constant(df[regressors])).fit().summary())
    

## Problem 1

### Part A - Process Dataframes


In [3]:
# Load the FF5 and momentum factor files at daily and monthly frequency and
# join each pair on a string 'date' key.

# Daily dates are read as strings and sliced as YYYYMMDD -> 'YYYY-MM-DD'.
ff5_daily = pd.read_csv('ff5_daily.csv').astype({'date': 'string'})
ff5_daily['date'] = ff5_daily['date'].apply(lambda d: f'{d[:4]}-{d[4:6]}-{d[6:]}')
mom_daily = pd.read_csv('mom_daily.CSV').astype({'date': 'string'})
mom_daily['date'] = mom_daily['date'].apply(lambda d: f'{d[:4]}-{d[4:6]}-{d[6:]}')
ff6_daily = ff5_daily.merge(mom_daily, how='inner', on=['date'])

# Monthly dates are read as strings and sliced as YYYYMM -> 'YYYY-MM'.
ff5_monthly = pd.read_csv('ff5_monthly.csv').astype({'date': 'string'})
ff5_monthly['date'] = ff5_monthly['date'].apply(lambda d: f'{d[:4]}-{d[4:]}')
mom_monthly = pd.read_csv('mom_monthly.CSV').astype({'date': 'string'})
mom_monthly['date'] = mom_monthly['date'].apply(lambda d: f'{d[:4]}-{d[4:]}')
ff6_monthly = ff5_monthly.merge(mom_monthly, how='inner', on=['date'])


### Part B - Calculate Rolling Variance of Factor Returns


In [4]:
def make_rolling_var(df, factor, window=22):
    """Add a trailing rolling-variance column for one factor to ``df`` in place.

    Parameters
    ----------
    df : DataFrame containing the column ``factor``; it is modified in place.
    factor : name of the return column whose variance is computed.
    window : number of consecutive rows in each variance window (default 22).
        Rows with fewer than ``window`` prior observations get NaN.

    Returns
    -------
    None. The result is stored in ``df[f'var_{factor}']``.
    """
    # Operate on the frame that was passed in (not a module-level global) and
    # keep the rolling result on df's own index so assignment aligns row by row.
    df[f'var_{factor}'] = df[factor].rolling(window=window, min_periods=window).var()

In [5]:
# Append a var_<factor> rolling-variance column to the daily frame for each factor.
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Mom']
for factor in factors:
    make_rolling_var(df=ff6_daily, factor=factor)

# Show the daily frame with the new variance columns.
ff6_daily

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,Mom,var_Mkt-RF,var_SMB,var_HML,var_RMW,var_CMA,var_Mom
0,1963-07-01,-0.67,0.02,-0.35,0.03,0.13,0.012,-0.21,,,,,,
1,1963-07-02,0.79,-0.28,0.28,-0.08,-0.21,0.012,0.42,,,,,,
2,1963-07-03,0.63,-0.18,-0.10,0.13,-0.25,0.012,0.41,,,,,,
3,1963-07-05,0.40,0.09,-0.28,0.07,-0.30,0.012,0.07,,,,,,
4,1963-07-08,-0.63,0.07,-0.20,-0.27,0.06,0.012,-0.45,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15285,2024-03-22,-0.23,-0.98,-0.53,0.29,-0.37,0.021,0.43,0.505910,0.708938,0.458988,0.309487,0.250368,0.683857
15286,2024-03-25,-0.26,-0.10,0.88,-0.22,-0.17,0.021,-0.34,0.349962,0.607054,0.395853,0.307150,0.185330,0.517176
15287,2024-03-26,-0.26,0.10,-0.13,-0.50,0.23,0.021,0.09,0.355926,0.601206,0.397961,0.319346,0.186243,0.516868
15288,2024-03-27,0.88,1.29,0.91,-0.14,0.58,0.021,-1.34,0.376593,0.637681,0.421055,0.293502,0.199281,0.558861


### Part C - Collapse Data to End of Month and Regress Current Variance on Lagged Variance

In [6]:
# Index the daily frame by timestamp so it can be resampled by calendar month.
ff6_daily['date'] = pd.to_datetime(ff6_daily['date'])
ff6_daily.set_index('date', inplace=True)

# Keep the last daily observation of every month.
month_end = ff6_daily.resample('M').last()

factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Mom']

# Shift every var_<factor> column down one month and name it lag_var_<factor>.
lagged = month_end[[f'var_{name}' for name in factors]].shift(1).add_prefix('lag_')
df_ff6 = pd.concat([month_end, lagged], axis=1)

# The first month has no previous month to lag from, so drop it.
df_ff6 = df_ff6.iloc[1:].reset_index()
df_ff6['date']

0     1963-08-31
1     1963-09-30
2     1963-10-31
3     1963-11-30
4     1963-12-31
         ...    
723   2023-11-30
724   2023-12-31
725   2024-01-31
726   2024-02-29
727   2024-03-31
Name: date, Length: 728, dtype: datetime64[ns]

In [7]:
# For each factor, regress month-end variance on its one-month lag (with intercept).
for factor in factors:
    current_col = f'var_{factor}'
    lagged_col = f'lag_var_{factor}'
    # Use only rows where both the current and lagged variance are present.
    temp = df_ff6.dropna(subset=[current_col, lagged_col])
    print(f"\n\n{factor}")
    persistence_fit = sm.OLS(temp[current_col], sm.add_constant(temp[[lagged_col]])).fit()
    print(persistence_fit.summary())



Mkt-RF
                            OLS Regression Results                            
Dep. Variable:             var_Mkt-RF   R-squared:                       0.207
Model:                            OLS   Adj. R-squared:                  0.206
Method:                 Least Squares   F-statistic:                     189.9
Date:                Tue, 07 May 2024   Prob (F-statistic):           1.51e-38
Time:                        23:45:00   Log-Likelihood:                -1519.8
No. Observations:                 728   AIC:                             3044.
Df Residuals:                     726   BIC:                             3053.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const              0.5672      0.08

### Part D - Regress Future Factor Returns on Variance

In [8]:
# Turn each month-end timestamp into a 'YYYY-MM' string so it matches the
# date key of the monthly factor file.
df_ff6['date'] = df_ff6['date'].dt.strftime('%Y-%m')

# Columns from position 8 onward are the var_* and lag_var_* columns; put the
# month key in front of them.
tmp = df_ff6.iloc[:, 8:]
tmp.insert(0, 'date', df_ff6['date'])

# Attach the variance columns to the monthly factor returns.
df_ff6 = pd.merge(ff6_monthly, tmp, how='inner', on=['date'])
df_ff6

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,Mom,var_Mkt-RF,var_SMB,var_HML,var_RMW,var_CMA,var_Mom,lag_var_Mkt-RF,lag_var_SMB,lag_var_HML,lag_var_RMW,lag_var_CMA,lag_var_Mom
0,1963-08,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01,0.086949,0.033653,0.039540,0.013295,0.023129,0.047702,0.223726,0.020938,0.037883,0.022160,0.031652,0.077050
1,1963-09,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19,0.154223,0.052577,0.035055,0.028740,0.028083,0.072686,0.086949,0.033653,0.039540,0.013295,0.023129,0.047702
2,1963-10,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12,0.171368,0.077462,0.116457,0.061698,0.085728,0.094405,0.154223,0.052577,0.035055,0.028740,0.028083,0.072686
3,1963-11,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74,1.493974,0.207698,0.079971,0.092893,0.081876,0.639916,0.171368,0.077462,0.116457,0.061698,0.085728,0.094405
4,1963-12,1.83,-2.10,-0.02,0.03,-0.07,0.29,1.75,0.171291,0.086630,0.042985,0.063767,0.085114,0.086355,1.493974,0.207698,0.079971,0.092893,0.081876,0.639916
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,2023-11,8.84,-0.12,1.64,-3.91,-1.00,0.44,2.75,0.547768,1.151056,0.410597,0.353418,0.090708,1.064724,0.863721,0.412867,0.363854,0.428135,0.145862,0.561597
724,2023-12,4.87,7.32,4.93,-3.07,1.32,0.43,-5.51,0.406426,0.973010,0.476797,0.187247,0.102177,0.804803,0.547768,1.151056,0.410597,0.353418,0.090708,1.064724
725,2024-01,0.71,-5.74,-2.38,0.69,-0.96,0.47,5.19,0.537473,0.613752,0.340327,0.271399,0.087964,0.581611,0.406426,0.973010,0.476797,0.187247,0.102177,0.804803
726,2024-02,5.06,-0.78,-3.49,-1.99,-2.14,0.42,4.96,0.767954,1.372533,0.458630,0.401818,0.318679,0.893207,0.537473,0.613752,0.340327,0.271399,0.087964,0.581611


In [9]:
# For each factor, regress the monthly return on the previous month's variance.
for factor in factors:
    print(f"\n\n{factor}")
    predictor = sm.add_constant(df_ff6[[f'lag_var_{factor}']])
    forecast_fit = sm.OLS(df_ff6[factor], predictor).fit()
    print(forecast_fit.summary())



Mkt-RF
                            OLS Regression Results                            
Dep. Variable:                 Mkt-RF   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                  0.007564
Date:                Tue, 07 May 2024   Prob (F-statistic):              0.931
Time:                        23:45:00   Log-Likelihood:                -2126.6
No. Observations:                 728   AIC:                             4257.
Df Residuals:                     726   BIC:                             4266.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const              0.5858      0.18

### Part E - Calculate Monthly Volatility Managed Portfolio Excess Returns

In [10]:
# Keep the first eight columns and everything from position 14 onward
# (the lag_var_* columns), skipping the same-month var_* columns in between.
keep_positions = np.r_[0:8, 14:df_ff6.shape[1]]
indiv_port = df_ff6.iloc[:, keep_positions].copy()

# Weight for each month: sample-mean lagged variance divided by that month's
# lagged variance.
for factor in factors:
    lagged_var = indiv_port[f'lag_var_{factor}']
    c = lagged_var.mean()
    indiv_port[f'lag_weights_{factor}'] = c / lagged_var

# Managed return: the factor's return scaled by its weight. A second pass keeps
# all weight columns ahead of all return columns.
for factor in factors:
    indiv_port[f'returns_{factor}'] = indiv_port[factor].mul(indiv_port[f'lag_weights_{factor}'])

indiv_port

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,Mom,lag_var_Mkt-RF,lag_var_SMB,...,lag_weights_HML,lag_weights_RMW,lag_weights_CMA,lag_weights_Mom,returns_Mkt-RF,returns_SMB,returns_HML,returns_RMW,returns_CMA,returns_Mom
0,1963-08,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01,0.223726,0.020938,...,8.807194,7.074614,4.434829,7.560788,23.590728,-11.184954,15.852949,2.546861,-1.552190,7.636396
1,1963-09,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19,0.086949,0.033653,...,8.438082,11.792022,6.069087,12.212486,-18.796775,-4.523336,1.096951,-8.372336,1.760035,2.320372
2,1963-10,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12,0.154223,0.052577,...,9.517852,5.454720,4.998408,8.014808,17.077374,-7.739252,-0.951785,15.273217,-10.046799,25.006200
3,1963-11,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74,0.171368,0.077462,...,2.864951,2.540924,1.637404,6.170911,-5.163423,-3.325620,5.013664,-1.295871,3.667785,-4.566474
4,1963-12,1.83,-2.10,-0.02,0.03,-0.07,0.29,1.75,1.493974,0.207698,...,4.172073,1.687648,1.714455,0.910373,1.275137,-2.959810,-0.083441,0.050629,-0.120012,1.593152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,2023-11,8.84,-0.12,1.64,-3.91,-1.00,0.44,2.75,0.863721,0.412867,...,0.916973,0.366170,0.962359,1.037332,10.654367,-0.085084,1.503836,-1.431723,-0.962359,2.852662
724,2023-12,4.87,7.32,4.93,-3.07,1.32,0.43,-5.51,0.547768,1.151056,...,0.812583,0.443582,1.547522,0.547148,9.255100,1.861621,4.006037,-1.361798,2.042729,-3.014788
725,2024-01,0.71,-5.74,-2.38,0.69,-0.96,0.47,5.19,0.406426,0.973010,...,0.699762,0.837238,1.373817,0.723857,1.818553,-1.726918,-1.665433,0.577694,-1.318864,3.756817
726,2024-02,5.06,-0.78,-3.49,-1.99,-2.14,0.42,4.96,0.537473,0.613752,...,0.980364,0.577637,1.595795,1.001635,9.800379,-0.372031,-3.421470,-1.149497,-3.415002,4.968108


In [11]:
# Run the CAPM / FF3 / FF5+Mom regressions on each factor's managed return.
for factor in factors:
    managed_col = f'returns_{factor}'
    estimate_models(df=indiv_port, return_col_name=managed_col, factor=factor)



Mkt-RF CAPM
                            OLS Regression Results                            
Dep. Variable:         returns_Mkt-RF   R-squared:                       0.450
Model:                            OLS   Adj. R-squared:                  0.449
Method:                 Least Squares   F-statistic:                     593.1
Date:                Tue, 07 May 2024   Prob (F-statistic):           3.22e-96
Time:                        23:45:00   Log-Likelihood:                -2570.3
No. Observations:                 728   AIC:                             5145.
Df Residuals:                     726   BIC:                             5154.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.4944      0.309      

In [12]:
# Print the sample mean of each factor's managed monthly return.
for factor in factors:
    managed_returns = indiv_port[f'returns_{factor}']
    m = managed_returns.mean()
    print(f'{factor} mean monthly return: {m}')

Mkt-RF mean monthly return: 1.4568009274706042
SMB mean monthly return: 0.34327447256959287
HML mean monthly return: 0.9507457696501198
RMW mean monthly return: 0.855247464813075
CMA mean monthly return: 0.43818206049065234
Mom mean monthly return: 4.034928537369595


## Problem 2

### Part A - Calculate Monthly Volatility


In [13]:
# Build a composite portfolio that weights each factor by the inverse of its
# lagged variance, with weights normalised to sum to one every month.

# Keep the first eight columns and everything from position 14 onward
# (the lag_var_* columns), skipping the same-month var_* columns in between.
comp_port = df_ff6.iloc[:, list(range(8)) + list(range(14, len(df_ff6.columns)))].copy()

# Column-name lists derived from `factors` so they cannot drift out of sync.
lag_vars = [f'lag_var_{name}' for name in factors]
rets = list(factors)
weights = [f'weight_{name}' for name in factors]

# Compute the inverse variances once, vectorised, rather than with a row-wise apply.
# skipna=False keeps a NaN total whenever any lagged variance in the row is NaN.
inv_var = 1 / comp_port[lag_vars]
comp_port['tot_inv_var'] = inv_var.sum(axis=1, skipna=False)

# Each factor's weight is its share of the row's total inverse variance.
for factor in factors:
    comp_port[f'weight_{factor}'] = inv_var[f'lag_var_{factor}'] / comp_port['tot_inv_var']

# Portfolio return is the weight-by-return product summed across factors;
# `weights` and `rets` are in the same factor order, so columns line up.
comp_port['returns'] = (comp_port[weights].values * comp_port[rets].values).sum(axis=1)

comp_port

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,Mom,lag_var_Mkt-RF,lag_var_SMB,...,lag_var_CMA,lag_var_Mom,tot_inv_var,weight_Mkt-RF,weight_SMB,weight_HML,weight_RMW,weight_CMA,weight_Mom,returns
0,1963-08,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01,0.223726,0.020938,...,0.031652,0.077050,168.326327,0.026554,0.283737,0.156820,0.268094,0.187691,0.077103,0.298613
1,1963-09,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19,0.086949,0.033653,...,0.023129,0.047702,205.924592,0.055850,0.144301,0.122815,0.365272,0.209959,0.101801,-0.325869
2,1963-10,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12,0.154223,0.052577,...,0.028083,0.072686,138.191526,0.046921,0.137634,0.206431,0.251784,0.257674,0.099556,0.404444
3,1963-11,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74,0.171368,0.077462,...,0.085728,0.094405,65.797276,0.088687,0.196203,0.130505,0.246332,0.177283,0.160990,0.132693
4,1963-12,1.83,-2.10,-0.02,0.03,-0.07,0.29,1.75,1.493974,0.207698,...,0.081876,0.639916,42.530085,0.015738,0.113206,0.294017,0.253118,0.287177,0.036744,-0.163020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,2023-11,8.84,-0.12,1.64,-3.91,-1.00,0.44,2.75,0.863721,0.412867,...,0.145862,0.561597,17.300356,0.066922,0.140002,0.158861,0.135009,0.396280,0.102925,0.194203
724,2023-12,4.87,7.32,4.93,-3.07,1.32,0.43,-5.51,0.547768,1.151056,...,0.090708,1.064724,19.922995,0.091632,0.043606,0.122245,0.142022,0.553352,0.047142,1.402778
725,2024-01,0.71,-5.74,-2.38,0.69,-0.96,0.47,5.19,0.406426,0.973010,...,0.102177,0.804803,21.955604,0.112066,0.046810,0.095526,0.243243,0.445762,0.056593,-0.382848
726,2024-02,5.06,-0.78,-3.49,-1.99,-2.14,0.42,4.96,0.537473,0.613752,...,0.087964,0.581611,23.200542,0.080195,0.070228,0.126650,0.158816,0.490003,0.074109,-1.088073


In [14]:
# Run the CAPM / FF3 / FF5+Mom regressions on the composite portfolio return;
# the empty label leaves the printed headers without a factor prefix.
estimate_models(df=comp_port, return_col_name='returns', factor='')



 CAPM
                            OLS Regression Results                            
Dep. Variable:                returns   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     3.481
Date:                Tue, 07 May 2024   Prob (F-statistic):             0.0625
Time:                        23:45:01   Log-Likelihood:                -1060.9
No. Observations:                 728   AIC:                             2126.
Df Residuals:                     726   BIC:                             2135.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3333      0.039      8.573 

                            OLS Regression Results                            
Dep. Variable:                returns   R-squared:                       0.387
Model:                            OLS   Adj. R-squared:                  0.385
Method:                 Least Squares   F-statistic:                     152.6
Date:                Tue, 07 May 2024   Prob (F-statistic):           1.20e-76
Time:                        23:45:01   Log-Likelihood:                -884.21
No. Observations:                 728   AIC:                             1776.
Df Residuals:                     724   BIC:                             1795.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2510      0.031      8.150      0.0