#Importing Libraries

In [0]:
import numpy as np
import pandas as pd

#1. Load PredictorData2017.xlsx file.

In [0]:
predictors = pd.read_excel('PredictorData2017.xlsx')#,parse_dates[0],index_col =0)

In [0]:
predictors.head()

Unnamed: 0,yyyymm,Index,D12,E12,b/m,tbl,AAA,BAA,lty,ntis,Rfree,infl,ltr,corpr,svar,csp,CRSP_SPvw,CRSP_SPvwx
0,187101,4.44,0.26,0.4,,,,,,,0.004955,,,,,,,
1,187102,4.5,0.26,0.4,,,,,,,0.004514,,,,,,,
2,187103,4.61,0.26,0.4,,,,,,,0.004243,,,,,,,
3,187104,4.74,0.26,0.4,,,,,,,0.004632,,,,,,,
4,187105,4.86,0.26,0.4,,,,,,,0.003691,,,,,,,


#2. This file contains data that can be used to construct a number of predictors (you don’t have to know what these mean).  These include:
* The E/P ratio (column D divided by column B)
* The term spread (column I minus column F)
* The default spread (column H minus column G)
* Net issuance (column J)
* The data also contains market returns in column Q and risk-free returns in column K.  
#Use Q and K to compute the excess return on the market. (market return – risk free return) 


In [0]:
# Creating the 4 predictors 
predictors['E/P_Ratio'] = predictors['E12']/predictors['Index']
predictors['Term_Spread'] = predictors['lty'] - predictors['tbl']
predictors['Default_Spread'] = predictors['BAA'] - predictors['AAA']
predictors['Net_Issuance'] = predictors['ntis']
# Creating 'Excess_Market_Return' column
predictors['Excess_Market_Return'] = predictors['CRSP_SPvw']-predictors['Rfree']
# Filter out all of the useless columns
predictors = predictors[['yyyymm','E/P_Ratio','Term_Spread','Default_Spread','Net_Issuance','Excess_Market_Return']]

predictors.head()

Unnamed: 0,yyyymm,E/P_Ratio,Term_Spread,Default_Spread,Net_Issuance,Excess_Market_Return
0,187101,0.09009,,,,
1,187102,0.088889,,,,
2,187103,0.086768,,,,
3,187104,0.084388,,,,
4,187105,0.082305,,,,


#3. Drop data before 1950 (we don’t want to use too old/irrelevant data in our models).

In [0]:
# Convert 'yyyymm' into a datetime with format 'yyyy-mm'
predictors['yyyymm'] = pd.to_datetime(predictors['yyyymm'], format='%Y%m')
predictors.head()

Unnamed: 0,yyyymm,E/P_Ratio,Term_Spread,Default_Spread,Net_Issuance,Excess_Market_Return
0,1871-01-01,0.09009,,,,
1,1871-02-01,0.088889,,,,
2,1871-03-01,0.086768,,,,
3,1871-04-01,0.084388,,,,
4,1871-05-01,0.082305,,,,


In [0]:
# Getting rid of everything before 1950
predictors = predictors[predictors['yyyymm']>='1950-01-01']
predictors
# Set the date as my dataframe index
predictors = predictors.set_index('yyyymm')
predictors

Unnamed: 0_level_0,E/P_Ratio,Term_Spread,Default_Spread,Net_Issuance,Excess_Market_Return
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1950-01-01,0.137048,0.0108,0.0067,0.027102,0.018811
1950-02-01,0.136663,0.0102,0.0066,0.025491,0.018670
1950-03-01,0.137073,0.0103,0.0066,0.029291,0.007252
1950-04-01,0.134293,0.0099,0.0063,0.026396,0.044929
1950-05-01,0.132233,0.0097,0.0064,0.028571,0.045935
...,...,...,...,...,...
2017-08-01,0.042911,0.0141,0.0068,-0.012138,0.001396
2017-09-01,0.042503,0.0156,0.0067,-0.011027,0.019192
2017-10-01,0.041943,0.0154,0.0072,-0.012358,0.023266
2017-11-01,0.042148,0.0137,0.0070,-0.012243,0.030363


#4. Run an out-of-sample analysis of the four predictive variables given above.  The dependent variable is the excess market return. Use a linear regression for prediction (you can try other models if you want).


---

#5. All regressions should use exactly 10 years of lagged data.  So your first prediction will be made in December of 1959.  You will be predicting the January 1960 return.  Move the sample forward one month and repeat (the dependent variable should be the one month return and the independent variables are all four predictors 10 year lagged data). This is a “rolling regression”.

* In other words, each month, you train a new regression based on 10 years of lagged predictor data, so your window length of predictors is fixed, 10 years, and the trained model is incrementally moving forward to make a forecast for next month’s excess returns.

In [0]:
# Create a predictors dataframe (x) 
x = predictors
x = predictors.iloc[:, 0:4]
x

Unnamed: 0_level_0,E/P_Ratio,Term_Spread,Default_Spread,Net_Issuance
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1950-01-01,0.137048,0.0108,0.0067,0.027102
1950-02-01,0.136663,0.0102,0.0066,0.025491
1950-03-01,0.137073,0.0103,0.0066,0.029291
1950-04-01,0.134293,0.0099,0.0063,0.026396
1950-05-01,0.132233,0.0097,0.0064,0.028571
...,...,...,...,...
2017-08-01,0.042911,0.0141,0.0068,-0.012138
2017-09-01,0.042503,0.0156,0.0067,-0.011027
2017-10-01,0.041943,0.0154,0.0072,-0.012358
2017-11-01,0.042148,0.0137,0.0070,-0.012243


In [0]:
# Creating a response variable dataframe (y)
y = predictors.iloc[:, 4:5]
y

Unnamed: 0_level_0,Excess_Market_Return
yyyymm,Unnamed: 1_level_1
1950-01-01,0.018811
1950-02-01,0.018670
1950-03-01,0.007252
1950-04-01,0.044929
1950-05-01,0.045935
...,...
2017-08-01,0.001396
2017-09-01,0.019192
2017-10-01,0.023266
2017-11-01,0.030363


In [0]:
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

lag = 120 #we need at least a 10 years of data before we can train our model
# Let's make a dataframe for our y predictions
y_pred = pd.DataFrame(index=y.index)
y_pred['pred_Excess_Market_Return'] = np.nan
y_pred


Unnamed: 0_level_0,pred_Excess_Market_Return
yyyymm,Unnamed: 1_level_1
1950-01-01,
1950-02-01,
1950-03-01,
1950-04-01,
1950-05-01,
...,...
2017-08-01,
2017-09-01,
2017-10-01,
2017-11-01,


In [0]:
for i in range(lag,len(x)): # From month 120 to last month
    x_train = x.iloc[0:i] # Our x training data will be the predictors data before 1960
    y_train = y.iloc[0:i] # Our y training data will be the response data before 1960

    x_test = x.iloc[i:i+1] # Our x test data is now the predictors data from 1960 through 2017
    
    model=LinearRegression()
    # Train our linear regression model on training data
    fit_model=model.fit(x_train, y_train)
    # Given we have trained our linear regression model, 
    # we can start moving our sampling one month at a time and predict excess
    # market return from 1960 through 2017. 
    y_pred.iloc[i] = fit_model.predict(x_test)

y_pred

Unnamed: 0_level_0,pred_Excess_Market_Return
yyyymm,Unnamed: 1_level_1
1950-01-01,
1950-02-01,
1950-03-01,
1950-04-01,
1950-05-01,
...,...
2017-08-01,0.007700
2017-09-01,0.007992
2017-10-01,0.007951
2017-11-01,0.007665


In [0]:
# Let's drop every 'NA' for our predicted excess market returns
y_pred['pred_Excess_Market_Return'].dropna()

yyyymm
1960-01-01    0.006595
1960-02-01    0.001792
1960-03-01    0.007811
1960-04-01    0.011156
1960-05-01    0.009105
                ...   
2017-08-01    0.007700
2017-09-01    0.007992
2017-10-01    0.007951
2017-11-01    0.007665
2017-12-01    0.007868
Name: pred_Excess_Market_Return, Length: 696, dtype: float64

### RMSE Evaluation


In [0]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_pred.iloc[lag:], y.iloc[lag:]))

0.04319508550384095

#6. If m(t) is your return excess forecast at time t, then set a portfolio weight equal to:
 
###w(t) = min{1.5, max{-.5, 100×m(t)}}.
 


In [0]:
max(y_pred['pred_Excess_Market_Return'].dropna())

0.044420436379202774

In [0]:
t_min = 120 # month 120 -> '1960-01-01'
portfolio_weight = [] # Create an empty list of portfolio_weights

for t in range(t_min,len(y_pred['pred_Excess_Market_Return'])): #From month 120 to last month associated with our predicted response variable
  # Implementation of our weight formula
  portfolio_weight.append(min(1.5, max(-0.5, 100*y_pred['pred_Excess_Market_Return'][t])))


In [0]:
len(portfolio_weight)

696

#7. Given the weight w(t), your portfolio’s excess return in time t+1 will therefore be w(t) x r(t+1), where r is the actual excess market return (not predicted). Compute your portfolio return.
 

In [0]:
# Actual excess market returns
actual_excess_market_returns = y[y.index >= '1960-01-01']
actual_excess_market_returns

Unnamed: 0_level_0,Excess_Market_Return
yyyymm,Unnamed: 1_level_1
1960-01-01,-0.073558
1960-02-01,0.010622
1960-03-01,-0.015324
1960-04-01,-0.018805
1960-05-01,0.030771
...,...
2017-08-01,0.001396
2017-09-01,0.019192
2017-10-01,0.023266
2017-11-01,0.030363


In [0]:
# Create a list of portfolio returns
portfolio_return = [] 

for t in range(0,len(portfolio_weight)-1): # From the first weight to the 
                                           # appropriate second to last month 
                                           # associated with our weights (given 
                                           # we want want a portfolio return for 
                                           # for the last month in 2017)
  # Implementation of portfolio return formula
  portfolio_return.append(portfolio_weight[t]*actual_excess_market_returns['Excess_Market_Return'][t+1])

portfolio_return

[0.007005373031708603,
 -0.002746004510031664,
 -0.014687636783513333,
 0.03432836548796396,
 0.01798432782362968,
 -0.037297000000000004,
 0.0443255,
 -0.0911695,
 -0.0035309279581662322,
 0.05761694138165109,
 0.070446,
 0.09368,
 0.046231999999999995,
 0.04103,
 0.0039430000000000055,
 0.032467,
 -0.042922,
 0.04789249999999999,
 0.03410749999999999,
 -0.030861,
 0.03938749999999999,
 0.06382908161559464,
 0.0034200485854643347,
 -0.059434,
 0.027133022653178848,
 -0.0035156860596154836,
 -0.07806967858189094,
 -0.08274551147493409,
 -0.051313746647827674,
 0.05626573290021552,
 -0.0031834626999513362,
 -0.008690506970850002,
 0.00051797203816819,
 0.047263221946385006,
 0.010619323593341211,
 0.071913,
 -0.03993349999999999,
 0.051259000000000006,
 0.06998,
 0.025848499999999993,
 -0.031645000000000006,
 -0.006706500000000001,
 0.076049,
 -0.019828000000000005,
 0.047924999999999995,
 -0.011130499999999995,
 0.034697500000000006,
 0.036686500000000004,
 0.01973936066073035,
 0.0152

#8. Assess this strategy using mean, and standard deviation.  Report corresponding values for the strategy that simply puts 100% into the market (this is a baseline model) and see which one performs better.

##A Base Model Assessment (Your basically buying everything so the base model returns simply be the actual returns you'd have if you were to 100% into the market every month)

In [0]:
np.mean(actual_excess_market_returns['Excess_Market_Return'])

0.005119051245210725

In [0]:
np.std(actual_excess_market_returns['Excess_Market_Return'])

0.042271595780990066

##Strategy Assessment

In [0]:
np.mean(portfolio_return) # Greater mean return than the base model's mean return

0.006308344661332584

In [0]:
np.std(portfolio_return) # However, greater stdev than the base model's stdev

0.048359006994840886