In [1]:
import numpy as np
np.random.seed(1)
import pandas as pd
import scipy

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

from functools import partial

import matplotlib.pyplot as plt
import seaborn as sns

from xgb_utils import *

In [2]:
data = pd.read_csv('clean_monthly_data.csv')
data

Unnamed: 0,Date,unemployment_rate,CPI,treasury_yield,GDP_growth,SP500_return,AZN,BMY,JNJ,LLY,MRK,NVO,NVS,PFE,ROG,inflation_change,unemp_change,treasury_yield_change
0,2000-02-01,4.1,170.000,6.661000,0.496560,-1.522563,-12.828964,-13.228004,-16.339821,-11.121498,-21.701151,2.220031,3.838386,-11.226228,54.440789,1.000,-0.1,-0.141500
1,2000-03-01,4.0,171.000,6.519500,0.511258,9.413333,22.264136,-0.218329,-2.079067,5.804243,0.913712,8.390897,6.420237,14.101954,6.922258,1.000,-0.1,-0.141500
2,2000-04-01,3.8,170.900,6.256522,1.327803,-3.266805,5.567379,-8.205683,17.437698,23.153694,12.400712,-0.097663,2.559423,15.213674,7.370518,-0.100,-0.2,-0.262978
3,2000-05-01,4.0,171.200,5.990526,-0.181797,-1.572223,-0.148357,5.395746,8.484832,-1.296597,7.374072,20.863985,5.169310,5.638019,-8.163265,0.300,0.2,-0.265995
4,2000-06-01,4.0,172.200,6.440455,0.305565,1.728613,10.549735,5.788826,14.239888,31.641749,3.078671,2.813690,8.474599,8.076012,13.131313,1.000,0.0,0.449928
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,2023-01-01,3.4,300.536,3.616190,0.390254,6.776820,-3.584079,0.972908,-7.489384,-5.928822,-2.549213,2.541749,-0.110227,-13.817335,16.968326,1.546,-0.1,-0.274810
276,2023-02-01,3.6,301.648,3.531500,-0.442183,-2.514271,-0.290649,-4.328217,-6.217115,-9.568502,-1.089288,1.592445,-7.172811,-7.286115,5.451681,1.112,0.2,-0.084690
277,2023-03-01,3.5,301.808,3.746842,-0.442183,3.313488,8.035329,0.507544,1.862736,10.703390,0.141189,12.873250,9.367574,0.566924,11.025813,0.160,-0.1,0.215342
278,2023-04-01,3.4,302.918,3.663043,-0.442183,1.985238,5.489119,-3.664707,5.612908,15.269915,9.289214,5.836894,16.334413,-4.681371,-1.517467,1.110,-0.1,-0.083799


In [3]:
macros = ['unemployment_rate', 'GDP_growth', 'SP500_return', 'inflation_change', 'unemp_change', 'treasury_yield_change']

for m in macros:
    data[f'{m}_lag'] = data[m].shift(1)
    data[f'{m}_lag2'] = data[m].shift(2)

In [4]:
data['Quater_dummy'] = 0
data['Quater_rippel'] = 0
data['Date'] = pd.to_datetime(data['Date'])
for index, row in data.iterrows():
    if row['Date'].month in [3, 6, 9, 12]:
        data.loc[index, 'Quater_dummy'] = 1
    if row['Date'].month in [1, 4, 7, 10]:
        data.loc[index, 'Quater_dummy'] = 1


In [5]:
tickers = ['NVS', 'AZN', 'BMY', 'JNJ', 'LLY', 'MRK', 'NVO', 'PFE', 'ROG']

for t in tickers:
    data[f'{t}_lag'] = data[t].shift(1)
    data[f'{t}_lag2'] = data[t].shift(2)

#### 1. Macros Only

In [6]:
data_macro = data.copy()
to_exclude = []

for t in tickers:
    to_exclude.append(t)
    to_exclude.append(f'{t}_lag')
    to_exclude.append(f'{t}_lag2')


to_exclude.append('Date')

1 month

In [7]:
dates1m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01']

params = {'n_estimators': [100, 150, 200],
          'max_depth': [3, 5, 7],
          'learning_rate': [0.01, 0.1]}

In [8]:
for t in tickers:
    loss = get_model_performance(data_macro, t, 0.05, to_exclude, dates1m, 1, params)
    print(f'{t}-1m-loss: {loss}')

NVS-1m-loss: 3.2618896020753922
AZN-1m-loss: 2.3879767866548227
BMY-1m-loss: 3.3772761038209924
JNJ-1m-loss: 2.775884336227969
LLY-1m-loss: 2.724265567092514
MRK-1m-loss: 1.7247076488973418
NVO-1m-loss: 2.2703321926043105
PFE-1m-loss: 4.4454702710296905
ROG-1m-loss: 6.529772320564978


In [9]:
dates3m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01']

for t in tickers:
    loss = get_model_performance(data_macro, t, 0.05, to_exclude, dates3m, 3, params)
    print(f'{t}-1m-loss: {loss}')

NVS-1m-loss: 7.6856894581403505
AZN-1m-loss: 7.913814541466462
BMY-1m-loss: 10.320303291058712
JNJ-1m-loss: 8.557777771943062
LLY-1m-loss: 9.370765301931797
MRK-1m-loss: 4.76074037038329
NVO-1m-loss: 6.529063424849665
PFE-1m-loss: 14.277869771777205
ROG-1m-loss: 22.197556289628714


In [10]:
dates6m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01']

for t in tickers:
    loss = get_model_performance(data_macro, t, 0.05, to_exclude, dates3m, 6, params)
    print(f'{t}-1m-loss: {loss}')

NVS-1m-loss: 20.16495909932879
AZN-1m-loss: 16.02819610088363
BMY-1m-loss: 25.541271103880096
JNJ-1m-loss: 23.563171157674816
LLY-1m-loss: 22.075649022442207
MRK-1m-loss: 11.503687753886336
NVO-1m-loss: 14.521913364211676
PFE-1m-loss: 37.24632054716845
ROG-1m-loss: 60.22955917742569


In [11]:
dates9m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01']

for t in tickers:
    loss = get_model_performance(data_macro, t, 0.05, to_exclude, dates3m, 9, params)
    print(f'{t}-9m-loss: {loss}')

NVS-9m-loss: 48.428739769688015
AZN-9m-loss: 31.654271872536
BMY-9m-loss: 59.31931111117942
JNJ-9m-loss: 56.18294772545206
LLY-9m-loss: 50.85359430320336
MRK-9m-loss: 27.938960661463405
NVO-9m-loss: 32.00232499801434
PFE-9m-loss: 91.7555504113376
ROG-9m-loss: 112.29689827583852


In [12]:
dates12m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01']

for t in tickers:
    loss = get_model_performance(data_macro, t, 0.05, to_exclude, dates3m, 9, params)
    print(f'{t}-13m-loss: {loss}')

NVS-13m-loss: 48.428739769688015
AZN-13m-loss: 31.654271872536
BMY-13m-loss: 59.17253914529902
JNJ-13m-loss: 55.65514019745779
LLY-13m-loss: 50.968996357415136
MRK-13m-loss: 27.934788460652697
NVO-13m-loss: 31.927137535453873
PFE-13m-loss: 90.8494930067548
ROG-13m-loss: 112.29170217235227
