In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet, Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, normalize

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
macro = pd.read_csv('../data/again1.csv')
tech = pd.read_csv('../data/technical_dataset.csv')

In [5]:
macro

Unnamed: 0,target,DTB3,MCUMFN,CPIAUCSL,GEPU_current,IPG211111CN,Killian,IRLTLT01USM156N,M2SL,Close,CFNAI,UNRATE,Imports,Production,Stocks,target_data,mean_by_month
0,24.20,5.033810,82.0867,159.400,76.906476,98.3693,-4.346762,6.58,3834.6,786.16,0.26,5.3,7218.20,6489.4,838890.8,1997-02,22.176316
1,24.00,5.033810,82.0867,159.400,76.906476,98.3693,-4.346762,6.58,3834.6,786.16,0.26,5.3,7218.20,6489.4,838890.8,1997-02,22.176316
2,23.90,5.033810,82.0867,159.400,76.906476,98.3693,-4.346762,6.58,3834.6,786.16,0.26,5.3,7218.20,6489.4,838890.8,1997-02,22.176316
3,23.05,5.033810,82.0867,159.400,76.906476,98.3693,-4.346762,6.58,3834.6,786.16,0.26,5.3,7218.20,6489.4,838890.8,1997-02,22.176316
4,22.30,5.033810,82.0867,159.400,76.906476,98.3693,-4.346762,6.58,3834.6,786.16,0.26,5.3,7218.20,6489.4,838890.8,1997-02,22.176316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6081,63.03,0.031304,76.3791,265.028,212.409145,165.8403,24.093239,1.61,19853.7,3972.89,2.22,6.0,5686.25,10975.0,1138710.0,2021-04,61.716667
6082,63.81,0.031304,76.3791,265.028,212.409145,165.8403,24.093239,1.61,19853.7,3972.89,2.22,6.0,5686.25,10975.0,1138710.0,2021-04,61.716667
6083,65.00,0.031304,76.3791,265.028,212.409145,165.8403,24.093239,1.61,19853.7,3972.89,2.22,6.0,5686.25,10975.0,1138710.0,2021-04,61.716667
6084,63.50,0.031304,76.3791,265.028,212.409145,165.8403,24.093239,1.61,19853.7,3972.89,2.22,6.0,5686.25,10975.0,1138710.0,2021-04,61.716667


In [6]:
#macro_date = macro['date']
y = macro['target']
hs = macro['mean_by_month']
macro.drop(columns=['target','mean_by_month', 'target_data'], inplace=True)
tech_date = tech['date']
tech.drop(columns=['date'], inplace=True)

In [7]:
data = pd.concat([macro, tech], axis=1)

# Fit using five different window sizes (30%, 35%, 40%, 45%, and 50% of estimation sample) to estimate LASSO parameters

In [10]:
def window_fit(x, y, hs, model, window_size):
    window = int(x.shape[0]*window_size)
    x_train = x.iloc[:window]
    x_test = x.iloc[window:]
    y_train = y.iloc[:window]
    y_test = y.iloc[window:]
    hs_test = hs.iloc[window:]
    
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train_sc = scaler.transform(x_train)
    x_test_sc = scaler.transform(x_test)

    model.fit(x_train_sc, y_train)
    pred = model.predict(x_test_sc)
    
    print("R2", r2_score(y_test, pred))
    print("R2OS", 1 - mean_squared_error(y_test, pred)/mean_squared_error(y_test, hs_test))
    print("MSE", mean_squared_error(y_test, pred))
    return model

In [13]:
windows = [0.3, 0.35, 0.4, 0.45, 0.5]
for i in windows:
    model = window_fit(data, y, hs, ElasticNet(), i)

R2 -3.685266176273694
R2OS -282.9926204815684
MSE 2427.8432367556734
R2 -2.43354658748185
R2OS -199.03027336676422
MSE 1788.7765993148944
R2 -1.5912432897942654
R2OS -153.30589638522906
MSE 1450.8239694021013
R2 -1.2221056439403188
R2OS -135.20021812943205
MSE 1323.0910537283564
R2 -0.5805344641489896
R2OS -104.35117405234882
MSE 791.4584372022588


In [152]:
for i in windows:
    model = window_fit(data, y, hs, ElasticNet(), i)

R2 -3.952235250614655
R2OS -299.1746780546405
MSE 2566.183095618745
R2 -2.057586544942382
R2OS -177.12773377160352
MSE 1592.91249517724
R2 -2.3024373208050686
R2OS -195.65677593836412
MSE 1849.0178986059072
R2 -2.5712234521282546
R2OS -217.89211905618802
MSE 2126.3857608485046
R2 -6.8506477066335005
R2OS -522.2881482344283
MSE 3931.2406694422666


# FIt ElasticNet and Lasso with a fixed number of selected predictors. Аnd we also use a different value of the alpha parameter(0.3, 0.5, 0.7) for ElasticNet

In [14]:
coef = model.coef_
cols = data.columns
zipped = list(zip(cols, coef))

res = sorted(zipped, key = lambda x: x[1])

features = []
for i in res:
    if (i[1] != 0.0) and (len(features)< 10):
        features.append(i[0])

x_train = data[features].iloc[:5813]
y_train = y.iloc[:5813]
x_test = data[features].iloc[5813:]
y_test = y.iloc[5813:]
hs_test = hs.iloc[5813:]


In [15]:
alpha = [0.3, 0.5, 0.7]

In [16]:
for i in alpha:
    model = ElasticNet(alpha=i)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    print("R2", r2_score(y_test, pred))
    print("R2OS", 1 - mean_squared_error(y_test, pred)/mean_squared_error(y_test, hs_test))
    print("MAPE", mean_squared_error(y_test, pred))

R2 -17.019366883504514
R2OS -180.71301805295562
MAPE 3157.89261646746
R2 -15.395547486364261
R2OS -164.33790757682667
MAPE 2873.3183959713983
R2 -14.073712381149349
R2OS -151.00810260144598
MAPE 2641.666898672297


In [159]:
model = Lasso()
model.fit(x_train, y_train)
pred = model.predict(x_test)
print("R2", r2_score(y_test, pred))
print("R2OS", 1 - mean_squared_error(y_test, pred)/mean_squared_error(y_test, hs_test))
print("MAPE", mean_squared_error(y_test, pred))

R2 -15.408708608040747
R2OS -164.47062850738197
MAPE 2875.6248815008457
