## Elastic Net Model Accuracy Comparison for some Stocks

---

|  |  |
|:---|:---|
|**Reading Time** |  10 minutes |
|**Packages used** | Defined, with relevant docstrings, within the repository |
|**Model used** | Previously trained model stored in the directory `./model/output` |
|**Outcome** | Comparison of the model accuracy on the real dataset | 

---

In [1]:
import os
import pickle

import pandas as pd
import numpy as np
import yfinance as yf

import warnings
warnings.filterwarnings("ignore") # just to remove clutter in the notebook

from datetime import date, datetime, timedelta
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Step up to the parent directory, where the `model` package imported in the
# next cell is expected to live. The guard keeps the cell idempotent: without
# it, every re-run would climb one more directory level.
if not os.path.isdir("model"):
    os.chdir("..")

In [3]:
from model.utils.pre_processing import create_features_and_target_split, standardize_and_limit_outliers_returns, add_technical_indicators

In [4]:
# Download the daily price history for the ticker under study.
ticker = "SBIN.NS"
end_date = datetime.today()  # evaluated at run time, so the window shifts with every run
start_date = date(2016, 1, 1)
# auto_adjust=False is passed explicitly: later cells read the 'Adj Close'
# column, which yfinance only returns when prices are not auto-adjusted
# (newer yfinance releases default to auto_adjust=True).
df_hist = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

[*********************100%***********************]  1 of 1 completed


In [5]:
# Split boundaries are measured backwards from `end_date`, with a year taken
# as 365.2425 days and expressed in seconds.
SECONDS_PER_YEAR = 365.2425 * 24 * 60 * 60

param = dict(
    training_end=end_date - timedelta(seconds=2 * SECONDS_PER_YEAR),    # two years before end_date
    validation_end=end_date - timedelta(seconds=1 * SECONDS_PER_YEAR),  # one year before end_date
    past_day_returns_for_predicting=21,
)

In [6]:
# Indicator columns that are handed to the scaling helper in a later cell.
technical_indicator_features = ['RSI', 'MACD', 'UpperBollingerBand', 'LowerBollingerBand']
lookback_days = param['past_day_returns_for_predicting']

# Enrich the price history, derive simple daily returns from the adjusted
# close, then discard every row that still contains a NaN in any column.
df_hist = add_technical_indicators(df_hist)
df_hist['daily_returns'] = df_hist['Adj Close'].pct_change()
df_hist = df_hist.dropna()

# presumably builds lagged-return features plus a target column — verify
# against model/utils/pre_processing.
returns_only = pd.DataFrame(df_hist['daily_returns'])
dt_model = create_features_and_target_split(returns_only, lookback_days)

# Left-join the full price/indicator columns onto the feature rows by index.
dt_model = pd.merge(dt_model, df_hist, how="left", left_index=True, right_index=True)

In [7]:
# Produce the train/validation matrices and the `scaler` object via the
# project helper. `param` is additionally forwarded as keyword arguments.
X_train, Y_train, X_val, Y_val, scaler = standardize_and_limit_outliers_returns(
    dt_model,
    param['past_day_returns_for_predicting'],
    technical_indicator_features,
    **param,
)

In [42]:
# Fit the regression on the training split.
# NOTE(review): alpha=0 switches the penalty term off, so this is effectively
# an unregularised least-squares fit. scikit-learn's ElasticNet documentation
# advises against alpha=0 for numerical reasons and points to LinearRegression
# instead — confirm these hyperparameters are intended.
model = ElasticNet(alpha=0, l1_ratio=0)
model.fit(X_train, Y_train)

In [46]:
# Attach realised daily returns (percentage change of the adjusted close) and
# drop every row that contains a NaN in any column.
df_hist = (
    df_hist
    .assign(actual_returns=df_hist['Adj Close'].pct_change())
    .dropna()
)
# Placeholder column for model predictions. The empty-string default makes
# this an object-dtype column until real values are written into it.
df_hist['predicted_returns'] = ""

In [27]:
# Chronological split of the price history on the configured boundaries.
# .copy() gives each subset its own data, so the positional writes made to
# these frames in later cells act on a real frame rather than on a slice of
# df_hist (which is what triggers pandas' SettingWithCopyWarning).
df_training = df_hist.loc[df_hist.index <= param['training_end']].copy()
# Rows strictly after the validation boundary. The span between the two
# boundaries is not captured by either frame.
df_test = df_hist.loc[df_hist.index > param['validation_end']].copy()

In [23]:
# Walk forward through the training frame: for each row, predict its return
# from the preceding `rol_freq_for_prediction` actual returns and store the
# prediction in both 'rolling_returns' and 'predicted_returns'.
#
# The original cell wrote into a frame called `df`, which is not defined at
# this point in the notebook (and is later bound to an unrelated predictions
# frame). It now writes into `df_training`, the frame the loop already reads
# its features from.
# NOTE(review): confirm df_training (rather than df_hist) is the intended target.
#
# NOTE(review): X_train was built from param['past_day_returns_for_predicting']
# (21) lagged returns plus the technical-indicator columns, whereas each window
# here is 60 raw returns wide — confirm `scaler` and `model` accept this shape.
rol_freq_for_prediction = 60

# Work on an independent frame so the column writes below never target a slice.
df_training = df_training.copy()
df_training['rolling_returns'] = df_training['actual_returns']

# Column positions are loop-invariant, so look them up once.
rolling_col = df_training.columns.get_loc('rolling_returns')
predicted_col = df_training.columns.get_loc('predicted_returns')

for i in range(rol_freq_for_prediction, len(df_training)):
    # Trailing window of actual returns, shaped as a single sample (1, window).
    window = df_training['actual_returns'].iloc[i - rol_freq_for_prediction:i].to_numpy()
    new_scaled_ls = scaler.transform(window.reshape(1, -1))
    predicted_return = model.predict(new_scaled_ls)[0]
    df_training.iloc[i, rolling_col] = predicted_return
    df_training.iloc[i, predicted_col] = predicted_return

In [15]:
# For every row past the first `rol_freq_for_prediction` rows of the training
# frame, feed the preceding window of actual returns through the scaler and
# the model, and record the prediction in 'predicted_returns'.
rol_freq_for_prediction = 60
predicted_col = df_training.columns.get_loc('predicted_returns')  # fixed for the whole loop
for i in range(rol_freq_for_prediction, len(df_training)):
    window_start = i - rol_freq_for_prediction
    features_ls = np.array(df_training['actual_returns'].iloc[window_start:i].to_list())
    # One sample with `rol_freq_for_prediction` columns.
    new_scaled_ls = scaler.transform(features_ls.reshape(1, -1))
    predicted_return = model.predict(new_scaled_ls)[0]
    df_training.iloc[i, predicted_col] = predicted_return

NameError: name 'scaler' is not defined

In [33]:
# In-sample predictions: X_train is the same data the model was fitted on.
y_pred = model.predict(X_train)

In [34]:
# Wrap the in-sample predictions in a DataFrame (exported to the clipboard in a later cell).
df = pd.DataFrame(y_pred)

In [35]:
# Mean squared error of the in-sample predictions against the training targets.
mse = mean_squared_error(y_true=Y_train, y_pred=y_pred)

In [36]:
# Coefficient of determination of the in-sample predictions against the training targets.
r2 = r2_score(y_true=Y_train, y_pred=y_pred)

In [37]:
# Copy the predictions frame to the system clipboard.
# NOTE(review): this needs a clipboard backend on the host and will likely fail
# in headless runs — confirm this manual export is still wanted.
df.to_clipboard()

In [38]:
# Wrap the training targets in a DataFrame (exported to the clipboard in the next cell).
df_actual = pd.DataFrame(Y_train)

In [39]:
# Copy the training targets to the system clipboard.
# NOTE(review): this needs a clipboard backend on the host and will likely fail
# in headless runs — confirm this manual export is still wanted.
df_actual.to_clipboard()

In [40]:
# Display the R² score. It is computed on the training data, so it is an
# in-sample figure rather than an out-of-sample accuracy estimate.
r2

0.3687790044646897

In [43]:
# Inspect which columns df_hist currently carries.
df_hist.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'RSI', 'MACD',
       'UpperBollingerBand', 'LowerBollingerBand', 'daily_returns'],
      dtype='object')

In [44]:
# Dimensions of the training feature matrix.
X_train.shape

(1259, 25)

In [45]:
# Dimensions of the training slice of the price history. The original cell
# referenced `df_tr`, a name that is never defined in this notebook.
df_training.shape

NameError: name 'df_training' is not defined