## Elastic Net Model Accuracy Comparison for Selected Stocks

---

|  |  |
|:---|:---|
|**Reading Time** |  10 minutes |
|**Packages used** | Defined within the repository, with relevant docstrings |
|**Model used** | Already-trained model, stored in the directory ./model/output |
|**Outcome** | Comparison of the model accuracy on the real dataset | 

---

In [1]:
import os
import pickle

import pandas as pd
import numpy as np
import yfinance as yf

import warnings
warnings.filterwarnings("ignore") # just to remove clutter in the notebook

from datetime import date, datetime, timedelta
from sklearn.linear_model import ElasticNet

In [2]:
# Make the repository root the working directory so that the `model` package
# imported in the following cell resolves. A bare `os.chdir("..")` climbs one
# more directory every time the cell is re-run, which silently breaks that
# import and every relative path; the guard makes the cell idempotent by only
# moving up while the `model` directory is not visible from the current one.
if not os.path.isdir("model"):
    os.chdir("..")

In [3]:
from model.utils.pre_processing import train_validation_test_split

In [4]:
# Download the price history for one ticker. The window starts on a fixed date
# and ends at the moment the cell runs, so the data (and everything derived
# from it) changes from run to run.
ticker = "SBIN.NS"
start_date = date(2016, 1, 1)
end_date = datetime.today()
df = yf.download(
    ticker,
    start=start_date,
    end=end_date,
)

[*********************100%***********************]  1 of 1 completed


In [5]:
# Keyword arguments for train_validation_test_split. Both cut-offs are measured
# backwards from `end_date` in mean Gregorian years (365.2425 days), expressed
# in seconds: two years back for 'training_end', one year back for
# 'validation_end'.
# 'past_day_returns_for_predicting' is presumably the look-back window length;
# the prediction loops later in the notebook hard-code the same 60 — TODO confirm.
param = dict(
    training_end=end_date - timedelta(seconds=2 * 365.2425 * 24 * 60 * 60),
    validation_end=end_date - timedelta(seconds=1 * 365.2425 * 24 * 60 * 60),
    past_day_returns_for_predicting=60,
)

In [6]:
# Build the model inputs from the downloaded price history, using the cut-offs
# and window length in `param`. The helper is imported from
# model.utils.pre_processing and its body is not visible here; judging by the
# unpacked names it presumably returns scaled training features, training
# targets, scaled validation features, validation targets and the fitted
# scaler, in that order — TODO confirm against its docstring.
# NOTE(review): the saved `df_training.tail()` output further down shows a
# `daily_returns` column that no cell in this notebook creates, so this call
# may also add columns to `df` in place — verify.
X_train_norm, Y_train, X_val_norm, Y_val, scaler = train_validation_test_split(df, **param)

In [28]:
from sklearn.svm import SVR

In [41]:
# Fit an elastic-net regressor on the scaled training features. `alpha` scales
# the overall penalty and `l1_ratio=0.2` makes it mostly L2 with a smaller L1
# share.
# NOTE(review): the header table says an already-trained model from
# ./model/output is used, but this cell fits a fresh estimator instead.
model = ElasticNet(l1_ratio=0.2, alpha=0.001)
model.fit(X=X_train_norm, y=Y_train)

In [23]:
# Day-over-day simple returns of the adjusted close. The first row has no
# previous price, so pct_change() leaves it NaN.
df['actual_returns'] = df['Adj Close'].pct_change()

# Drop rows with a missing value in any real data column. The placeholder
# column is excluded from the check so that re-running this cell does not throw
# away rows that simply have no prediction yet; on a first run the column does
# not exist and this is identical to a plain dropna(). The explicit copy gives
# an independent frame for the in-place column writes made by later cells.
df = df.dropna(subset=df.columns.difference(['predicted_returns'])).copy()

# Placeholder filled in by the prediction loops. NaN (rather than an empty
# string) keeps the column numeric, so rows without a prediction do not turn it
# into a mixed string/float object column that breaks arithmetic on it.
df['predicted_returns'] = np.nan

In [46]:
# Rows dated on or before the training cut-off. The frame is written to
# positionally by the training-period prediction loop, so take an explicit copy:
# without it pandas treats `df_training` as derived from `df` and raises
# SettingWithCopyWarning on those writes (hidden in this notebook by the global
# warnings filter).
df_training = df[df.index <= param['training_end']].copy()

In [23]:
# Recursive walk-forward prediction over the whole frame.
# `rolling_returns` starts as a copy of the observed returns. For every row from
# position 60 onwards, the previous 60 values of `rolling_returns` are scaled
# and fed to the model, and the prediction is written to both `rolling_returns`
# and `predicted_returns` at that row. Because each prediction overwrites
# `rolling_returns`, later windows contain earlier predictions rather than
# observed returns.
rol_freq_for_prediction = 60
df['rolling_returns'] = df['actual_returns']

# Column positions do not change inside the loop, so look them up once.
rolling_col = df.columns.get_loc('rolling_returns')
predicted_col = df.columns.get_loc('predicted_returns')

for row in range(rol_freq_for_prediction, len(df)):
    window = df['rolling_returns'].iloc[row - rol_freq_for_prediction:row]
    # One sample with `rol_freq_for_prediction` features: shape (1, 60).
    scaled_window = scaler.transform(np.array(window.to_list()).reshape(1, -1))
    predicted_return = model.predict(scaled_window)[0]
    df.iloc[row, rolling_col] = predicted_return
    df.iloc[row, predicted_col] = predicted_return

In [47]:
# One-step-ahead predictions over the training period.
# For every row from position 60 onwards, the previous 60 observed returns are
# scaled and fed to the model, and the prediction is stored in
# `predicted_returns` at that row. The inputs always come from `actual_returns`,
# so predictions are never fed back into later windows.
rol_freq_for_prediction = 60

# The column position is constant for the whole loop, so resolve it once.
predicted_col = df_training.columns.get_loc('predicted_returns')

for row in range(rol_freq_for_prediction, len(df_training)):
    window = df_training['actual_returns'].iloc[row - rol_freq_for_prediction:row]
    # One sample with `rol_freq_for_prediction` features: shape (1, 60).
    scaled_window = scaler.transform(np.array(window.to_list()).reshape(1, -1))
    predicted_return = model.predict(scaled_window)[0]
    df_training.iloc[row, predicted_col] = predicted_return

In [48]:
# Copy the training-period frame to the system clipboard, presumably for
# pasting into a spreadsheet. This is a side effect only: nothing is displayed
# or saved, and the cell needs a working clipboard on the machine running the
# kernel.
df_training.to_clipboard()

In [36]:
# Display the last five rows of the training-period frame.
# NOTE(review): the saved output carries execution count 36, lower than the
# prediction loop's 47, which is likely why `predicted_returns` is blank in it.
df_training.tail(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,daily_returns,actual_returns,predicted_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-04-08,361.0,363.299988,354.299988,355.600006,346.932678,33370259,-0.008228,-0.008228,
2021-04-09,354.399994,364.5,351.200012,353.0,344.396027,46473100,-0.007312,-0.007312,
2021-04-12,344.0,344.0,322.549988,328.850006,320.834686,75501713,-0.068414,-0.068414,
2021-04-13,332.0,342.25,330.5,341.0,332.688538,49773360,0.036947,0.036947,
2021-04-15,342.100006,347.549988,336.100006,342.700012,334.347107,52992349,0.004985,0.004985,


In [42]:
# In-sample predictions: score the same scaled features the model was fitted on.
y_pred = model.predict(X=X_train_norm)

In [44]:
# Put the training targets next to the in-sample predictions for inspection.
df_check = pd.DataFrame(dict(y_actual=Y_train, y_predicted=y_pred))

In [45]:
# Copy the actual-vs-predicted table to the system clipboard, presumably for
# pasting into a spreadsheet. This is a side effect only: nothing is displayed
# or saved, and the cell needs a working clipboard on the machine running the
# kernel.
df_check.to_clipboard()