In [24]:
from Models import ML_Models
exam_models = ML_Models()

In [25]:
import yfinance as yf
import pandas as pd
import inspect
import datetime as dt
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
# Parameters used for fetching the data from yfinance.
start_date = "2012-01-01"
end_date = dt.date.today()
main_col = "Adj Close"
interval = "1d"
stocks_list = ["EQNR.OL", "DNB.OL", "TEL.OL", "NHY.OL", "AKRBP.OL", "YAR.OL", "MOWI.OL", "CL=F", "OSEBX.OL"]

# Specifying the indicators wanted for further analysis.
indicators = ["MA5", "MA20", "MA50", "MA200", "MIN", "MAX", "LOG_RET", "MOM", "VOLA", "DIFF"]

# Models to utilize for forecasting/prediction.
models = ["LR", "DTR", "MLP", "XGBoost", "XGBoost_LR", "ADA", "GBR", "Bagging", "StackedRegressor"]

# Metrics used to evaluate the performance of each model.
# MAE, MSE, RMSE and MAPE are named with "neg_" to be recognized by the cross_validate function from scikit-learn.
metric_names = ["r2", "neg_mean_absolute_error", "neg_mean_squared_error", "neg_root_mean_squared_error", "neg_mean_absolute_percentage_error"]
pretty_metric_names = {"r2":"R^2: ", "neg_mean_absolute_error":"MAE: ", "neg_mean_squared_error":"MSE: ","neg_root_mean_squared_error":"RMSE: ", "neg_mean_absolute_percentage_error":"MAPE: "}

# Collecting data from Yahoo Finance

In [27]:
stock_data = {}
for ticker in stocks_list:
    print(f"Downloading {ticker} data")
    # fetch stock data from yahoo finance
    raw_data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
    stock_data[ticker] = raw_data

print("All the data is now downloaded!")

# Save fetched data to csv
for ticker in stocks_list:
    #stock_data[ticker].to_csv("raw_data/data_"+ticker+".csv")

Downloading EQNR.OL data
[*********************100%***********************]  1 of 1 completed
Downloading DNB.OL data
[*********************100%***********************]  1 of 1 completed
Downloading TEL.OL data
[*********************100%***********************]  1 of 1 completed
Downloading NHY.OL data
[*********************100%***********************]  1 of 1 completed
Downloading AKRBP.OL data
[*********************100%***********************]  1 of 1 completed
Downloading YAR.OL data
[*********************100%***********************]  1 of 1 completed
Downloading MOWI.OL data
[*********************100%***********************]  1 of 1 completed
Downloading CL=F data
[*********************100%***********************]  1 of 1 completed
Downloading OSEBX.OL data
[*********************100%***********************]  1 of 1 completed
All the data is now downloaded!


# Preproccesing the data

In [28]:
def add_indicator_columns(data, indicators):
    # Creating label and shifting the selected main_col value by 1.
    label_name = "Label"
    data[label_name] = data[main_col].shift(periods=1)

    # Checking which of the different indicators that should be added as a column (based on input from indicators list).
    if "MA5" in indicators:
        data["MA5"] = data[label_name].rolling(5).mean()
    if "MA20" in indicators:
        data["MA20"] = data[label_name].rolling(20).mean()
    if "MA50" in indicators:
        data["MA50"] = data[label_name].rolling(50).mean()
    if "MA200" in indicators:
        data["MA200"] = data[label_name].rolling(200).mean()
    if "MIN" in indicators:
        data["MIN"] = data[label_name].rolling(20).min()
    if "MAX" in indicators:
        data["MAX"] = data[label_name].rolling(20).max()
    log_ret = np.log(data[label_name] / data[label_name].shift(1))
    if "LOG_RET" in indicators:
        data["LOG_RET"] = log_ret
    if "MOM" in indicators:
        data["MOM"] = log_ret.rolling(20).mean()
    if "VOLA" in indicators:
        data["VOLA"] = log_ret.rolling(20).std()
    if "DIFF" in indicators:
        data["DIFF"] = data[label_name] - data[label_name].shift(1)

    # remove empty vals.
    data.dropna(axis=0, inplace=True)

In [29]:
def create_X_y_arrays(data, label_name):
        # array that contains the indicators data.
        X = data.loc[:, indicators].to_numpy()
        # array with the target data (based on main_col).
        y = data[label_name].to_numpy()
        return X, y

In [30]:
n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

def create_X_y_train_test_split(X, y, current_stock):
    data = stock_data[current_stock]

    for train_index, test_index in tscv.split(data):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

    return X_train, X_test, y_train, y_test

In [31]:
# Adding the specified indicators to the data.
for ticker, data in stock_data.items():
    add_indicator_columns(data=data, indicators=indicators)

  result = getattr(ufunc, method)(*inputs, **kwargs)


# Training models

In [32]:
cv_results = {}
trained_models = {}

def train_models(input_models):
        for ticker, data in stock_data.items():

            X, y = create_X_y_arrays(data=data, label_name="Label")

            X_train, X_test, y_train, y_test = create_X_y_train_test_split(X=X, y=y, current_stock=ticker)

            # Evaluating and training selected models.
            for model_i in input_models:
                model = exam_models.pick_model(model=model_i)
                metric_dict = {}
                for metric_name in metric_names:
                    metric_dict[metric_name] = metric_name
                    
                # using method from sci-kit lib to cross-validate
                cross_val_results = cross_validate(
                    model,
                    X,
                    y,
                    cv=tscv,
                    scoring=metric_dict,
                    return_train_score=True,
                    n_jobs=-1,
                    verbose=0  
                )
                model.fit(X_train, y_train)
                cv_results[ticker+"_model_"+model_i] = cross_val_results 
                trained_models["trained_model_"+model_i+"_"+ticker] = model

        return cv_results

In [33]:
cv_stocks_models = train_models(input_models=models)



# Predicting values based on trained models 

In [34]:
stock_predictions = {}
def predict_trained_models(input_models):
    for ticker, data in stock_data.items():

        # Creating X and y arrays for train and test sets.
        X, y = create_X_y_arrays(data=data, label_name= "Label")

        X_train, X_test, y_train, y_test = create_X_y_train_test_split(X=X, y=y, current_stock=ticker)

        last_train_index, last_test_index = None, None

        for train_index, test_index in tscv.split(data):
            last_train_index, last_test_index = train_index, test_index

        prediction = data.loc[data.index[last_test_index], [main_col]].copy(deep=True)
        stock_predictions[ticker] = prediction

        for model_i in input_models:
            model = trained_models["trained_model_"+model_i+"_"+ticker]
            y_pred = model.predict(X_test)
            prediction.loc[:, model_i+" Prediction"] = y_pred

    return stock_predictions

In [35]:
predicted_stock_data = predict_trained_models(input_models=models)

In [36]:
for ticker in stocks_list:
    print(ticker)
    display(predicted_stock_data[ticker])

EQNR.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,177.834885,172.316810,172.243942,172.558017,173.507355,171.043854,172.470428,173.267839,173.068433,172.328178
2021-08-12,180.592270,176.806584,175.946594,176.924242,175.033615,174.888245,173.154510,176.667954,174.750446,176.703754
2021-08-13,181.174835,179.059650,175.822571,178.393636,179.794601,177.517609,175.524471,178.579158,179.634190,179.073726
2021-08-16,178.902863,178.961513,175.822571,178.197804,179.922943,177.458664,175.524471,178.967578,181.096266,178.954194
2021-08-17,183.330307,178.637226,181.806335,177.326883,181.090668,177.455292,177.672146,178.963687,179.980495,178.631437
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,297.100006,306.968082,189.201675,310.413344,194.206619,292.572083,191.309291,197.213052,192.543054,300.526632
2023-05-03,293.399994,295.793690,188.859863,297.858180,190.254135,283.107758,190.840377,191.138348,190.492087,289.607508
2023-05-04,296.149994,295.854591,188.859863,299.263676,189.804108,283.386871,190.840377,191.291655,190.645909,289.692049
2023-05-05,310.500000,299.235690,188.859863,303.349380,190.501648,286.541412,190.840377,192.103352,191.201379,292.963049


DNB.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-06-24,165.566391,165.328389,160.323730,164.723173,159.140671,166.024445,152.813074,159.964588,159.194864,165.298703
2021-06-25,167.595779,163.058997,160.323730,162.243159,159.400848,163.052368,152.813074,159.157052,159.296333,163.043700
2021-06-28,163.832916,166.527248,160.323730,165.724866,159.256241,165.322906,152.813074,159.014374,159.194864,166.547620
2021-06-29,162.353134,163.436365,154.954224,162.098372,156.675751,161.854477,152.813074,156.363562,157.943385,163.368363
2021-06-30,158.632553,165.053061,157.829224,164.243310,157.786758,162.847305,152.813074,158.157395,157.638971,165.039035
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,187.000000,186.236150,158.970795,186.429129,157.936050,187.960373,152.813074,159.504216,157.638983,186.389280
2023-05-03,187.750000,184.115197,159.393570,183.943011,158.275589,186.103012,152.813074,158.501943,157.182356,184.303785
2023-05-04,183.149994,186.713504,158.463409,187.138675,158.029526,187.151611,152.813074,158.703219,157.266920,186.933848
2023-05-05,185.399994,182.397107,154.954224,182.555183,155.686493,182.693787,152.813074,155.947727,156.484744,182.580838


TEL.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,138.361984,137.873184,138.050247,138.468653,138.390823,136.622070,137.706387,138.552091,138.005714,137.900198
2021-08-12,139.742477,137.480174,138.094788,138.047500,137.512527,136.486725,137.706387,138.297336,137.649448,137.488337
2021-08-13,140.321396,138.849854,138.629150,139.609270,139.386292,137.830566,138.433800,139.567254,138.433864,138.879376
2021-08-16,140.232315,138.863133,138.629150,139.534716,139.125671,137.798981,138.433800,139.419476,138.536288,138.883012
2021-08-17,141.033920,139.633103,140.291122,139.616004,140.355896,139.556335,139.412972,140.723853,140.945033,139.642126
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,131.149994,131.680164,133.152939,131.146849,132.885223,132.582581,131.291136,132.142713,131.211269,131.697763
2023-05-03,130.949997,131.197236,133.152939,130.986670,131.670578,131.756424,131.291136,131.688021,131.555347,131.210266
2023-05-04,128.399994,132.244930,133.152939,132.382524,132.080566,132.598557,131.291136,132.316567,132.691180,132.262270
2023-05-05,128.000000,129.618837,128.982056,130.018589,127.983070,130.509949,131.270207,130.240275,129.723573,129.617354


NHY.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,57.671207,56.449810,55.268242,57.274044,54.604717,56.003208,54.251952,55.566545,55.258783,56.550199
2021-08-12,58.352367,56.561751,55.268242,57.322797,55.257801,56.225105,54.251952,55.642682,55.258783,56.675306
2021-08-13,59.128128,56.886879,55.268242,57.637277,55.271454,56.223598,54.251952,56.121442,55.149040,57.005703
2021-08-16,57.936108,57.886797,55.268242,58.536764,55.137451,57.261105,54.251952,55.551077,55.258783,58.040326
2021-08-17,58.030708,57.300024,54.662773,57.768235,54.932076,56.984867,54.251952,54.696941,54.944693,57.435980
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,76.559998,77.581132,54.662773,78.530051,54.941650,77.121475,54.186403,54.222608,54.946585,78.427053
2023-05-03,76.720001,77.153240,54.662773,78.156330,54.089603,77.070404,54.186403,53.501294,54.946585,77.986674
2023-05-04,74.940002,78.202233,54.662773,79.223090,54.486225,77.861671,54.186403,53.935568,55.048759,79.076187
2023-05-05,73.160004,75.755017,54.662773,76.913445,53.965084,75.872658,54.186403,53.154433,54.946585,76.552769


AKRBP.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,210.668625,216.053991,213.334213,213.054088,213.633575,214.703751,212.458634,213.751332,214.059868,215.460706
2021-08-12,212.356827,210.394461,212.139908,208.182312,211.639908,206.720642,212.458634,209.891843,212.488251,210.110607
2021-08-13,210.846359,213.956727,213.334213,211.566937,213.321884,211.526886,212.458634,212.125203,215.065237,213.592396
2021-08-16,206.137161,211.226714,212.139908,208.648884,211.942749,209.359222,212.362988,210.493391,212.385635,210.857300
2021-08-17,208.713882,207.504736,205.927261,204.968069,211.425201,206.220093,208.701904,208.261489,205.404619,207.204898
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,234.464951,249.296814,233.926163,248.566913,232.453751,255.086197,241.195852,239.557686,237.979761,248.472016
2023-05-03,236.199997,230.805601,229.094803,229.978605,221.337692,235.399292,240.171487,225.867751,229.406078,230.436299
2023-05-04,235.800003,240.521664,233.926163,239.965835,223.259186,247.002808,240.171487,232.637075,231.946730,239.850520
2023-05-05,241.300003,238.451204,233.926163,237.786943,223.651123,244.567963,240.171487,231.157178,232.641533,237.861789


YAR.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,389.759003,395.647485,388.977570,403.094027,380.417389,396.017090,392.206429,391.141631,388.508710,395.923667
2021-08-12,389.498505,390.107836,390.193115,397.432585,377.838593,390.496613,392.206429,388.124353,388.873373,390.415869
2021-08-13,391.756012,387.710175,390.193115,395.460529,377.713898,385.855316,391.920996,389.031594,388.986249,388.054579
2021-08-16,395.923584,392.081937,390.193115,398.098472,390.187653,393.140015,393.752958,391.508683,390.193121,391.800523
2021-08-17,389.064392,395.536572,390.193115,400.601868,392.220947,396.671936,398.462260,392.684391,390.549106,395.202034
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,429.299988,428.215703,408.773712,433.208894,405.930695,425.851868,412.703537,404.623276,411.222217,428.044852
2023-05-03,431.799988,437.849055,410.076111,444.821516,407.346008,435.882355,412.703537,412.457649,418.133514,438.054083
2023-05-04,427.000000,437.423079,410.076111,444.520047,409.977875,435.788055,412.703537,415.624534,417.829626,437.678713
2023-05-05,431.899994,428.441205,403.216919,435.560710,405.546967,428.867371,412.703537,408.100052,410.458154,428.656426


MOWI.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-11,215.904449,219.277143,216.575272,217.405019,217.550354,218.676453,214.635613,218.417178,217.868967,219.263354
2021-08-12,216.287766,217.267966,214.121323,215.149052,218.125473,217.685043,214.635613,217.241738,217.916881,217.168788
2021-08-13,217.054398,217.417704,217.246063,215.884530,216.535248,217.933319,214.635613,216.451197,216.517520,217.367880
2021-08-16,216.958542,217.590599,217.246063,216.133840,216.325745,218.288147,214.635613,216.690617,216.564281,217.537270
2021-08-17,216.096130,218.028584,217.246063,215.662954,215.928650,218.767090,214.635613,216.505656,216.790698,217.997600
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,201.600006,201.056443,201.276337,202.359313,200.872910,201.411545,197.779186,203.195044,202.258742,201.190437
2023-05-03,199.800003,200.289406,198.502655,200.513331,202.551651,200.062714,201.090623,201.154306,200.887828,200.225112
2023-05-04,196.500000,199.756678,198.502655,200.534391,201.964355,199.107666,201.090623,201.106923,200.710956,199.724090
2023-05-05,191.350006,197.013639,197.834320,198.453676,195.933411,196.240143,195.172344,197.284154,197.477560,197.096290


CL=F


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-08-13,68.440002,68.165812,66.820000,67.996134,68.740768,69.134277,69.023731,68.495539,67.295000,68.145306
2021-08-16,67.290001,67.899155,66.480003,67.649485,68.562996,68.643890,69.023731,67.955054,67.410001,67.893440
2021-08-17,66.589996,67.886667,66.430000,67.473132,67.772232,69.689308,69.023731,67.325084,67.495000,67.806822
2021-08-18,65.459999,67.789805,66.430000,67.460303,68.158363,69.411011,67.947079,67.857614,67.639999,67.733627
2021-08-19,63.689999,66.443057,66.809998,66.365724,66.044060,67.855118,67.138213,66.453508,66.535000,66.371286
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,71.660004,74.620481,74.209999,75.374098,75.419594,75.149574,73.729856,74.381626,74.882999,74.830649
2023-05-03,68.599998,70.845895,74.580002,71.989872,74.956955,70.775650,73.426117,72.282750,73.865998,71.286449
2023-05-04,68.559998,69.850691,71.650002,71.508773,70.132805,69.676292,72.277657,70.915482,69.914999,69.955306
2023-05-05,71.339996,70.923995,71.650002,72.545273,71.154556,71.036789,70.900383,71.421057,70.345000,71.099960


OSEBX.OL


Unnamed: 0_level_0,Adj Close,LR Prediction,DTR Prediction,MLP Prediction,XGBoost Prediction,XGBoost_LR Prediction,ADA Prediction,GBR Prediction,Bagging Prediction,StackedRegressor Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-10-22,1217.709961,1211.440835,1217.469971,1198.608059,1207.861328,1186.326538,1189.720101,1202.033309,1206.851978,1211.937267
2021-10-25,1215.099976,1219.210989,1217.469971,1206.956815,1209.122437,1194.847900,1189.720101,1203.789945,1207.803979,1219.897165
2021-10-26,1215.739990,1215.234722,1217.469971,1203.440291,1207.066284,1190.081909,1189.720101,1203.063291,1206.851978,1215.823763
2021-10-27,1211.869995,1218.774696,1217.469971,1208.287866,1207.745117,1195.960449,1189.720101,1203.063291,1207.803979,1219.428946
2021-10-28,1202.089966,1213.151088,1217.469971,1205.008373,1208.679077,1191.535522,1189.720101,1202.033309,1206.851978,1213.767367
...,...,...,...,...,...,...,...,...,...,...
2023-05-02,1208.410034,1221.809359,1217.469971,1221.840198,1211.428711,1212.672485,1189.720101,1208.909152,1207.803979,1222.779471
2023-05-03,1203.479980,1199.928514,1217.469971,1200.306298,1199.211182,1187.240234,1189.720101,1193.040876,1207.096985,1200.858668
2023-05-04,1189.719971,1209.028723,1217.469971,1213.750717,1204.815674,1195.505249,1189.720101,1198.345987,1205.402979,1210.470494
2023-05-05,1204.540039,1197.968307,1217.469971,1204.103023,1195.100830,1182.949463,1189.720101,1185.367440,1203.974976,1198.773298


In [None]:
metrics_df_output = {}
def print_metrics(save_data_to_df=True):
    for ticker, data in stock_data.items():
        print("\n--------", ticker, "--------")

        # prepare dataframe if requested
        if save_data_to_df:
            metrics_df = pd.DataFrame(index=list(pretty_metric_names.values()))
            metrics_df_output[ticker] = metrics_df
            current_metrics_df = metrics_df

        for model_name in models:        
            cv = cv_results[ticker+"_model_"+model_name]

            print("-", model_name, "-")
            print("-", "Training Scores:", "-")
            split1_errors_string = f"Score for first data split \n"
            split3_errors_string = f"Score for third data split \n"
            split5_errors_string = f"Score for last data split \n"

            for error_metric_name in metric_names:
                # some metrics are saved as negative, so change sign
                if error_metric_name.startswith("neg"):
                    try:
                        error_metric_value = -cv["train"+"_"+error_metric_name]
                    # string being passed that can't be negative
                    except:
                        error_metric_value = cv["train"+"_"+error_metric_name]
                else:
                    error_metric_value = cv["train"+"_"+error_metric_name]

                if save_data_to_df:
                    # pass string, used to indicate missing metrics
                    if isinstance(error_metric_value, str):
                        current_metrics_df.loc[pretty_metric_names[error_metric_name], model_name+" Model "+"Train"] = error_metric_value
                    # otherwise mean of all splits
                    else:
                        current_metrics_df.loc[pretty_metric_names[error_metric_name], model_name+" Model "+"Train"] = np.mean(error_metric_value)
        
                if isinstance(error_metric_value, str):
                    split1_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split1_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[0]:.3f}\n"

                if isinstance(error_metric_value, str):
                    split3_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split3_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[2]:.3f}\n"
    
                if isinstance(error_metric_value, str):
                    split5_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split5_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[-1]:.3f}\n"

            # print the two strings
            print(split1_errors_string+"\n"+split3_errors_string+"\n"+split5_errors_string+"\n")
            
            print("-", "Testing Scores:", "-")
            split1_errors_string = f"Score for first data split \n"
            split3_errors_string = f"Score for third data split \n"
            split5_errors_string = f"Score for last data split \n"

            for error_metric_name in metric_names:
                # some metrics are saved as negative, so change sign
                if error_metric_name.startswith("neg"):
                    try:
                        error_metric_value = -cv["test"+"_"+error_metric_name]
                    # string being passed that can't be negative
                    except:
                        error_metric_value = cv["test"+"_"+error_metric_name]
                else:
                    error_metric_value = cv["test"+"_"+error_metric_name]

                if save_data_to_df:
                    # pass string, used to indicate missing metrics
                    if isinstance(error_metric_value, str):
                        current_metrics_df.loc[pretty_metric_names[error_metric_name], model_name+" Model "+"Test"] = error_metric_value
                    # otherwise mean of all splits
                    else:
                        current_metrics_df.loc[pretty_metric_names[error_metric_name], model_name+" Model "+"Test"] = np.mean(error_metric_value)
        
                if isinstance(error_metric_value, str):
                    split1_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split1_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[0]:.3f}\n"

                if isinstance(error_metric_value, str):
                    split3_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split3_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[2]:.3f}\n"

                if isinstance(error_metric_value, str):
                    split5_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value}\n"
                else:
                    split5_errors_string += f"{pretty_metric_names[error_metric_name]} {error_metric_value[-1]:.3f}\n"

            # print the two strings
            print(split1_errors_string+"\n"+split3_errors_string+"\n"+split5_errors_string+"\n")

    if save_data_to_df:
        return metrics_df_output
    else:
        return None

In [None]:
metrics_output = print_metrics(save_data_to_df=True)

# Plots for insights about the data

In [None]:
# PLOT FOR SHOWING DIFFERENT DATA SPLITS FOR THE DIFFERENT STOCKS
for ticker in stocks_list:
    fig, sub_plots = plt.subplots(n_splits, figsize=(16,20))
    fig.set_tight_layout(True)

    data = stock_data[ticker]
    idx = data.index

    tscv = TimeSeriesSplit(n_splits=n_splits)

    splits = list(tscv.split(data))
        
    current_split = 1
    for i in range(len(sub_plots)):
        train_index, test_index = splits[i]

        sub_plots[i].plot(idx[train_index], data.loc[idx[train_index], main_col], label=f"Training data {current_split}", color="blue")
        sub_plots[i].plot(idx[test_index], data.loc[idx[test_index], main_col], label=f"Test data {current_split}", color="red")
        sub_plots[i].set_xlim(idx[0], idx[-1])
        sub_plots[i].set_title(f"Train / test split {current_split} for {ticker}")
        sub_plots[i].set_xlabel("Date")
        sub_plots[i].set_ylabel(f"{main_col}")
        sub_plots[i].legend()

        current_split = current_split + 1
    
    #fig.savefig("data_splits_plots/"+ticker+".png")

fig.show()

In [None]:
# PLOT FOR COMPARING ACTUAL TO THE DIFFERENT MODELS PREDICTED VALUES
for ticker, data in stock_data.items():
    figure, axs = plt.subplots(figsize=(32,16))
    
    X, y = create_X_y_arrays(data=data, label_name="Label")
    X_train, X_test, y_train, y_test = create_X_y_train_test_split(X=X, y=y, current_stock=ticker)

    X_test_index = np.arange(X_train.shape[0], X_train.shape[0]+X_test.shape[0])

    print(X_test_index)

    plt.plot(data.index[X_test_index], y_test, color='purple', label='Actual', linewidth="4.0")
    for model in models:
        plt.plot(data.index[X_test_index], stock_predictions[ticker][model+" Prediction"], label=model)
    plt.title(f'Actual vs Predicted, {ticker}')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend()

    #plt.savefig("actual_predicted_plots/"+ticker+".png")
    plt.show()

In [None]:
# CORRELATION PLOT BETWEEN THE DIFFERENT STOCKS
adj_close_prices = pd.DataFrame({i: j[main_col] for i, j in stock_data.items()})

corr = adj_close_prices.corr()

sns.heatmap(corr, cmap="coolwarm", annot=True)

plt.title("Correlation between stocks")

#plt.savefig("correlation_plots/correlation_stocks.png")
plt.show()

# Saving the data

In [37]:
for ticker in stocks_list:
    print(ticker)
    display(metrics_output[ticker])
    #metrics_output[ticker].to_csv("saved_metrics/stock_"+ticker+".csv")
    #stock_data[ticker].to_pickle("saved_data_pickle/stock_"+ticker+".pkl")
    predicted_stock_data[ticker].to_csv("saved_predictions/_"+ticker+".csv")

EQNR.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.996985,0.990769,1.0,-0.21438,0.991094,0.96479,0.999925,-0.266899,0.967126,0.734653,0.990928,-0.270874,0.998195,-0.232131,0.999012,-0.268687,0.996932,0.987479
MAE:,0.743181,1.633022,0.0,26.163471,1.231936,2.508528,0.127656,26.172964,2.322553,7.160678,1.446361,26.416699,0.615402,25.669145,0.376656,26.417137,0.744425,2.332848
MSE:,1.062961,5.990698,0.0,2973.662219,2.620583,12.762969,0.046184,3030.749249,8.299787,81.274224,3.599389,3000.76623,0.724019,2921.262496,0.333223,3002.072999,1.053638,16.429672
RMSE:,1.001151,2.077144,0.0,30.677009,1.609779,3.207665,0.177935,30.737324,2.845208,8.198133,1.813293,31.063832,0.797773,30.337502,0.561429,31.05513,1.000669,2.880701
MAPE:,0.006715,0.009519,0.0,0.099931,0.011304,0.016034,0.001123,0.097844,0.021952,0.0444,0.013135,0.100877,0.005528,0.096896,0.00343,0.102535,0.006736,0.011704


DNB.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.998002,0.983546,1.0,-0.856536,0.992907,0.945687,0.999953,-0.871664,0.980911,0.866086,0.993257,-1.434293,0.998953,-0.786255,0.999328,-0.802833,0.998035,0.983394
MAE:,0.509645,0.868352,0.0,9.359718,0.816137,1.508948,0.090386,9.347225,1.391779,3.066436,1.062995,10.489471,0.408647,9.017193,0.259506,9.089574,0.503361,0.864117
MSE:,0.493871,1.428907,0.0,174.136664,1.140976,5.585848,0.021377,176.014437,3.226879,15.043678,1.970143,219.629367,0.319329,167.390655,0.175546,168.10333,0.479535,1.435764
RMSE:,0.686201,1.133516,0.0,12.278676,1.065704,2.137921,0.124787,12.418069,1.791846,3.646611,1.334923,13.430718,0.535573,12.013536,0.403432,12.067085,0.677175,1.136575
MAPE:,0.007209,0.008043,0.0,0.073903,0.012102,0.01603,0.001229,0.073846,0.022169,0.030361,0.015285,0.080857,0.005683,0.070377,0.003664,0.071104,0.007145,0.008027


TEL.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.997267,0.983683,1.0,0.472683,0.99004,0.965697,0.999944,0.512742,0.964627,0.766699,0.99219,0.469688,0.998584,0.557869,0.999168,0.530449,0.997098,0.983096
MAE:,0.566236,0.733967,0.0,4.106922,0.91154,1.190547,0.090641,3.757261,1.994093,3.218683,1.054456,4.1864,0.448504,3.595307,0.278473,3.760937,0.577446,0.754117
MSE:,0.580806,0.938487,0.0,43.533395,1.464095,2.442197,0.021967,43.290654,5.764096,17.742748,1.849286,47.275213,0.375936,40.035146,0.176833,41.980213,0.600166,0.982848
RMSE:,0.754305,0.959887,0.0,5.762163,1.201603,1.519934,0.125751,5.551631,2.398316,3.913891,1.330742,5.879049,0.590369,5.234063,0.416253,5.356821,0.7681,0.982606
MAPE:,0.006258,0.006577,0.0,0.036199,0.010326,0.010977,0.000974,0.032988,0.023752,0.028825,0.011745,0.036689,0.00488,0.031563,0.003075,0.033224,0.006391,0.006777


NHY.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.997467,0.987415,1.0,0.001633,0.979465,0.958291,0.999942,0.032483,0.980248,0.832984,0.992308,-0.01822,0.998676,0.015144,0.99917,0.032348,0.997556,0.98741
MAE:,0.216049,0.50299,0.0,4.715225,0.369692,0.719381,0.037713,4.499134,0.434566,1.332363,0.431083,4.682816,0.176837,4.525284,0.107361,4.606038,0.215456,0.523171
MSE:,0.092438,0.5059,0.0,65.447635,0.24443,1.026624,0.004083,65.196745,0.2883,3.052542,0.321495,67.689685,0.062692,65.745855,0.027983,63.643742,0.092318,0.553801
RMSE:,0.292398,0.64969,0.0,6.082134,0.482135,0.919295,0.05292,5.811735,0.534114,1.554044,0.536984,5.988071,0.233045,5.758971,0.162365,5.952242,0.291366,0.687865
MAPE:,0.008281,0.013185,0.0,0.092884,0.015402,0.02063,0.001449,0.083906,0.018296,0.039226,0.017508,0.08925,0.006765,0.085031,0.004126,0.08976,0.008246,0.013527


AKRBP.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.997471,0.987651,1.0,-1.173344,0.989144,0.958319,0.999974,-1.179201,0.979571,0.878016,0.992993,-1.240224,0.998899,-1.119444,0.999251,-1.321336,0.997697,0.974024
MAE:,0.792449,2.143667,0.0,28.506369,1.112373,3.211854,0.144429,28.828296,1.639652,6.070729,1.999527,28.372468,0.626558,27.788454,0.376157,29.477289,0.769233,2.798304
MSE:,1.749204,10.004887,0.0,1591.433511,3.06793,19.279097,0.066856,1657.88316,5.811785,69.131383,9.353629,1573.255916,1.038882,1527.413469,0.510305,1699.09977,1.63061,15.179106
RMSE:,1.16519,2.805306,0.0,33.92759,1.633364,4.191481,0.200391,33.816781,2.256487,7.231358,2.551002,33.831904,0.87673,32.936472,0.632009,34.784198,1.126615,3.593715
MAPE:,0.011515,0.014702,0.0,0.19552,0.016497,0.026847,0.002136,0.189457,0.027964,0.050698,0.03444,0.195748,0.008951,0.18707,0.005388,0.194583,0.011147,0.022037


YAR.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.995745,0.98301,1.0,-0.016135,0.978203,0.960703,0.999909,-0.007071,0.961541,0.655861,0.990474,-0.031936,0.997809,0.07079,0.99873,0.045047,0.995412,0.977786
MAE:,1.563545,2.820176,0.0,20.143209,2.64801,4.275038,0.27815,19.340437,4.102428,11.25237,2.71247,19.83943,1.254311,18.231405,0.766314,18.561577,1.588073,3.184701
MSE:,4.346831,14.05796,0.0,1090.942853,11.492002,31.804501,0.205592,1068.602849,26.861801,292.556698,12.519057,1047.805089,2.898339,958.608659,1.263751,988.295989,4.474987,18.346529
RMSE:,2.038675,3.610949,0.0,27.888699,3.373558,5.380367,0.385528,27.206476,5.145519,13.377806,3.390294,27.326063,1.623027,25.618995,1.102195,25.837961,2.072681,4.026311
MAPE:,0.007311,0.009782,0.0,0.066625,0.012813,0.014967,0.001285,0.062821,0.020639,0.041348,0.013115,0.064975,0.005838,0.059766,0.00358,0.061318,0.007434,0.011185


MOWI.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.998952,0.990506,1.0,-1.839472,0.988862,0.968156,0.999981,-1.231077,0.992089,0.92296,0.995671,-1.336703,0.999517,-1.140787,0.999649,-1.144605,0.998741,0.989415
MAE:,0.638998,1.288049,0.0,16.824719,1.133521,2.279128,0.119343,15.946646,1.399639,3.246322,1.66967,16.328259,0.51002,15.211037,0.306631,15.224386,0.654097,1.333954
MSE:,0.895393,3.204626,0.0,544.75093,2.635649,9.303142,0.042872,517.692688,3.412336,17.106117,5.771571,534.368429,0.585606,482.974193,0.245824,492.101659,0.916591,3.349062
RMSE:,0.881441,1.688602,0.0,19.943286,1.536275,2.89853,0.16568,19.220786,1.789781,3.981474,2.114815,19.584958,0.686433,18.476414,0.469815,18.482437,0.901027,1.750849
MAPE:,0.008083,0.008576,0.0,0.123415,0.015487,0.01704,0.001468,0.11348,0.021246,0.026051,0.025576,0.117837,0.006153,0.110186,0.003812,0.110084,0.008363,0.009041


CL=F


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.996575,0.987903,1.0,0.129621,0.982882,0.979212,0.999959,0.280338,0.96319,0.462852,0.991886,0.180349,0.998569,0.233707,0.998896,0.192584,0.996882,0.986949
MAE:,0.575141,0.856079,0.0,8.46447,0.840873,1.237754,0.089111,7.554537,1.615894,4.674823,1.060319,8.38088,0.448366,7.717834,0.285155,8.076862,0.54576,0.960926
MSE:,0.545862,1.55469,0.0,297.042538,1.221928,3.165144,0.019427,260.902947,4.370454,47.12439,1.725674,283.861814,0.32666,280.74572,0.158959,284.523953,0.49224,1.966455
RMSE:,0.738008,1.153589,0.0,10.027757,1.056359,1.58025,0.122525,8.922371,2.052797,5.862376,1.302739,9.903212,0.563966,9.143775,0.39835,9.549923,0.700607,1.260132
MAPE:,0.009054,0.015343,0.0,0.18494,0.012319,0.02196,0.001464,0.167498,0.02641,0.099569,0.016861,0.181413,0.007076,0.171894,0.004492,0.177457,0.008556,0.017739


OSEBX.OL


Unnamed: 0,LR Model Train,LR Model Test,DTR Model Train,DTR Model Test,MLP Model Train,MLP Model Test,XGBoost Model Train,XGBoost Model Test,XGBoost_LR Model Train,XGBoost_LR Model Test,ADA Model Train,ADA Model Test,GBR Model Train,GBR Model Test,Bagging Model Train,Bagging Model Test,StackedRegressor Model Train,StackedRegressor Model Test
R^2:,0.994848,0.979712,1.0,0.125176,0.820841,0.776224,0.999962,0.156037,0.911728,0.323643,0.988226,-0.250857,0.997741,0.129419,0.998428,0.167747,0.995239,0.979147
MAE:,3.163232,4.788909,0.0,44.864994,10.663121,14.103808,0.439403,43.684256,14.501346,41.679637,6.173987,53.017135,2.454449,43.827614,1.562857,43.333654,3.092078,4.799669
MSE:,17.624453,43.234834,0.0,5719.018999,240.674259,396.297767,0.548877,5953.288481,321.697051,2531.94864,64.197117,6927.159022,10.599083,5850.139278,5.710568,5591.893622,17.114569,44.461639
RMSE:,4.162509,6.329614,0.0,59.851689,12.715524,17.029071,0.607344,59.713119,17.702334,45.634958,7.667849,68.623445,3.145412,59.989699,2.36419,58.848024,4.088779,6.426422
MAPE:,0.004804,0.005503,0.0,0.047953,0.017121,0.019347,0.000632,0.046523,0.022299,0.047157,0.009242,0.055653,0.003675,0.046529,0.002368,0.046238,0.004689,0.005541
