In [33]:
from Models import ML_Models

In [36]:
import yfinance as yf
import pandas as pd
import inspect
import datetime as dt
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [18]:
# Notebook configuration: every tunable value used by the cells below.

# Date range for the downloaded price history. end_date is evaluated each time
# this cell runs, so the downloaded data (and all results) change from day to day.
start_date = "2012-01-01"
end_date = dt.date.today()
# Price column the label is derived from and that is shown next to the predictions.
main_col = "Adj Close"
# Sampling interval forwarded to yf.download.
interval = "1d"
# Tickers to download; the entries after '#' are currently disabled.
stocks_list = ["EQNR.OL", "DNB.OL", "TEL.OL", "NHY.OL"]#, "AKRBP.OL", "YAR.OL", "MOWI.OL", "CL=F", "OSEBX.OL"]
# Indicator columns to build and use as model features; the entries after '#' are disabled.
indicators = ["MA5", "MA20", "MA50", "MA200"]#, "MIN", "MAX", "LOG_RET", "MOM", "VOLA", "DIFF"]
# Model keys understood by pick_model.
models = ["Linear", "DTR", "MLP"]
# Scorer names handed to sklearn's cross_validate in train_models.
metric_names = ["r2", "neg_mean_absolute_error", "neg_root_mean_squared_error", "neg_mean_absolute_percentage_error"]

In [19]:
# Instance of the project-local ML_Models class; pick_model calls its
# model_Linear / model_DTR / model_MLP methods to obtain estimators.
exam_models = ML_Models()

# Collecting data from Yahoo Finance

In [20]:
# Maps ticker symbol -> price history as returned by yf.download.
# Later cells add columns to and drop rows from these objects in place.
stock_data = {}
for ticker in stocks_list:
    print(f"Downloading {ticker} data")
    # fetch stock data from yahoo finance
    raw_data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
    stock_data[ticker] = raw_data

# Save fetched data to csv
# (disabled: the export below is commented out, so nothing is written to disk)
#for ticker in stocks_list:
#    stock_data[ticker].to_csv("Saved_Data/rawdata_"+ticker+".csv")

Downloading EQNR.OL data
[*********************100%***********************]  1 of 1 completed
Downloading DNB.OL data
[*********************100%***********************]  1 of 1 completed
Downloading TEL.OL data
[*********************100%***********************]  1 of 1 completed
Downloading NHY.OL data
[*********************100%***********************]  1 of 1 completed


# Preprocessing the data

In [21]:
def add_indicator_columns(data, indicators, price_col=None):
    """Add a lagged ``"Label"`` column and the requested indicator columns to ``data``.

    The frame is modified **in place** and nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Price history containing the column ``price_col``.
    indicators : collection of str
        Names of the indicator columns to create. Recognised names are
        ``MA5``, ``MA20``, ``MA50``, ``MA200``, ``MIN``, ``MAX``, ``LOG_RET``,
        ``MOM``, ``VOLA`` and ``DIFF``; unknown names are ignored.
    price_col : str, optional
        Column the label is derived from. Defaults to the notebook-level
        ``main_col`` setting, which was the only option before this
        parameter existed.

    Notes
    -----
    * ``Label`` is ``price_col`` shifted down by one row, i.e. the previous
      row's price.
    * Every row that contains a missing value in *any* column is dropped at
      the end. Calling this function again on the same frame therefore drops
      a fresh batch of warm-up rows each time (it is not idempotent).
    * NOTE(review): every indicator is computed from a window that ends on the
      current row's ``Label``. If ``Label`` is also used as the regression
      target, the features contain the target value itself - confirm that
      this is intended.
    """
    if price_col is None:
        # Fall back to the notebook configuration (original behaviour).
        price_col = main_col

    # Creating label and shifting the selected price column by 1.
    label_name = "Label"
    data[label_name] = data[price_col].shift(periods=1)
    label = data[label_name]

    # (column name, window length, rolling aggregate); order fixes the column order.
    rolling_specs = (
        ("MA5", 5, "mean"),
        ("MA20", 20, "mean"),
        ("MA50", 50, "mean"),
        ("MA200", 200, "mean"),
        ("MIN", 20, "min"),
        ("MAX", 20, "max"),
    )
    for column, window, aggregate in rolling_specs:
        if column in indicators:
            data[column] = getattr(label.rolling(window), aggregate)()

    # Log returns feed three indicators; only compute them when one is requested.
    if any(name in indicators for name in ("LOG_RET", "MOM", "VOLA")):
        log_ret = np.log(label / label.shift(1))
        if "LOG_RET" in indicators:
            data["LOG_RET"] = log_ret
        if "MOM" in indicators:
            data["MOM"] = log_ret.rolling(20).mean()
        if "VOLA" in indicators:
            data["VOLA"] = log_ret.rolling(20).std()

    if "DIFF" in indicators:
        data["DIFF"] = label - label.shift(1)

    # Remove rows with missing values (the shift and the rolling warm-up rows).
    data.dropna(axis=0, inplace=True)

In [22]:
def create_X_y_arrays(data, label_name, feature_cols=None):
    """Extract the feature matrix and target vector from ``data`` as NumPy arrays.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and the ``label_name`` column.
    label_name : str
        Column to use as the target.
    feature_cols : sequence of str, optional
        Columns to use as features. Defaults to the notebook-level
        ``indicators`` list, which was the only option before this
        parameter existed.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` holds the selected feature columns (one column per feature, in
        the order given); ``y`` holds the values of ``label_name``.
    """
    if feature_cols is None:
        # Fall back to the notebook configuration (original behaviour).
        feature_cols = indicators

    # list(...) so that a tuple is treated as several column names rather
    # than as one (multi-level) key by .loc.
    X = data.loc[:, list(feature_cols)].to_numpy()
    y = data[label_name].to_numpy()
    return X, y

In [23]:
# Number of folds for the time-series cross-validation splitter shared by the
# training and evaluation cells.
n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)


def create_X_y_train_test_split(X, y, current_stock):
    """Return the train/test arrays of the *last* fold produced by ``tscv``.

    Parameters
    ----------
    X, y : numpy.ndarray
        Feature matrix and target vector with the same number of rows.
    current_stock : str
        Unused. Kept so existing callers that pass it keep working; the split
        is now derived from ``X`` itself instead of looking the ticker up in
        the global ``stock_data`` (which raised ``KeyError`` for unknown
        tickers and produced wrong indices if that frame's length differed
        from ``X``).

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
        Slices of ``X`` and ``y`` for the final fold.
    """
    # Only the final fold is wanted, so exhaust the splitter and keep the last pair.
    train_index, test_index = list(tscv.split(X))[-1]

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    return X_train, X_test, y_train, y_test

In [24]:
# Add the label/indicator columns to every downloaded frame. add_indicator_columns
# mutates each frame in place and drops rows with missing values, so re-running this
# cell shortens the frames again.
# X, y and the split variables are overwritten on every iteration, so after the loop
# they only describe the last ticker; train_models and
# predict_and_evaluate_trained_models rebuild them per ticker themselves.
for ticker, data in stock_data.items():
    add_indicator_columns(data=data, indicators=indicators)
    X, y = create_X_y_arrays(data=data, label_name="Label")
    X_train, X_test, y_train, y_test = create_X_y_train_test_split(X=X, y=y, current_stock=ticker)


In [25]:
#display(stock_data)

# Training models

In [26]:
# Helper function that choose model from models class
def pick_model(model):
    """Return the estimator for the model key ``model``.

    Parameters
    ----------
    model : str
        One of ``"Linear"``, ``"DTR"`` or ``"MLP"``.

    Returns
    -------
    The object returned by the matching ``model_*`` method of ``exam_models``.

    Raises
    ------
    ValueError
        If ``model`` is not a known key. Previously the function silently
        returned ``None`` in that case, which only failed later and far away
        from the actual mistake.
    """
    # Model key -> name of the method on exam_models that builds the estimator.
    factory_names = {
        "Linear": "model_Linear",
        "DTR": "model_DTR",
        "MLP": "model_MLP",
    }
    if model not in factory_names:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(factory_names)}"
        )
    return getattr(exam_models, factory_names[model])()

In [27]:
# Filled by train_models:
#   cv_results:     "<ticker>_model_<model key>"          -> cross_validate result
#   trained_models: "trained_model_<model key>_<ticker>"  -> fitted estimator
cv_results = {}
trained_models = {}


def train_models(input_models):
    """Cross-validate and fit every requested model for every downloaded ticker.

    For each ticker in ``stock_data`` and each key in ``input_models`` the
    model is cross-validated on the full ``X``/``y`` with the ``tscv``
    splitter and all ``metric_names`` scorers, and then fitted on the training
    part of the split returned by ``create_X_y_train_test_split``.

    The cross-validation and the fit now run once per model. Previously both
    sat inside the loop over ``metric_names``, so each model was
    cross-validated and re-fitted once per metric with a partially filled
    scorer dict, and only the last round was kept.

    Parameters
    ----------
    input_models : iterable of str
        Model keys accepted by ``pick_model``.

    Returns
    -------
    dict
        The module-level ``cv_results`` dict (also updated in place, as is
        ``trained_models``).
    """
    # Scorer label -> scorer name; built once because it is the same for every model.
    scoring = {metric_name: metric_name for metric_name in metric_names}

    for ticker, data in stock_data.items():
        X, y = create_X_y_arrays(data=data, label_name="Label")
        # Only the training part is needed here; the test part is used by the
        # evaluation cell.
        X_train, _, y_train, _ = create_X_y_train_test_split(X=X, y=y, current_stock=ticker)

        # Evaluating and training selected models.
        for model_i in input_models:
            model = pick_model(model=model_i)

            # using method from sci-kit lib to cross-validate
            cv_results[ticker + "_model_" + model_i] = cross_validate(
                model,
                X,
                y,
                cv=tscv,
                scoring=scoring,
                return_train_score=True,
                n_jobs=-1,
                verbose=0,
            )

            model.fit(X_train, y_train)
            trained_models["trained_model_" + model_i + "_" + ticker] = model

    return cv_results

In [28]:
# Run cross-validation and fitting for all configured models; the returned dict is
# the same object as the module-level cv_results.
cv_stocks_models = train_models(input_models=models)

In [29]:
# Show the fitted estimators keyed by "trained_model_<model key>_<ticker>".
#display(cv_stocks_models)
display(trained_models)

{'trained_model_Linear_EQNR.OL': LinearRegression(),
 'trained_model_DTR_EQNR.OL': DecisionTreeRegressor(criterion='friedman_mse', min_samples_leaf=5),
 'trained_model_MLP_EQNR.OL': MLPRegressor(max_iter=500, random_state=1),
 'trained_model_Linear_DNB.OL': LinearRegression(),
 'trained_model_DTR_DNB.OL': DecisionTreeRegressor(criterion='friedman_mse', min_samples_leaf=5),
 'trained_model_MLP_DNB.OL': MLPRegressor(max_iter=500, random_state=1),
 'trained_model_Linear_TEL.OL': LinearRegression(),
 'trained_model_DTR_TEL.OL': DecisionTreeRegressor(criterion='friedman_mse', min_samples_leaf=5),
 'trained_model_MLP_TEL.OL': MLPRegressor(max_iter=500, random_state=1),
 'trained_model_Linear_NHY.OL': LinearRegression(),
 'trained_model_DTR_NHY.OL': DecisionTreeRegressor(criterion='friedman_mse', min_samples_leaf=5),
 'trained_model_MLP_NHY.OL': MLPRegressor(max_iter=500, random_state=1)}

# Predicting and evaluating values based on trained models

In [30]:
# Ticker -> frame with the main_col prices of the final test fold plus one
# "<model key> Prediction" column per evaluated model.
stock_predictions = {}


def predict_and_evaluate_trained_models(input_models):
    """Predict on the final test fold with the stored models and print error metrics.

    For every ticker in ``stock_data`` the already fitted estimators are
    looked up in ``trained_models``, used to predict ``X_test`` of the last
    ``tscv`` fold, and scored against ``y_test`` with mean squared error,
    R-squared and mean absolute error (printed, not returned).

    NOTE(review): the returned frames show ``main_col`` for the test rows,
    while the metrics compare the predictions with the ``"Label"`` column -
    confirm that the two are meant to differ.

    Parameters
    ----------
    input_models : iterable of str
        Model keys; each must have an entry in ``trained_models`` for every ticker.

    Returns
    -------
    dict
        The module-level ``stock_predictions`` dict, updated in place.
    """
    for ticker, frame in stock_data.items():
        # Creating X and y arrays and taking the test part of the last fold.
        features, target = create_X_y_arrays(data=frame, label_name="Label")
        _, X_test, _, y_test = create_X_y_train_test_split(
            X=features, y=target, current_stock=ticker
        )

        # Row positions of the last fold's test part, used to pick the matching dates.
        _, final_test_positions = list(tscv.split(frame))[-1]
        test_dates = frame.index[final_test_positions]

        prediction = frame.loc[test_dates, [main_col]].copy(deep=True)
        stock_predictions[ticker] = prediction

        for model_key in input_models:
            fitted = trained_models["trained_model_" + model_key + "_" + ticker]
            y_pred = fitted.predict(X_test)
            prediction.loc[:, model_key + " Prediction"] = y_pred

            mse = mean_squared_error(y_test, y_pred)
            print(f"--------{ticker} {model_key}--------")
            print("Mean squared error for: ", mse)
            print("R-squared score for: ", r2_score(y_test, y_pred))
            print("Mean absolute error: ", mean_absolute_error(y_test, y_pred))

    return stock_predictions

In [31]:
# Evaluate every configured model on the final test fold; prints the metrics and
# returns the per-ticker prediction frames (same object as stock_predictions).
predicted_stock_data = predict_and_evaluate_trained_models(input_models=models)

--------EQNR.OL Linear--------
Mean squared error for:  45.12388528167932
R-squared score for:  0.9869846078583563
Mean absolute error:  5.199018305983749
--------EQNR.OL DTR--------
Mean squared error for:  14852.86444036222
R-squared score for:  -3.284113699679094
Mean absolute error:  107.30717027641735
--------EQNR.OL MLP--------
Mean squared error for:  53.13509532697631
R-squared score for:  0.9846738795241773
Mean absolute error:  5.562724241802289
--------DNB.OL Linear--------
Mean squared error for:  6.510242176398309
R-squared score for:  0.9071688451899822
Mean absolute error:  1.9461447622548322
--------DNB.OL DTR--------
Mean squared error for:  368.1044720213788
R-squared score for:  -4.248892791171307
Mean absolute error:  17.324426934026903
--------DNB.OL MLP--------
Mean squared error for:  7.299168852333417
R-squared score for:  0.8959193444182609
Mean absolute error:  2.037388315049247
--------TEL.OL Linear--------
Mean squared error for:  2.284998483534875
R-squared

In [32]:
# Show, per ticker, the main_col prices of the test rows next to each model's predictions.
for ticker in stocks_list:
    display(predicted_stock_data[ticker])

Unnamed: 0_level_0,Adj Close,Linear Prediction,DTR Prediction,MLP Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-08-09,173.749207,171.673255,170.295610,171.356427
2021-08-10,174.115448,172.485196,172.761688,172.080679
2021-08-11,177.834869,173.435193,170.942284,172.951312
2021-08-12,180.592300,175.060614,170.942284,174.415759
2021-08-13,181.174835,177.182880,176.028475,176.414501
...,...,...,...,...
2023-04-27,297.250000,302.110461,189.685941,304.289768
2023-04-28,305.000000,301.083183,189.685941,303.650337
2023-05-02,297.100006,301.481060,189.685941,304.207576
2023-05-03,293.399994,299.467854,189.685941,302.595009


Unnamed: 0_level_0,Adj Close,Linear Prediction,DTR Prediction,MLP Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-22,160.323730,158.407342,158.209743,158.522835
2021-06-23,165.397263,158.708993,158.209743,158.818027
2021-06-24,165.566376,160.506737,158.209743,160.493120
2021-06-25,167.595779,161.975570,158.209743,161.861803
2021-06-28,163.832916,164.341682,158.209743,164.018377
...,...,...,...,...
2023-04-27,184.100006,185.316047,158.209743,184.987886
2023-04-28,187.199997,184.206112,158.209743,184.108571
2023-05-02,187.000000,184.311770,158.209743,184.349010
2023-05-03,187.750000,184.025552,158.209743,184.216548


Unnamed: 0_level_0,Adj Close,Linear Prediction,DTR Prediction,MLP Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-08-09,138.050247,138.031895,138.776857,137.402065
2021-08-10,138.540100,138.082038,138.776857,137.493009
2021-08-11,138.361984,138.165635,138.776857,137.620337
2021-08-12,139.742477,138.180224,138.776857,137.706395
2021-08-13,140.321396,138.385848,138.776857,137.944717
...,...,...,...,...
2023-04-27,134.199997,130.348199,130.791400,129.491796
2023-04-28,132.949997,131.331017,131.780276,130.401464
2023-05-02,131.149994,132.425302,133.512674,131.399528
2023-05-03,130.949997,132.697473,133.512674,131.724433


Unnamed: 0_level_0,Adj Close,Linear Prediction,DTR Prediction,MLP Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-08-09,54.662773,56.051552,55.623537,55.968070
2021-08-10,56.384583,55.637979,55.623537,55.640400
2021-08-11,57.671207,55.624090,55.623537,55.659353
2021-08-12,58.352367,55.872325,55.623537,55.904983
2021-08-13,59.128128,56.699481,55.623537,56.623605
...,...,...,...,...
2023-04-27,80.519997,80.234851,55.623537,80.079203
2023-04-28,78.220001,79.829266,55.623537,79.836103
2023-05-02,76.559998,79.349533,55.623537,79.475615
2023-05-03,76.720001,78.296494,55.623537,78.612526
