# TRANSFORMER CODE

Place this template in the data directory. <br>
Use one instance per stock in the cyclic portfolio, each with its identified model dictionaries.

In [None]:
import os, pickle
import matplotlib.pyplot as plt
from google.colab import drive
# Mount Google Drive (Colab only). The data paths used below are relative
# ('drive/MyDrive/...'), so this presumably relies on the notebook's working
# directory being '/content' -- TODO confirm.
drive.mount('/content/drive')

In [None]:
# Ticker symbol; it is also the name of the per-stock directory under FYP/.
ticker = 'ms' # ticker name -> directory for data
# Directory the prepared train/val/test CSVs and cluster_details.pkl are read from.
wkdir = f'drive/MyDrive/Colab/FYP/{ticker}/data'

In [None]:
import pandas as pd
# Load the prepared train / validation / test frames, all indexed by 'Date'.
ftraindf, fvaldf, ftestdf = (
    pd.read_csv(f'{wkdir}/{split_name}.csv', index_col = 'Date')
    for split_name in ('ftraindf', 'fvaldf', 'ftestdf')
)

# Cluster statistics; evaluateModel reads cluster_details[header][column]['mean'].
with open(f'{wkdir}/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)


Config

In [None]:

from datetime import date

# NOTE(review): of the constants in this cell only PERIOD and PRED_PERIOD are
# referenced by the code visible in this notebook; the others are presumably
# consumed by other stages of the project -- confirm before changing or removing.
ZERO_REPLACEMENT = 0.0001
# Look-back used to build the reference-price column name f'refPrice_Tm{PERIOD}'.
PERIOD = 14
# Number of prediction horizons: one model is trained for each T+1 .. T+PRED_PERIOD.
PRED_PERIOD = 13
TEST_START = date(2019, 1, 1)
TEST_END = date(2024, 1, 1)
VAL_START = date(2018, 1, 1)


INITIALIZATION_MIN = 5

# NOTE(review): max and min cluster counts are equal -- presumably this pins the
# number of clusters to exactly 15; verify that this is intended.
CLUSTER_MAXCLUSTERS = 15
CLUSTER_MINCLUSTERS = 15
FACTOR = 2

MERGED_MIN_STD = 2
MERGED_MIN = 4
MERGED_MAX = 8

MERGED_YMAX = 8
MERGED_MINY_STD = 2


MAX_LAYERS = 4

Neural

In [None]:

import torch, gc
from tqdm.auto import tqdm
import os, sys
import pandas as pd
import torch, pickle
import gc

# from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from tqdm.auto import tqdm

def monoNeuralPipeline(ftraindf, fvaldf, ftestdf, model_dict, cluster_details, plus_target, chosen_features = None, mode = 1):
    """Build, train and evaluate one model for the T+``plus_target`` horizon.

    Args:
        ftraindf, fvaldf, ftestdf: train / validation / test frames.
        model_dict: model configuration. It is updated in place with
            'input_size', 'output_size' and 'day_target'; for transformers the
            selected head count is stored as well.
        cluster_details: cluster statistics forwarded to training/evaluation.
        plus_target: prediction horizon (the N in "T+N").
        chosen_features: optional column selection applied to all three frames
            before they are split into X / y / ref.
        mode: accepted for interface compatibility; not used by this function.

    Returns:
        (model, eval_res) where eval_res is the dict produced by evaluateModel
        with an extra 'header' entry naming the target column prefix.
    """
    header = f'y_Tp{plus_target}_PriceChg'

    # restrict every frame to the selected columns when a selection is given
    # ("is not None": an array-like selection must not be compared element-wise)
    if chosen_features is not None:
        ftraindf, fvaldf, ftestdf = ftraindf[chosen_features], fvaldf[chosen_features], ftestdf[chosen_features]
    trainset, valset, testset = neuralDataPreparation(ftraindf=ftraindf, fvaldf=fvaldf, ftestdf=ftestdf, plus_target = plus_target)

    # update model_dict for sizes
    model_dict['input_size'] = len(trainset['X'].columns)
    model_dict['output_size'] = len(trainset['y'].columns)
    model_dict['day_target'] = plus_target

    is_transformer = model_dict['model_type'] == 'transformer'

    if is_transformer:
        # the head count must divide the embedding size (= number of inputs);
        # pick the largest divisor not exceeding 10 (1 always qualifies)
        nhead = max(i for i in range(1, 11) if model_dict['input_size'] % i == 0)
        model_dict['transformer']['nhead'] = nhead
        # Transformer.__init__ reads the 'nheads' key, so the computed value
        # has to be stored there too or it is silently ignored
        model_dict['transformer']['nheads'] = nhead

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = neuralConstructor(model_dict).to(device)
    else:
        model = neuralConstructor(model_dict)

    outcome = {}

    # train the model (the keras path yields no train-set predictions)
    if is_transformer:
        model, outcome['train_pred'], outcome['val_pred'], outcome['test_pred'] = pytorchTrain(model=model, model_dict=model_dict, trainset=trainset, valset=valset, testset=testset)
    else:
        model, outcome['val_pred'], outcome['test_pred'] = kerasTrain(model, model_dict, cluster_details, trainset, valset, testset, header)

    # evaluate the model
    eval_res = evaluateModel(outcome=outcome, trainset=trainset, valset=valset, testset=testset, cluster_details=cluster_details, header=header, plus_target=plus_target)

    # update header value for dict
    eval_res['header'] = header

    gc.collect()

    return model, eval_res


def neuralDataPreparation(ftraindf, fvaldf, ftestdf, plus_target, period = PERIOD):
    """Split each frame into feature ('X'), target ('y') and reference ('ref') columns.

    Column groups are chosen by substring match on the training frame's column
    names and the same selection is applied to all three frames.

    Returns:
        (trainset, valset, testset), each a dict with keys 'X', 'y' and 'ref'.
    """
    target_marker = f'y_Tp{plus_target}_PriceChg_'
    ref_markers = (
        f'yref_Tp{plus_target}_Price',
        f'refPrice_Tm{period}',
        f'ypcref_Tp{plus_target}_PriceChg',
        f'yref_Tp{plus_target}_Date',
        'yref_Tm0_close',
    )

    x_cols, y_cols, ref_cols = [], [], []
    for name in ftraindf.columns:
        if 'x_' in name:
            x_cols.append(name)
        if target_marker in name:
            y_cols.append(name)
        if any(marker in name for marker in ref_markers):
            ref_cols.append(name)

    def _as_set(frame):
        return {'X': frame[x_cols], 'y': frame[y_cols], 'ref': frame[ref_cols]}

    return _as_set(ftraindf), _as_set(fvaldf), _as_set(ftestdf)


def neuralConstructor(model_dict):
    """Instantiate the network selected by ``model_dict['model_type']``.

    Returns an MLP for 'mlp', a Transformer for 'transformer' and None for any
    other value.
    """
    kind = model_dict['model_type']

    if kind == 'mlp':
        return mlpConstructor(model_dict)
    # 'rnn' is currently disabled: rnnConstructor(model_dict)
    if kind == 'transformer':
        return Transformer(model_dict)

    return None

def ensemble(val_df, test_df, ref, pred_period = PRED_PERIOD):
    """Fit linear-regression ensembles on validation predictions and score them on test.

    For every ``index`` in 1..pred_period a LinearRegression is fitted on the
    validation columns from position ``index`` onwards (after removing the
    '*mdate_ref*' columns) against the first validation column, then applied to
    the same column slice of the test frame.

    Args:
        val_df, test_df: frames of per-horizon predictions. The first column
            that is not an 'mdate_ref' column must be 'close' (it is read as
            ``y_test['close']``).
            NOTE(review): the training target is taken from ``val_df`` column 0
            rather than from the filtered frame -- assumes 'close' is the very
            first column of val_df; confirm.
        ref: frame the per-horizon results are concatenated onto column-wise.
        pred_period: number of horizons to process.

    Returns:
        (output_results, output1_values, output1_dates, stats_df)
    """
    # .copy(): prediction columns are added to this frame below, so it must
    # not be a slice of test_df (avoids SettingWithCopy problems)
    output_results = test_df[[col for col in test_df.columns if 'mdate_ref' in col or 'close' in col]].copy()

    traindata = val_df[[col for col in val_df.columns if 'mdate_ref' not in col]]
    testdata = test_df[[col for col in test_df.columns if 'mdate_ref' not in col]]

    # the regression targets do not depend on the horizon
    y_train = val_df.iloc[:, [0]]
    y_test = testdata.iloc[:, [0]]

    headers = []
    r2_scores = []
    rmse_scores = []
    mape_scores = []

    for index in range(1, pred_period + 1):

        # train test preparation
        X_train = traindata.iloc[:, index :]
        X_test = testdata.iloc[:, index :]

        model = LinearRegression()
        model.fit(X_train, y_train)
        pred = model.predict(X_test)

        r2 = r2_score(y_test['close'], pred)
        rmse = mean_squared_error(y_test['close'], pred)**0.5
        mape = mean_absolute_percentage_error(y_test['close'], pred)

        print(f'[TEST OUTCOME_Tp{index}] - R2: {r2}, RMSE: {rmse}, MAPE: {mape}')

        headers.append(f'Tp{index}_pred')
        r2_scores.append(r2)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

        output_results[f'Tp{index}_pred'] = pred

    stats_df = pd.DataFrame({
        'columns': headers,
        'r2': r2_scores,
        'rmse': rmse_scores,
        'mape': mape_scores
    })

    output = ref

    for pred_day in range(1, pred_period + 1):

        header_ref = f'Tp{pred_day}_'

        # columns of this horizon, re-indexed from prediction date to model date
        instance = output_results[[col for col in output_results.columns if header_ref in col]]
        instance = instance.reset_index(drop = False) # pred_date out
        instance = instance.rename(columns = {'pred_date': f'Tp{pred_day}_date_ref', f'Tp{pred_day}_mdate_ref':'Date'})
        instance = instance.set_index('Date')

        output = pd.concat([output, instance], axis = 1)

    output = output.dropna()
    output1_dates = output[[col for col in output.columns if 'mdate' in col]]
    output1_values = output[[col for col in output.columns if 'mdate' not in col]]

    return output_results, output1_values, output1_dates, stats_df


def rangeNeuralPipeline(ticker, ftraindf, fvaldf, ftestdf, cluster_details, model_dict, chosen_features = None, google = 1, pred_period = PRED_PERIOD, n_elements = 6):
    """Train one model per horizon T+1..T+pred_period and persist all results.

    For every horizon the model weights, the validation/test predictions, the
    cluster memberships and two cumulative "OVERALL" tables (indexed by model
    date and by predicted date) are written below the working directory.

    Args:
        ticker: stock identifier used to build the working directory.
        ftraindf, fvaldf, ftestdf: train / validation / test frames.
        cluster_details: cluster statistics forwarded to monoNeuralPipeline.
        model_dict: mapping horizon -> model configuration.
        chosen_features: mapping horizon -> selected columns. When None it is
            loaded from 'mces/features_selected.pkl'.
        google: 1 selects the Google-Drive layout, anything else the local one.
        pred_period: number of horizons to train.
        n_elements: accepted for interface compatibility; not used here.

    Returns:
        (overall_results_predmodel, overall_results_predday) for the test split
        after the last horizon.

    Raises:
        ValueError: if ``pred_period`` is smaller than 1.
    """
    if pred_period < 1:
        raise ValueError(f'pred_period must be >= 1, got {pred_period}')

    # Resolve the output directory up front: it is needed for every save below,
    # also when the caller supplies chosen_features (previously it was only set
    # while loading the feature selection, causing a NameError otherwise).
    if google == 1:
        wkdir = f'drive/MyDrive/Colab/FYP/{ticker}/data'
        features_path = f'{wkdir}/mces/features_selected.pkl'
    else:
        wkdir = f'data/{ticker}/transformer'
        features_path = f'data/{ticker}/mces/features_selected.pkl'

    if chosen_features is None:
        # NOTE: pickle can execute arbitrary code on load; only read trusted files
        with open(features_path, 'rb') as handle:
            chosen_features = pickle.load(handle)

    # make sure the target folders of the to_csv calls exist
    for subdir in ('train', 'val', 'test'):
        os.makedirs(f'{wkdir}/{subdir}', exist_ok = True)

    headers_compile = []
    r2_compile = []
    rmse_compile = []
    mape_compile = []

    date_cols = [col for col in ftestdf.columns if '_Date' in col]

    for plus_target in tqdm(range(1, pred_period + 1)):

        # train model, predict & evaluate model performance
        model, eval_res = monoNeuralPipeline(ftraindf=ftraindf, fvaldf=fvaldf, ftestdf=ftestdf, model_dict=model_dict[plus_target], cluster_details=cluster_details, plus_target=plus_target, chosen_features=chosen_features[plus_target])

        # evaluation metrics (for summary)
        headers_compile.append(eval_res['header'])
        r2_compile.append(eval_res['test_pred']['r2'])
        rmse_compile.append(eval_res['test_pred']['rmse'])
        mape_compile.append(eval_res['test_pred']['mape'])

        # save model
        # NOTE(review): state_dict() exists on torch modules only -- presumably
        # this pipeline is run with transformer models; confirm for 'mlp'.
        torch.save(model.state_dict(), f'{wkdir}/Tp{plus_target}.pt')

        pred_col = f'Tp{plus_target}_pred'
        date_col = f'Tp{plus_target}_date_ref'
        mdate_col = f'Tp{plus_target}_mdate_ref'

        # reference columns: the predicted date of this horizon and today's close
        instance_references = [col for col in date_cols if f'Tp{plus_target}_' in col] + ['yref_Tm0_close']
        rename_map = {'price_pred' : pred_col, 'yref_Tm0_close' : 'close', f'yref_Tp{plus_target}_Date': date_col}

        # concat prediction results (to obtain date references)
        test_results = pd.concat([eval_res['test_pred']['ref'], ftestdf[instance_references]], axis = 1)
        test_results = test_results.rename(columns = rename_map)

        val_results = pd.concat([eval_res['val_pred']['ref'], fvaldf[instance_references]], axis = 1)
        val_results = val_results.rename(columns = rename_map)

        # save the results
        val_results.to_csv(f'{wkdir}/val/Tp{plus_target}_valresults.csv')
        test_results.to_csv(f'{wkdir}/test/Tp{plus_target}_testresults.csv')

        # save cluster memberships for fuzzy logic
        eval_res['train_pred']['cluster_ref'].to_csv(f'{wkdir}/train/Tp{plus_target}_train_clustermembership.csv')
        eval_res['val_pred']['cluster_ref'].to_csv(f'{wkdir}/val/Tp{plus_target}_val_clustermembership.csv')
        eval_res['test_pred']['cluster_ref'].to_csv(f'{wkdir}/test/Tp{plus_target}_test_clustermembership.csv')

        # validation first, test second (the test tables are the ones returned)
        for header, item in (('val', val_results), ('test', test_results)):

            by_model_path = f'{wkdir}/{header}/OVERALL_prediction_by_model.csv'
            by_date_path = f'{wkdir}/{header}/OVERALL_prediction_by_date.csv'

            # for the test split the validation rows are prepended so that
            # predictions made during validation can land on test dates
            if header == 'test': stacked = pd.concat([val_results, test_results], axis = 0)
            else: stacked = val_results

            # reindex to reference predicted date
            by_date = stacked[[date_col, pred_col]].reset_index(drop = False)
            by_date = by_date.rename(columns = {'Date': mdate_col, date_col: 'pred_date'})
            by_date = by_date.set_index('pred_date')

            if plus_target == 1:
                # first horizon: create both OVERALL tables from scratch

                # predictions by model dates
                overall_results_predmodel = item[['close', pred_col, date_col]] # by prediction model
                overall_results_predmodel.to_csv(by_model_path)

                # predictions by prediction dates
                close_ref = stacked[['close']]
                overall_results_predday = pd.concat([by_date, close_ref], axis = 1)
                overall_results_predday = overall_results_predday[['close', pred_col, mdate_col]]
                overall_results_predday = overall_results_predday.dropna()
                # the concat may drop the index name; restore it for the CSV header
                overall_results_predday['pred_date'] = overall_results_predday.index
                overall_results_predday = overall_results_predday.set_index('pred_date')
                if header == 'test': overall_results_predday = overall_results_predday.tail(len(test_results))

                overall_results_predday.to_csv(by_date_path)

            else:
                # later horizons: extend the tables written by the previous one

                # read from csv
                overall_results_predmodel = pd.read_csv(by_model_path, index_col = 'Date')
                overall_results_predday = pd.read_csv(by_date_path, index_col = 'pred_date')

                # predmodel
                predmodel = item[[pred_col, date_col]] # by prediction model
                overall_results_predmodel = pd.concat([overall_results_predmodel, predmodel], axis = 1)

                # predday
                pred_day = by_date[[pred_col, mdate_col]]
                overall_results_predday = pd.concat([overall_results_predday, pred_day], axis = 1)
                overall_results_predday = overall_results_predday.dropna()

                # write to csv
                overall_results_predmodel.to_csv(by_model_path)
                overall_results_predday.to_csv(by_date_path)

        gc.collect()

    # only needed by the (currently disabled) ensemble step below
    ref = overall_results_predmodel[['close']]

    stats_df = pd.DataFrame({
        'columns': headers_compile,
        'r2_price': r2_compile,
        'rmse': rmse_compile,
        'mape': mape_compile
    })
    stats_df.to_csv(f'{wkdir}/statistics_before_ensemble.csv')

    # # retrieve data
    # val_df = pd.read_csv(f'{wkdir}/val/OVERALL_prediction_by_date.csv', index_col = 'pred_date')
    # test_df = pd.read_csv(f'{wkdir}/test/OVERALL_prediction_by_date.csv', index_col = 'pred_date')

    # # ensemble
    # output_results, output1_values, output1_dates, stats_df = ensemble(val_df=val_df, test_df=test_df, ref=ref)

    # stats_df.to_csv(f'{wkdir}/statistics_after_ensemble.csv')

    # output_results.to_csv(f'{wkdir}/ensemble/output_results.csv')
    # output1_values.to_csv(f'{wkdir}/ensemble/output_values.csv')
    # output1_dates.to_csv(f'{wkdir}/ensemble/output_dates.csv')

    return overall_results_predmodel, overall_results_predday



Evaluate Model

In [None]:

import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error


def evaluateModel(outcome, trainset, valset, testset, cluster_details, header, plus_target, period = PERIOD, mode = 0):
    """Turn raw cluster outputs into price predictions and score them.

    For every entry of ``outcome`` the per-cluster outputs are normalised into
    memberships, defuzzified into a predicted price change using the cluster
    means, converted into a price via the reference price and compared with
    the true price.

    Args:
        outcome: mapping 'train_pred' / 'val_pred' / 'test_pred' -> DataFrame of
            raw model outputs (one column per cluster). 'test_pred' is required.
        trainset, valset, testset: dicts with 'y' and 'ref' frames; any key
            other than 'val_pred' / 'test_pred' is matched with ``trainset``.
        cluster_details: ``cluster_details[header][column]['mean']`` is the
            price change represented by a cluster column.
        header: key into ``cluster_details`` for this target.
        plus_target: prediction horizon (the N in "T+N").
        period: look-back of the reference price column f'refPrice_Tm{period}'.
        mode: 0 -> shift each row by its minimum and scale it to sum to 1;
              1 -> one-hot on the row maximum (ties yield several ones).

    Returns:
        dict keyed like ``outcome``; each value holds 'rmse', 'r2', 'mape',
        'predicted' (reference frame), 'ref' (price table), 'cluster_ref'
        (memberships plus 'pc_pred') and 'ori_y'.
    """
    # imported locally so this cell does not depend on another cell's imports
    from sklearn.metrics import mean_absolute_percentage_error

    splits = {'val_pred': valset, 'test_pred': testset}

    base_col = f'refPrice_Tm{period}'
    price_col = f'yref_Tp{plus_target}_Price'
    pc_ref_col = f'ypcref_Tp{plus_target}_PriceChg'

    eval_res = {}

    for result, raw_pred in outcome.items():

        item = splits.get(result, trainset)
        reference = item['ref']

        ## normalization
        pred_res = raw_pred.copy()
        cols = list(pred_res.columns)

        if mode == 0:
            shifted = pred_res[cols].sub(pred_res[cols].min(axis = 1), axis = 0).round(6)
            # Sum over the cluster columns only. The helper 'minimum' column
            # used to be part of this sum, so memberships did not add up to 1.
            total = shifted.sum(axis = 1)
            # rows whose outputs are all equal have total 0: divide by 1 instead
            # so their memberships stay 0 rather than becoming NaN
            pred_res[cols] = shifted.div(total.mask(total == 0, 1.0), axis = 0).round(6)

        elif mode == 1:
            row_max = pred_res[cols].max(axis = 1)
            pred_res[cols] = pred_res[cols].eq(row_max, axis = 0).astype(int)
            # kept in the output for backward compatibility with saved tables
            pred_res['maximum'] = row_max

        ## defuzzify: membership-weighted sum of the cluster mean price changes
        pc_pred = pd.Series(0.0, index = pred_res.index)
        for col in cols:
            pc_pred = pc_pred + pred_res[col] * cluster_details[header][col]['mean']
        pred_res['pc_pred'] = pc_pred

        # price change -> price
        price_pred = pd.concat([pred_res, reference[[base_col, price_col, pc_ref_col]]], axis = 1)
        price_pred['price_pred'] = price_pred[base_col] * (1 + price_pred['pc_pred'])
        # .copy(): the 'error' column is added to this selection below
        price_pred = price_pred[[price_col, 'price_pred', pc_ref_col, 'pc_pred']].copy()
        price_pred['error'] = (price_pred['price_pred'] - price_pred[price_col]).abs() / price_pred[price_col]

        pred_r2 = r2_score(price_pred[price_col], price_pred['price_pred'])
        pred_rmse = mean_squared_error(price_pred[price_col], price_pred['price_pred'])**0.5
        pred_mape = mean_absolute_percentage_error(price_pred[price_col], price_pred['price_pred'])

        eval_res[result] = {'rmse': pred_rmse, 'r2': pred_r2, 'mape': pred_mape, 'predicted': reference, 'ref': price_pred, 'cluster_ref': pred_res, 'ori_y': item['y']}

    r2 = eval_res['test_pred']['r2']
    rmse = eval_res['test_pred']['rmse']
    mape = eval_res['test_pred']['mape']

    print(f'[T+{plus_target} - Results] r2: {r2}, rmse: {rmse}, mape: {mape}')

    return eval_res

Keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.callbacks import EarlyStopping
import pandas as pd


def mlpConstructor(model_dict):
    """Build (and, for known output activations, compile) a Keras MLP.

    One Dense layer is added per entry of ``model_dict['mlp']['layers']``; the
    entry keyed 0 also declares the input dimension. The output layer has
    ``model_dict['output_size']`` units. The model is compiled only when the
    output activation is 'linear', 'sigmoid' or 'softmax'.
    """
    mlp_cfg = model_dict['mlp']
    layer_specs = mlp_cfg['layers']
    hidden_activation = mlp_cfg['hl_activation']

    network = Sequential()

    for layer_key in layer_specs:
        units = layer_specs[layer_key]['nodes']
        if layer_key == 0:
            network.add(Dense(units, input_dim = model_dict['input_size'], activation = hidden_activation))
        else:
            network.add(Dense(units, activation = hidden_activation))

    # add output layer
    output_activation = mlp_cfg['ol_activation']
    network.add(Dense(model_dict['output_size'], activation = output_activation))

    # compile model: loss (and metrics) depend on the output activation
    compile_options = {
        'linear': {'loss': 'mse'},
        'sigmoid': {'loss': 'mse'},
        'softmax': {'loss': 'categorical_crossentropy', 'metrics': ['accuracy']},
    }.get(output_activation)

    if compile_options is not None:
        network.compile(optimizer = mlp_cfg['optimizer']['optim_type'], **compile_options)

    return network

# kerasTrain function
def kerasTrain(model, model_dict, cluster_details, trainset, valset, testset, header):
    """Fit a Keras model and return it with validation and test predictions.

    The predictions are returned as DataFrames carrying the columns and index
    of the corresponding 'y' frame. ``cluster_details`` and ``header`` are
    accepted but not used here.
    """
    mlp_cfg = model_dict['mlp']
    verbosity = mlp_cfg['verbose']

    # model fit (no early stopping is applied on this path)
    model.fit(
        trainset['X'],
        trainset['y'],
        epochs = model_dict['epochs'],
        batch_size = model_dict['batch_size'],
        shuffle = mlp_cfg['shuffle'],
        verbose = verbosity,
    )

    def _predict_frame(split):
        raw = model.predict(split['X'], verbose = verbosity)
        targets = split['y']
        return pd.DataFrame(raw, columns = targets.columns, index = targets.index)

    val_pred = _predict_frame(valset)
    test_pred = _predict_frame(testset)

    return model, val_pred, test_pred



Pytorch

In [None]:

from torch.utils.data import TensorDataset, DataLoader, Dataset
from tqdm.auto import tqdm

import matplotlib.pyplot as plt
import pandas as pd
import torch
import math
import copy
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
from torch.optim import Adam, LBFGS, SGD, AdamW

def pytorchTrain(model, model_dict, trainset, valset, testset, traintype = 0):
    """Train a PyTorch model with early stopping and return the best checkpoint.

    After every epoch the model is run on the full train, validation and test
    splits; whenever the validation loss improves, a deep copy of the model and
    its predictions on all three splits are kept.

    The prediction frames are labelled with the index of the corresponding 'y'
    frame, so this relies on the evaluation loaders yielding each split as one
    batch in its original row order.

    Args:
        model: torch module to train.
        model_dict: configuration; reads 'epochs', 'model_type',
            'early_stopper' and the optimizer settings of the model type.
        trainset, valset, testset: dicts with 'X' and 'y' DataFrames.
        traintype: 1 -> the model is called as ``model(X, y)``, otherwise as
            ``model(X)``.

    Returns:
        (optimal_model, optimal_train_pred, optimal_val_pred, optimal_test_pred)

    Raises:
        ValueError: for an unsupported optimizer name.
        RuntimeError: if no epoch produced a usable validation loss.
    """
    # data preparation
    train_dataloader, trainref_dataloader, val_dataloader, test_dataloader = pytorchDataPreparation(model_dict, trainset, valset, testset)

    # retrieve variables
    type_cfg = model_dict[model_dict['model_type']]
    n_epochs = model_dict['epochs']
    optimizer_choice = type_cfg['optimizer']['optim_type']
    learning_rate = type_cfg['optimizer']['learning_rate']
    patience = model_dict['early_stopper']['patience']
    min_delta = model_dict['early_stopper']['min_delta']

    # define the device (CPU or GPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f' -- Connected to {device} -- ')

    # the batches are moved to `device` below, so the model has to live there
    # too (no-op when the caller already moved it)
    model = model.to(device)

    # define loss function
    loss_fn = nn.MSELoss()

    # initialize early stopper
    early_stopper = EarlyStopper(patience=patience, min_delta=min_delta)

    # define optimizer; fail early with a clear message instead of a NameError
    optimizer_types = {'adam': optim.Adam, 'adamW': optim.AdamW, 'sgd': optim.SGD}
    if optimizer_choice not in optimizer_types:
        raise ValueError(f"Unsupported optimizer '{optimizer_choice}'; expected one of {sorted(optimizer_types)}")
    optimizer = optimizer_types[optimizer_choice](model.parameters(), lr = learning_rate)

    def _forward(X, y):
        # traintype 1 models also receive the targets
        return model(X, y) if traintype == 1 else model(X)

    # best checkpoint so far (selected on validation loss)
    min_loss = math.inf
    ref_epoch = None
    optimal_model = optimal_train_pred = optimal_val_pred = optimal_test_pred = None

    train_losses = []
    val_losses = []

    for epoch in tqdm(range(n_epochs)):

        # set model to train mode
        model.train()

        train_batch = []

        for X_batch, y_batch in train_dataloader:

            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # set gradients to zero before backpropagation
            optimizer.zero_grad()

            # forward pass and loss
            train_loss = loss_fn(_forward(X_batch, y_batch), y_batch)
            train_batch.append(train_loss.item())

            # backpropagation
            train_loss.backward() # compute gradients
            optimizer.step() # update weights

        train_losses.append(sum(train_batch)/len(train_batch))

        # evaluate model performance at end of epoch
        model.eval()

        val_batch = []

        # disables gradient calculation
        with torch.no_grad():

            for X_reftrain, y_reftrain in trainref_dataloader:
                X_reftrain, y_reftrain = X_reftrain.to(device), y_reftrain.to(device)
                trainref_pred = _forward(X_reftrain, y_reftrain)

            for X_bval, y_bval in val_dataloader:
                X_bval, y_bval = X_bval.to(device), y_bval.to(device)
                val_pred = _forward(X_bval, y_bval)
                val_batch.append(loss_fn(val_pred, y_bval).item())

            for X_btest, y_btest in test_dataloader:
                X_btest, y_btest = X_btest.to(device), y_btest.to(device)
                test_pred = _forward(X_btest, y_btest)

        val_loss = sum(val_batch)/len(val_batch)
        val_losses.append(val_loss)

        if val_loss < min_loss:
            optimal_model = copy.deepcopy(model)
            optimal_train_pred = pd.DataFrame(trainref_pred.cpu().numpy(), columns = trainset['y'].columns, index = trainset['y'].index)
            optimal_val_pred = pd.DataFrame(val_pred.cpu().numpy(), columns = valset['y'].columns, index = valset['y'].index)
            optimal_test_pred = pd.DataFrame(test_pred.cpu().numpy(), columns = testset['y'].columns, index = testset['y'].index)
            min_loss = val_loss
            ref_epoch = epoch

        # early stop to prevent overfitting
        if early_stopper.early_stop(val_loss): break

    if optimal_model is None:
        raise RuntimeError('Training produced no checkpoint: no epoch was run or the validation loss never was a finite improvement.')

    print(f'Optimal Model Epoch: {ref_epoch}')

    plt.figure(figsize=(10,8))
    plt.plot(range(1,len(train_losses)+1), train_losses, label='Training Loss')
    plt.plot(range(1,len(val_losses)+1), val_losses,label='Validation Loss')

    # Mark the epoch of the returned checkpoint, i.e. the lowest validation
    # loss. (It used to be placed at the lowest *test* loss.) The curves are
    # plotted against 1-based epochs, hence the +1.
    plt.axvline(ref_epoch + 1, linestyle='--', color='r',label='Early Stopping (Val) Checkpoint')

    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.xlim(0, len(train_losses)+1) # consistent scale
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

    return optimal_model, optimal_train_pred, optimal_val_pred, optimal_test_pred


def pytorchDataPreparation(model_dict, trainset, valset, testset):
    """Create the data loaders used by pytorchTrain.

    Returns:
        (train_dataloader, trainref_dataloader, val_dataloader, test_dataloader)

        * train_dataloader -- mini-batches of ``model_dict['batch_size']`` rows,
          shuffled according to the model type's 'shuffle' setting.
        * the other three -- one batch holding the whole split, in the original
          row order.
    """
    batch_size = model_dict['batch_size']
    shuffle = model_dict[model_dict['model_type']]['shuffle']

    train_data = FuzzyDataset(trainset['X'], trainset['y'])
    val_data = FuzzyDataset(valset['X'], valset['y'])
    test_data = FuzzyDataset(testset['X'], testset['y'])

    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

    # Evaluation loaders must never shuffle: pytorchTrain labels their outputs
    # with the index of the original 'y' frame, so a shuffled order would pair
    # predictions with the wrong rows.
    trainref_dataloader = DataLoader(train_data, batch_size=len(trainset['y']), shuffle=False)
    val_dataloader = DataLoader(val_data, batch_size=len(valset['y']), shuffle=False)
    test_dataloader = DataLoader(test_data, batch_size=len(testset['y']), shuffle=False)

    return train_dataloader, trainref_dataloader, val_dataloader, test_dataloader



class FuzzyDataset(Dataset):
    """Torch dataset pairing feature rows with target rows.

    Both frames are converted once, at construction, to float32 tensors.
    """

    def __init__(self, X, y):
        features, targets = X.to_numpy(), y.to_numpy()
        self.X = torch.tensor(features, dtype = torch.float32)
        self.y = torch.tensor(targets, dtype = torch.float32)

    def __len__(self):
        # number of samples = number of target rows
        return len(self.y)

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    A call counts as a "bad" epoch when the loss exceeds the best loss seen so
    far by more than ``min_delta``; after ``patience`` bad epochs since the last
    new best, ``early_stop`` returns True. Losses within ``min_delta`` of the
    best neither count nor reset the counter.
    """

    def __init__(self, patience=10, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        # new best: remember it and start counting afresh
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # inside the tolerance band: nothing changes
        if not (validation_loss > (self.min_validation_loss + self.min_delta)):
            return False

        self.counter += 1
        if self.counter >= self.patience:
            return True
        return False

In [None]:

class Transformer(nn.Module):
    """Transformer encoder followed by a stack of up to six dense layers.

    Configuration read from ``model_dict``:
        'input_size'  -- encoder width (d_model) and input width of the first
                         dense layer
        'output_size' -- width of the last dense layer in use
        'transformer' -- 'nheads' (or 'nhead'), 'dim_feedforward',
                         'num_encoder_layers', 'hidden_size', 'dense_layers'

    ``dense_layers`` selects how many of ll1..ll6 are applied; values above 6
    behave like 6. The output passes through a sigmoid.
    """

    # Constructor
    def __init__(self, model_dict):

        super().__init__()

        cfg = model_dict['transformer']
        input_size = model_dict['input_size']
        output_size = model_dict['output_size']

        # The head count is read from 'nheads'. monoNeuralPipeline stores its
        # computed value under 'nhead', so fall back to that key when 'nheads'
        # is absent instead of failing with a KeyError.
        nhead = cfg['nheads'] if 'nheads' in cfg else cfg['nhead']

        encoder_layer = nn.TransformerEncoderLayer(d_model=input_size, nhead=nhead, dropout = 0, dim_feedforward = cfg['dim_feedforward'], batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=cfg['num_encoder_layers'])
        self.activation = nn.ReLU()
        self.output_activation = nn.Sigmoid()

        self.hidden_size = cfg['hidden_size']
        self.dense_layers = cfg['dense_layers']

        # All six layers are always created (attribute names and creation order
        # are kept unchanged so existing state dicts keep loading).
        self.ll1 = nn.Linear(input_size, self.hidden_size)
        self.ll2 = nn.Linear(self.hidden_size, self.hidden_size)
        self.ll3 = nn.Linear(self.hidden_size, self.hidden_size)
        self.ll4 = nn.Linear(self.hidden_size, self.hidden_size)
        self.ll5 = nn.Linear(self.hidden_size, self.hidden_size)
        self.ll6 = nn.Linear(self.hidden_size, output_size)

        # With fewer than six layers in use, the last one used must project to
        # output_size, so it is replaced.
        if self.dense_layers in (1, 2, 3, 4, 5):
            last = int(self.dense_layers)
            in_features = input_size if last == 1 else self.hidden_size
            setattr(self, f'll{last}', nn.Linear(in_features, output_size))

    def forward(self, x):
        x = self.transformer_encoder(x)
        x = self.ll1(x)

        # apply ll2 .. ll<dense_layers>, each preceded by the hidden activation
        extra_layers = (self.ll2, self.ll3, self.ll4, self.ll5, self.ll6)
        n_extra = max(int(self.dense_layers) - 1, 0)
        for layer in extra_layers[:n_extra]:
            x = layer(self.activation(x))

        return self.output_activation(x)


In [None]:
# Train one model per horizon (T+1 .. T+PRED_PERIOD) and persist all results.
# NOTE(review): `model_dict` is not defined anywhere in the visible notebook; it
# must be a mapping horizon -> model configuration (it is indexed as
# model_dict[plus_target]) and has to be created before this cell is run.
pred_bymodel, pred_byday = rangeNeuralPipeline(ticker, ftraindf, fvaldf, ftestdf, cluster_details, model_dict, google = 1, pred_period = PRED_PERIOD)