In [None]:
# Trading Algorithm 
# Author Samwel Portelli <samwel.portelli.18@um.edu.mt>

In [None]:
import pandas as pd
import os
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import sys
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Function that gets the daily close price data
def get_data(symbol, start_date, end_date, interval = '1d', param='Close'):
    """Download price history for one ticker and return a single field of it.

    Parameters
    ----------
    symbol : str
        Ticker passed to ``yf.download``.
    start_date, end_date : str
        Forwarded unchanged as the ``start`` / ``end`` arguments of ``yf.download``.
    interval : str, optional
        Sampling interval forwarded to ``yf.download`` (default ``'1d'``).
    param : str, optional
        Key selected from the downloaded frame (default ``'Close'``).

    Returns
    -------
    Whatever ``data[param]`` yields for the downloaded frame.
    """
    history = yf.download(symbol, start=start_date, end=end_date, interval=interval)
    return history[param]

# Executed when this cell runs: fetches the 'Adj Close' field for the '^GSPC'
# ticker between the two dates given, through get_data (i.e. yf.download).
# NOTE(review): in the part of the notebook reviewed, SandPData is only
# referenced from commented-out code in calculate_portfolio_metrics - confirm
# the download is still needed.
SandPData = get_data('^GSPC', "2006-08-16","2010-10-10", param='Adj Close')

# Function to convert dates to the desired format
def convert_to_datetime(date_str):
    """Re-format a ``'YYYY/MM/DD HH:MM'`` string as ``'YYYY-MM-DD'``.

    The time-of-day part is parsed and then discarded.  Raises ``ValueError``
    (from ``datetime.strptime``) when ``date_str`` does not match the input
    pattern.
    """
    parsed = datetime.strptime(date_str, '%Y/%m/%d %H:%M')
    return parsed.strftime('%Y-%m-%d')

def buy_and_hold(priceData, initialCapital=1000, do_print=False, compute_metrics=False):
    """Simulate holding ``priceData`` from its first to its last observation.

    Parameters
    ----------
    priceData : pandas.Series (or single-column DataFrame)
        Price series; only ``pct_change()`` of it is used, so the index type
        does not matter.
    initialCapital : number, optional
        Fund value assigned to the first observation.
    do_print : bool, optional
        Print the portfolio metrics.  The metrics are computed whenever this
        flag is set, even if ``compute_metrics`` is false (previously that
        combination failed on unbound names).
    compute_metrics : bool, optional
        Compute the metrics through ``calculate_portfolio_metrics``.

    Returns
    -------
    list or None
        The fund value at every observation (first element is
        ``initialCapital``) when ``do_print`` is false; ``None`` when
        ``do_print`` is true, as before.
    """
    returnsDf = pd.DataFrame(priceData.pct_change())
    returnsDf.columns = ['returns']

    # Work on the raw values: label-based ``series[i]`` breaks for integer
    # indexes that do not start at 0 and is deprecated for non-integer ones.
    period_returns = returnsDf['returns'].to_numpy()

    fund_value = [initialCapital]
    # The first return is NaN (no previous price), hence the [1:].
    for period_return in period_returns[1:]:
        previous_value = fund_value[-1]
        fund_value.append(previous_value + previous_value * period_return)

    if compute_metrics or do_print:
        # calculate_portfolio_metrics regresses the portfolio on a buy-and-hold
        # of ``og_data['tradeactualPrice']``; here that benchmark is the price
        # series itself.
        benchmark = pd.DataFrame({'tradeactualPrice': np.asarray(priceData).ravel()})
        (cumulative_returns, annual_volatility, sharpe_ratio, sortino_ratio,
         max_drawdown, beta, alpha) = calculate_portfolio_metrics(
            pd.DataFrame(fund_value, columns=['portfolio_value']),
            benchmark,
            plot=False,
            havedate=False)

    if do_print:
        print("Cumulative Returns:", cumulative_returns)
        print("Annualized Volatility:", annual_volatility)
        print("Sharpe Ratio:", sharpe_ratio)
        print("Sortino Ratio:", sortino_ratio)
        print("Beta:", beta)
        print("Alpha:", alpha)
        print("Maximum Drawdown:", max_drawdown)
    else:
        return fund_value
        

*Code that generates a model that converts the certainty into threshold position size*

In [None]:
def min_max_normalize(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    When every rescaled element is NaN (for example a constant input, where
    the range is zero) an all-zero array shaped like ``data`` is returned
    instead.
    """
    lowest, highest = np.min(data), np.max(data)
    scaled = (data - lowest) / (highest - lowest)
    return np.zeros_like(data) if np.all(np.isnan(scaled)) else scaled

def get_lin_reg_dataset(df, step=30, test=False, val_df=None):
    """Build (window, label) samples from the ``certainty`` column of ``df``.

    Each feature vector is a min-max normalised window of ``step`` consecutive
    certainty values; its label belongs to the row right after the window and
    is ``1 - |actualClass - predClass|``.

    When ``test`` is true, ``step`` extra samples are prepended for the first
    ``step`` rows of ``df``: their windows are completed with the trailing
    certainty values of ``val_df``.

    Returns
    -------
    (x, y) : tuple of lists
        ``x`` holds the normalised windows, ``y`` the matching labels.
    """
    def trade_label(row):
        # 1 means trade, 0 means no trade
        return 1-np.abs(df['actualClass'][row]-df['predClass'][row])

    x, y = [], []

    if test:
        for offset in range(step):
            val_certainty = val_df['certainty']
            val_len = len(val_certainty)
            # Tail of the validation set followed by the first rows of df.
            tail = val_certainty[val_len - step + offset:val_len].values
            head = df['certainty'][0:offset].values
            x.append(min_max_normalize(np.concatenate((tail, head))))
            y.append(trade_label(offset))

    window_count = len(df['certainty'])-step
    for start in range(window_count):
        stop = start+step
        x.append(min_max_normalize(df['certainty'][start:stop].values))
        y.append(trade_label(stop))

    return x, y

# Function to train the model
def train_model(X_train, y_train):
    """Fit a ``LogisticRegression`` with default settings and return it."""
    # ``fit`` returns the estimator itself, so the fitted model is returned directly.
    return LogisticRegression().fit(X_train, y_train)

# Function to test the model and classify based on a threshold
def test_model_with_threshold(model, X_test, y_test, threshold=0.5):
    """Score ``model`` on a test set using a custom decision threshold.

    Returns
    -------
    (y_proba, accuracy)
        ``y_proba`` is the second column of ``model.predict_proba(X_test)``;
        ``accuracy`` is ``accuracy_score`` of ``y_test`` against the labels
        obtained with ``y_proba >= threshold``.
    """
    positive_proba = model.predict_proba(X_test)[:, 1]
    thresholded_labels = (positive_proba >= threshold).astype(int)
    return positive_proba, accuracy_score(y_test, thresholded_labels)

In [None]:
def calculate_portfolio_metrics(df, og_data=None, plot=True, havedate=True):
    """Compute performance metrics for a portfolio value series.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``portfolio_value`` column, plus a ``Date`` column when
        ``havedate`` or ``plot`` is true.  With ``havedate`` the ``Date``
        column is converted to datetime in place; no other column of ``df``
        is added, changed or removed.
    og_data : pandas.DataFrame, optional
        Frame with a ``tradeactualPrice`` column.  A buy-and-hold of that
        column is the benchmark for beta/alpha, so it must have as many rows
        as ``df``.  When omitted, beta and alpha are returned as NaN.
    plot : bool, optional
        Draw, save (``portfolio_growth.png``, ``maximum_drawdown.png``) and
        show the growth and drawdown charts.
    havedate : bool, optional
        Whether ``df`` carries a ``Date`` column to convert.

    Returns
    -------
    tuple
        ``(cumulative_returns, annual_volatility, sharpe_ratio, sortino_ratio,
        max_drawdown, beta, alpha)``.  ``max_drawdown`` is expressed in
        percent; the other values are plain ratios.
    """
    if havedate:
        # Converting the date column to datetime
        df['Date'] = pd.to_datetime(df['Date'])

    portfolio_value = df['portfolio_value']

    # Intermediate series are kept local so the caller's frame is never
    # polluted with (or stripped of) scratch columns.
    daily_returns = portfolio_value.pct_change()
    n_periods = len(daily_returns)

    # Total return over the whole period
    cumulative_returns = portfolio_value.iloc[-1]/portfolio_value.iloc[0] - 1

    annual_returns = (1+cumulative_returns)**(365/n_periods) - 1 #https://www.investopedia.com/terms/a/annualized-total-return.asp

    # NOTE(review): scaled by sqrt(number of observations), i.e. volatility
    # over the sample rather than per year - confirm this is intended.
    annual_volatility = daily_returns.std()*np.sqrt(n_periods)

    # Risk-free rate is assumed to be 1%
    # NOTE(review): this is the daily rate, yet it is subtracted from an
    # annualised return below - confirm.
    risk_free_rate = 0.01/365

    # NOTE(review): debug output kept so that existing notebook output is unchanged.
    print('**TEST**')
    print('annual_returns: '+str(annual_returns))
    print('risk_free_rate: '+str(risk_free_rate))
    print('annual_volatility: '+str(annual_volatility))
    print('**TEST**')

    sharpe_ratio = (annual_returns - risk_free_rate) / annual_volatility

    # Sortino ratio: same numerator, volatility of the losing days only
    negative_returns = daily_returns[daily_returns < 0]
    downside_volatility = negative_returns.std()*np.sqrt(n_periods)
    sortino_ratio = (annual_returns - risk_free_rate) / downside_volatility

    # Beta / alpha: slope and intercept of the portfolio returns regressed on
    # the returns of a buy-and-hold of the traded price.
    if og_data is None:
        beta, alpha = np.nan, np.nan
    else:
        benchmark_returns = pd.Series(buy_and_hold(og_data['tradeactualPrice'])).pct_change()[1:]
        (beta, alpha) = stats.linregress(benchmark_returns, daily_returns[1:])[0:2]

    # Maximum drawdown: largest relative fall below the running peak, in percent
    peak = portfolio_value.cummax()
    drawdown = (portfolio_value - peak)/peak
    max_drawdown = drawdown.min()*100

    if plot:
        # Plotting the portfolio value
        sns.set(style='whitegrid')
        plt.figure(figsize=(12, 6))
        plt.plot(df['Date'], portfolio_value, linewidth=2)
        plt.xlabel('Date')
        plt.ylabel('Portfolio Value')
        plt.title('Portfolio Growth')
        plt.xticks(rotation=45)
        plt.xlim(df['Date'].iloc[0], df['Date'].iloc[-1])

        # 5% head-room above and below the curve
        y_min = portfolio_value.min() * 0.95
        y_max = portfolio_value.max() * 1.05
        plt.ylim(y_min, y_max)

        plt.tight_layout()
        sns.despine()
        plt.savefig("portfolio_growth.png")
        plt.show()

        # Plotting maximum drawdown
        plt.figure(figsize=(12, 6))
        plt.plot(df['Date'], drawdown, color='red', linewidth=2)
        plt.fill_between(df['Date'], drawdown, color='lightcoral')
        plt.xlabel('Date')
        plt.ylabel('Drawdown')
        plt.title('Maximum Drawdown')
        plt.xticks(rotation=45)
        plt.xlim(df['Date'].iloc[0], df['Date'].iloc[-1])

        plt.tight_layout()
        sns.despine()
        plt.savefig("maximum_drawdown.png")
        plt.show()

    return cumulative_returns, annual_volatility, sharpe_ratio, sortino_ratio, max_drawdown, beta, alpha

In [None]:
def create_unique_folder(folder_name):
    """Create a new directory and return the path that was actually used.

    ``folder_name`` is used when it does not exist yet; otherwise the first
    free name among ``<folder_name>_v1``, ``<folder_name>_v2``, ... is taken.
    """
    candidate = folder_name
    suffix = 1
    while True:
        if not os.path.exists(candidate):
            break
        candidate = f"{folder_name}_v{suffix}"
        suffix += 1
    os.makedirs(candidate)
    return candidate

In [None]:
def read_csv_as_df(file_name, folder_name):
    """Read ``folder_name/file_name`` with ``pd.read_csv``.

    Returns the resulting DataFrame, or ``None`` (after printing a message)
    when the file does not exist.
    """
    file_path = os.path.join(folder_name, file_name)
    if not os.path.exists(file_path):
        print(f"File '{file_name}' does not exist in folder '{folder_name}'.")
        return None
    return pd.read_csv(file_path)

*Loading the data*

In [None]:
def _read_columns_as_arrays(csv_path):
    """Read a comma-separated file and return ``{column name: NumPy array}``."""
    frame = pd.read_csv(csv_path, delimiter=',')
    return {column: frame[column].to_numpy() for column in frame.columns}


def _read_prediction_csv(csv_path, pred_column):
    """Return ``(y_true, y_pred, y_lower, y_upper)`` arrays from one prediction CSV."""
    frame = pd.read_csv(csv_path)
    return (frame['y_true'].values, frame[pred_column].values,
            frame['y_lower'].values, frame['y_upper'].values)


def _load_enbpi_split(run_folder, alpha, s):
    """Return ``(y_true, y_pred, lower, upper)`` for one EnbPI run folder."""
    intervals = _read_columns_as_arrays(os.path.join(run_folder, f'ALL_PI_data_s_{s}.csv'))
    points = _read_columns_as_arrays(os.path.join(run_folder, f'y_true_and_pred_s_{s}.csv'))
    return (points['y_true'], points['y_pred_from_PUNCC'],
            intervals['low_'+str(alpha)], intervals['high_'+str(alpha)])


def _load_mcdropout_split(run_folder, alpha):
    """Return ``(y_true, mean, lower, upper)`` for one MC-dropout output folder."""
    summary = pd.read_csv(os.path.join(run_folder, f'min_max_mean_alpha_{alpha}.csv'))
    y_true = np.load(os.path.join(run_folder, 'true_0.npy')).flatten()
    return y_true, summary['mean'].values, summary['lower'].values, summary['upper'].values


def load_data(model, alpha, s=None, data_folder=r'\model_data_OT',
              base_dir=r'C:\Users\porte\Downloads',
              og_data_path=r'C:\Users\porte\Downloads\dataset\exchange_rate\exchange_rate.csv'):
    """Load test/validation predictions with their intervals plus the raw series.

    Parameters
    ----------
    model : str
        One of ``'transformer_enbpi'``, ``'transformer_garch'``,
        ``'transformer_mcdropout'``, ``'arima_garch'`` or ``'arima'``.
    alpha : float
        Miscoverage level.  It selects the ``low_<alpha>`` / ``high_<alpha>``
        columns (EnbPI) or is embedded in the file names, either directly or
        as ``int(100-alpha*100)``.
    s : optional
        Run identifier embedded in the EnbPI folder and file names.
    data_folder : str, optional
        Suffix appended verbatim to ``base_dir`` to form the data folder.  The
        text after its last ``'_'`` is also the name of the column read from
        the original dataset.
    base_dir : str, optional
        Prefix of the data folder; defaults to the previously hard-coded path.
    og_data_path : str, optional
        CSV with the original series (needs a ``date`` column and the target
        column); defaults to the previously hard-coded path.

    Returns
    -------
    tuple
        ``(y_true, y_pred, y_pred_lower, y_pred_upper, val_y_true, val_y_pred,
        val_y_pred_lower, val_y_pred_upper, og_data_true, og_data_date)``.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.  Previously a message
        was printed and the function then failed on unbound variables.
    """
    folder_path = base_dir + data_folder
    target_column = data_folder.split('_')[-1]

    if model == 'transformer_enbpi':
        run_name = f'transformer_confidence_run_data_s_{s}'
        test_split = _load_enbpi_split(os.path.join(folder_path, run_name), alpha, s)
        val_split = _load_enbpi_split(os.path.join(folder_path, run_name + '_val'), alpha, s)

    elif model == 'transformer_garch':
        conf = int(100-alpha*100)
        # File names keep the spelling used by the files on disk ("transforemer").
        test_split = _read_prediction_csv(
            os.path.join(folder_path,
                         f'garch_transformer_confidence_run_data_alpha_{alpha}',
                         f'garch_transforemer_y_true_and_pred_alp_{conf}_{target_column}.csv'),
            'y_pred_from_PUNCC')
        val_split = _read_prediction_csv(
            os.path.join(folder_path,
                         f'val_garch_transformer_confidence_run_data_alpha_{alpha}',
                         f'val_garch_transforemer_y_true_and_pred_alp_{conf}_{target_column}.csv'),
            'y_pred_from_PUNCC')

    elif model == 'transformer_mcdropout':
        test_split = _load_mcdropout_split(
            os.path.join(folder_path, 'output_mc_1_MCDROPOUT_PREDICTION', 'output_mc_1'), alpha)
        val_split = _load_mcdropout_split(
            os.path.join(folder_path, 'output_mc_1_MCDROPOUT_VAL', 'output_mc_1'), alpha)

    elif model in ('arima_garch', 'arima'):
        conf = int(100-alpha*100)
        stem = model.replace('_', '')  # 'arimagarch' or 'arima'
        # os.path.join yields the same '\\'-separated path as the previous
        # string concatenation on Windows and stays valid elsewhere.
        test_split = _read_prediction_csv(
            os.path.join(folder_path, f'{stem}_y_true_and_pred_alp_{conf}.csv'),
            f'y_pred_from_{stem}')
        val_split = _read_prediction_csv(
            os.path.join(folder_path, f'val_{stem}_y_true_and_pred_alp_{conf}.csv'),
            f'y_pred_from_{stem}')

    else:
        raise ValueError(
            f"Model does not exist. Choose a suitable model! (got {model!r})")

    # Original (un-split) series, identical for every model.
    og_data = _read_columns_as_arrays(og_data_path)
    og_data_true = og_data[target_column]
    og_data_date = og_data['date']

    return (*test_split, *val_split, og_data_true, og_data_date)


*Preparing the data for the trading algorithm*

In [None]:
def prepare_data(y_true, val_y_true, og_data_date, og_data_true):
    """Slice the original series into the windows matching the test and validation sets.

    The test set is taken to be the last ``len(y_true)`` observations of the
    original data and the validation set the ``len(val_y_true)`` observations
    right before it.  For each set the "trade" date/price is the observation
    one step before the target observation.

    Returns
    -------
    tuple
        ``(index, tradedate, date, tradeactualPrice, actualPrice, val_index,
        val_tradedate, val_date, val_tradeactualPrice, val_actualPrice)``.
        ``actualPrice`` is ``y_true`` itself, whereas ``val_actualPrice`` is
        sliced from ``og_data_true``.
    """
    n_test = len(y_true)
    n_val = len(val_y_true)

    # Test window: targets are the last n_test points, trades one step earlier.
    test_block = (
        np.arange(0, n_test),
        og_data_date[-n_test-1:-1],
        og_data_date[-n_test:],
        og_data_true[-n_test-1:-1],
        y_true,
    )

    # Validation window: ends where the test window starts.
    val_block = (
        np.arange(0, n_val),
        og_data_date[-n_val-1-n_test:-1-n_test],
        og_data_date[-n_val-n_test:-n_test],
        og_data_true[-n_val-1-n_test:-1-n_test],
        og_data_true[-n_val-n_test:-n_test],
    )

    return test_block + val_block

In [None]:
def convert_to_class(index, actualPrice, tradeactualPrice, y_pred):
    """Turn prices into up/down classes relative to the trade price.

    For every position in ``index`` the class is 1 when the value is strictly
    above ``tradeactualPrice`` at that position and 0 otherwise.

    Returns
    -------
    (actualClass, predClass) : tuple of numpy.ndarray
        Classes derived from ``actualPrice`` and from ``y_pred`` respectively.
    """
    # ACTUAL signal
    actualClass = [1 if actualPrice[i] > tradeactualPrice[i] else 0 for i in index]
    # PREDICTED signal
    predClass = [1 if y_pred[i] > tradeactualPrice[i] else 0 for i in index]

    return np.array(actualClass), np.array(predClass)

*Normalising the uncertainty of the test set using params obtained from the val set*

In [None]:
# Loading the uncertainty of the validation dataset so that I can obtain the normalising parameters for
# the test set uncertainty

def normalise_test_uncert(val_y_pred_upper, val_y_pred_lower, y_pred_upper, y_pred_lower, normalise=False):
    """Compute interval widths for the validation and test sets.

    Parameters
    ----------
    val_y_pred_upper, val_y_pred_lower : array-like
        Upper / lower interval bounds of the validation set.
    y_pred_upper, y_pred_lower : array-like
        Upper / lower interval bounds of the test set.
    normalise : bool, optional
        ``False`` (default) returns the raw widths, which is what this
        function has effectively been doing.  ``True`` min-max scales both
        sets with the minimum and maximum width of the validation set, so
        test values may fall outside ``[0, 1]``.

    Returns
    -------
    (norm_val_uncert, norm_test_uncert) : tuple of numpy.ndarray

    Raises
    ------
    ValueError
        With ``normalise=True`` when all validation widths are equal, since
        the scaling range would be zero.
    """
    val_uncert = np.subtract(val_y_pred_upper, val_y_pred_lower)
    test_uncert = np.subtract(y_pred_upper, y_pred_lower)

    if not normalise:
        # Normalisation is skipped by default; nothing else is computed so no
        # spurious divide-by-zero warnings are emitted.
        return val_uncert, test_uncert

    val_uncert_min = np.min(val_uncert)
    val_uncert_range = np.max(val_uncert) - val_uncert_min
    if val_uncert_range == 0:
        raise ValueError("Cannot normalise: all validation uncertainties are identical.")

    norm_val_uncert = (val_uncert - val_uncert_min) / val_uncert_range
    norm_test_uncert = (test_uncert - val_uncert_min) / val_uncert_range

    return norm_val_uncert, norm_test_uncert

*Converting the dates to datetime format*

In [None]:
def format_dates(tradedate, date, val_tradedate, val_date):
    """Apply ``convert_to_datetime`` to every entry of the four date sequences.

    Returns the four converted lists in the same order as the arguments:
    test trade dates, test dates, validation trade dates, validation dates.
    """
    def reformatted(raw_dates):
        return [convert_to_datetime(dt) for dt in raw_dates]

    return (reformatted(tradedate), reformatted(date),
            reformatted(val_tradedate), reformatted(val_date))

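*Flagging days to skip: a row whose certainty value exceeds the threshold marks itself and the following `days` rows as skipped*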

In [None]:
def skip_day(df, threshold, days=5):
    """Add a ``skip`` column flagging rows around high-``certainty`` values.

    ``df`` is modified in place and also returned.  ``skip`` starts at 0;
    for every row label ``i`` in ``range(len(df) - days)`` whose ``certainty``
    exceeds ``threshold``, rows ``i`` through ``i + days`` (inclusive, as
    ``.loc`` slices are) are set to 1.  The last ``days`` rows are never
    examined as triggers.  Row labels are looked up with ``.loc``, so ``df``
    is expected to carry the default 0..n-1 index.
    """
    # Initialising the signal
    df['skip'] = 0

    # Flagging never changes 'certainty', so the triggers can be collected first.
    trigger_rows = [row for row in range(len(df) - days)
                    if df.loc[row, 'certainty'] > threshold]
    for row in trigger_rows:
        df.loc[row:row+days, 'skip'] = 1

    return df

*Setting up the dataframe for the trading algorithm*

In [None]:
def create_df(index, tradedate, date, tradeactualPrice, actualPrice, actualClass, predClass, certainty):
    """Assemble the per-observation inputs into one DataFrame.

    The columns are named after the arguments and appear in the same order:
    ``index, tradedate, date, tradeactualPrice, actualPrice, actualClass,
    predClass, certainty``.
    """
    column_names = ('index', 'tradedate', 'date', 'tradeactualPrice',
                    'actualPrice', 'actualClass', 'predClass', 'certainty')
    column_values = (index, tradedate, date, tradeactualPrice,
                     actualPrice, actualClass, predClass, certainty)
    return pd.DataFrame(dict(zip(column_names, column_values)))

*Trading Algorithm Function*

In [None]:
## Single definition of the trading strategy.
## The notebook used to define tradingStrategy twice in this cell, so the second
## definition (flat fee per order) silently replaced the first one (commission
## with a minimum charge, like IBRKs).  Both fee models now live here: the flat
## fee stays the default, the commission model is selected with commissionFee=True.
def tradingStrategy(df,portfolio_value = 1000, useCertainty = True, usetransactionFee = True, transactionFee = 0.005, useSlippage = True, slippageChange = 0.1, certaintyThreshold = 0.1, short=False, skip=False, commissionFee=False, minCommission=2):
    """Simulate trading every date in ``df`` and record the portfolio value.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``tradedate``, ``tradeactualPrice``, ``actualPrice``,
        ``predClass`` and ``certainty`` (plus ``skip`` when ``skip`` is true).
        It is not modified.
    portfolio_value : number, optional
        Starting capital.
    useCertainty : bool, optional
        Multiply each position by the row's ``certainty`` and keep the rest
        as cash.  NOTE(review): this assumes ``certainty`` lies in [0, 1],
        while normalise_test_uncert returns un-normalised widths by default -
        confirm.
    usetransactionFee : bool, optional
        Charge fees after each trading date that had trades.
    transactionFee : float, optional
        Flat amount per order (default model), or a rate applied to the
        bought and sold amounts when ``commissionFee`` is true.
    useSlippage : bool, optional
        Add uniform noise in ``[-slippageChange, slippageChange]`` to both
        prices (drawn from ``np.random``; not seeded here).
    slippageChange : float, optional
        Half-width of the slippage noise, in price units.
    certaintyThreshold : float, optional
        Only rows with ``certainty`` strictly below this value are traded.
        The comparison uses full float precision (the previous ``'%f'``
        formatting rounded the threshold to six decimals).
    short : bool, optional
        Also trade rows with ``predClass == 0``, as short positions.
    skip : bool, optional
        Do not trade on dates whose first row has a non-zero ``skip`` value.
    commissionFee : bool, optional
        Use the commission model: ``transactionFee`` times the amount bought
        plus ``transactionFee`` times the amount sold, with ``minCommission``
        charged per order when the average of the two is smaller.
    minCommission : number, optional
        Minimum charge per order in the commission model.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``portfolio_value``: one row per unique trade
        date holding the value *before* that date's trades.
    """
    threshold = float(certaintyThreshold)

    # Rows that may be traded at all; computed once instead of once per date.
    if short:
        sideMask = df['predClass'].isin([0, 1])
    else:
        sideMask = df['predClass'] == 1
    tradableMask = sideMask & (df['certainty'] < threshold)

    portfolio_value_list = []
    remainingLiquid = 0

    # Trading on each trading day
    for tradingDate in df['tradedate'].unique():

        # Storing the data that will later be used for evaluation
        portfolio_value_list.append([tradingDate, portfolio_value])

        dateMask = df['tradedate'] == tradingDate

        # A non-zero skip flag means no trades on this date
        if skip and df.loc[dateMask, 'skip'].values[0] != 0:
            continue

        # Explicit copy: the prices below may be perturbed by slippage
        dayDf = df.loc[dateMask & tradableMask].copy()
        tradeCount = dayDf.shape[0]
        if tradeCount == 0:
            continue

        # +1 for long positions (predClass 1), -1 for short ones
        longShort = np.where(dayDf['predClass'] == 1, 1, -1)

        # If slippage is used then a small random factor is added or subtracted to the prices
        if useSlippage:
            dayDf['tradeactualPrice'] = dayDf['tradeactualPrice'] + np.random.uniform(low=-slippageChange, high=slippageChange, size=tradeCount)
            dayDf['actualPrice'] = dayDf['actualPrice'] + np.random.uniform(low=-slippageChange, high=slippageChange, size=tradeCount)

        # Relative price move from the trade price to the actual price
        pctChange = dayDf['actualPrice'] / dayDf['tradeactualPrice'] - 1

        # Capital is split equally between the day's trades
        investmentPerStock = np.ones(tradeCount) * (portfolio_value / tradeCount)

        # If certainty is used then the investment size is varied
        if useCertainty:
            investmentPerStock = investmentPerStock * dayDf['certainty']
            remainingLiquid = portfolio_value - np.sum(investmentPerStock) # Noting what was not invested

        profitLoss = pctChange * investmentPerStock * longShort

        invested = np.sum(investmentPerStock)
        realised = np.sum(np.array(profitLoss))
        portfolio_value = invested + realised + remainingLiquid

        # Accounting for transaction fees (two orders per trade: buy and sell)
        if usetransactionFee:
            if commissionFee:
                fee = transactionFee * invested + transactionFee * (realised + invested)
                if fee / 2 < minCommission:
                    fee = 2 * minCommission
            else:
                fee = transactionFee * tradeCount * 2
            portfolio_value -= fee

    # Naming the columns at construction also handles an empty input frame
    return pd.DataFrame(portfolio_value_list, columns=['Date', 'portfolio_value'])

The function below works correctly, but an updated version of it follows underneath.

The code below was updated so that, when using EnbPI, trading without uncertainty is done using the original model's predictions rather than the ensemble model's predictions.

In [None]:
def trade_on_test(df, initialCapital, folder_name, df_og_data=None):
    """Back-test the hard-coded strategy variations on the test data and save the results.

    For every variation the strategy is simulated with ``tradingStrategy``, the
    equity curve is written to ``portfolio_valueDf_<description>.csv`` and the
    mean / standard deviation of the portfolio metrics are printed and written
    to ``test_portfolio_metrics<ind>.csv`` inside ``folder_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trading data. It is saved as ``trading_data.csv``, traded on, and passed
        to ``calculate_portfolio_metrics`` as ``og_data``.
    initialCapital : float
        Portfolio value at the start of every simulation.
    folder_name : str
        Directory that receives all CSV outputs.
    df_og_data : pandas.DataFrame, optional
        Alternative trading data used for the variations that do not use
        certainty ('Long' and 'Short'). When it is ``None`` every variation
        trades on ``df``.

    Returns
    -------
    None
    """
    df.to_csv(os.path.join(folder_name, "trading_data.csv"), index=False)

    # Hard-coding the settings for the trading strategy variations.
    # Each entry is [useCertainty, certaintyThreshold, short].
    settings = [[False, 100, False], [True, 100, False],[False, 100, True], [True, 100, True]]
    description = ['Long','Long using thresholded certainty','Short','Short using thresholded certainty']

    # Labels for the values returned by calculate_portfolio_metrics, in order
    metric_names = ['Cumulative Returns', 'Annualized Volatility', 'Sharpe Ratio',
                    'Sortino Ratio', 'Maximum Drawdown','beta', 'alpha']

    # Number of simulations per variation whose metrics are averaged
    num_runs = 1

    for ind, setting in enumerate(settings):

        # The plain 'Long' and 'Short' variations trade on df_og_data when it is
        # supplied. The variation must be identified through its description:
        # `setting` is a list, so comparing it with 'Long'/'Short' is never true.
        # `is not None` is required because `!= None` on a DataFrame is element-wise.
        use_og_data = description[ind] in ('Long', 'Short') and df_og_data is not None
        trading_df = df_og_data if use_og_data else df

        results = []

        for _ in range(num_runs):

            portfolio_valueDf = tradingStrategy(trading_df,
                                               portfolio_value = initialCapital,
                                               useCertainty = setting[0],
                                               usetransactionFee = True,
                                               transactionFee = 0.0001*0.2, #fee from IBRKs
                                               useSlippage = False,
                                               slippageChange = 0.1,
                                               certaintyThreshold = setting[1],
                                               short = setting[2],
                                               skip = False)

            metrics = calculate_portfolio_metrics(portfolio_valueDf, plot=False, og_data=df)
            results.append(metrics)

        # Only the equity curve of the last run is stored
        portfolio_valueDf.to_csv(os.path.join(folder_name, f"portfolio_valueDf_{description[ind].replace(' ', '')}.csv"), index=False)

        # Mean and standard deviation of every metric across the runs
        metrics_mean = np.mean(results, axis=0)
        metrics_std = np.std(results, axis=0)

        # Storing the data, formatted in scientific notation with 3 decimals
        metrics_df = pd.DataFrame(columns=['Metric', 'Mean', 'Standard Deviation'])
        metrics_df['Metric'] = metric_names
        metrics_df['Mean'] = [f'{x:.3e}' for x in metrics_mean]
        metrics_df['Standard Deviation'] = [f'{x:.3e}' for x in metrics_std]

        # Printing the metrics
        print(description[ind])
        print(metrics_df)
        print('\n')

        # Saving the metrics to a csv file
        metrics_df.to_csv(os.path.join(folder_name, f"{'test_portfolio_metrics'+str(ind)}.csv"), index=False)

def trade_on_test(df, initialCapital, folder_name):
    """Simulate each hard-coded strategy variation on ``df`` and persist the outcome.

    ``df`` is first saved as ``trading_data.csv``. For every variation the
    equity curve returned by ``tradingStrategy`` is saved as
    ``portfolio_valueDf_<description>.csv`` and a table with the mean and
    standard deviation of the portfolio metrics is printed and saved as
    ``test_portfolio_metrics<ind>.csv``. All files go into ``folder_name``.
    """
    df.to_csv(os.path.join(folder_name, "trading_data.csv"), index=False)

    # Variations as [useCertainty, certaintyThreshold, short] with matching labels
    settings = [[False, 100, False], [True, 100, False],[False, 100, True], [True, 100, True]]
    description = ['Long','Long using thresholded certainty','Short','Short using thresholded certainty']

    # How many simulations are averaged per variation
    num_runs = 1

    for ind, (use_certainty, threshold, go_short) in enumerate(settings):

        # Collect the metrics of every run of this variation
        results = []
        for _ in range(num_runs):
            portfolio_valueDf = tradingStrategy(
                df,
                portfolio_value=initialCapital,
                useCertainty=use_certainty,
                usetransactionFee=True,
                transactionFee=0.0001*0.2,  # fee from IBRKs
                useSlippage=False,
                slippageChange=0.1,
                certaintyThreshold=threshold,
                short=go_short,
                skip=False,
            )
            results.append(calculate_portfolio_metrics(portfolio_valueDf, plot=False, og_data=df))

        # The equity curve of the last run is the one written to disk
        curve_path = os.path.join(folder_name, f"portfolio_valueDf_{description[ind].replace(' ', '')}.csv")
        portfolio_valueDf.to_csv(curve_path, index=False)

        # Aggregate the metrics across runs
        metrics_mean = np.mean(results, axis=0)
        metrics_variance = np.std(results, axis=0)

        metrics_df = pd.DataFrame(columns=['Metric', 'Mean', 'Standard Deviation'])
        metrics_df['Metric'] = ['Cumulative Returns', 'Annualized Volatility', 'Sharpe Ratio',
                                'Sortino Ratio', 'Maximum Drawdown','beta', 'alpha']
        metrics_df['Mean'] = metrics_mean
        metrics_df['Standard Deviation'] = metrics_variance

        # Render both numeric columns in scientific notation with 3 decimals
        for column in ('Mean', 'Standard Deviation'):
            metrics_df[column] = metrics_df[column].apply(lambda x: f'{x:.3e}')

        # Report the table
        print(description[ind])
        print(metrics_df)
        print('\n')

        # Persist the table
        metrics_path = os.path.join(folder_name, f"{'test_portfolio_metrics'+str(ind)}.csv")
        metrics_df.to_csv(metrics_path, index=False)

*Function that finds the best threshold based on the validation data*

*Three options to optimise - Directional Accuracy, Profit (using thresholded uncertainty), Sharpe (using thresholded uncertainty)*

In [None]:
# Have to add another metric... Directional Accuracy using the df itself, threshold using query and see when direction was correct
# Pass the threshold to the next section

# ADD: Consider both long and short 

def _save_tuning_plot(x, y, xlabel, ylabel, title, plot_name, folder_name):
    """Draw one line plot, save it as ``plot_name`` inside ``folder_name`` and show it."""
    plt.plot(x, y)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    plt.title(title)
    plt.savefig(os.path.join(folder_name, plot_name))
    plt.show()


def tune_threshold(val_df, initialCapital, folder_name, useCertainty=True, short=False, optimise='profit', days=4, test=False):
    """Scan certainty thresholds and return the one that maximises the chosen target.

    For every candidate threshold the data is passed through ``skip_day``, a
    portfolio is simulated with ``tradingStrategy`` (no transaction fee, no
    slippage), its metrics are computed with ``calculate_portfolio_metrics``
    and the directional accuracy of the rows with ``certainty`` below the
    threshold and ``skip == 0`` is measured. The resulting table, four
    diagnostic plots and the tuned threshold are saved inside ``folder_name``.

    Parameters
    ----------
    val_df : pandas.DataFrame
        Data to tune on. The ``certainty`` and ``tradedate`` columns are read
        directly; the frame returned by ``skip_day`` is additionally queried on
        ``predClass``, ``actualClass`` and ``skip``.
    initialCapital : float
        Portfolio value at the start of every simulation.
    folder_name : str
        Directory that receives the plots and CSV files.
    useCertainty : bool, default True
        Forwarded to ``tradingStrategy``.
    short : bool, default False
        Forwarded to ``tradingStrategy``; also part of the plot file names.
    optimise : {'profit', 'sharpe', 'da'}, default 'profit'
        Target to maximise: cumulative returns, Sharpe ratio or directional
        accuracy (percentage).
    days : int, default 4
        Forwarded to ``skip_day``; also part of the plot file names.
    test : bool, default False
        When True the outputs are prefixed with ``TEST_`` and the plot titles
        say "Test Data" instead of "Validation Data".

    Returns
    -------
    float
        Threshold of the first row that attains the maximum of the target.

    Raises
    ------
    ValueError
        If ``optimise`` is not one of the supported targets.
    """
    # Optimisation target -> (column of the tuning table to maximise, printed heading)
    optimise_targets = {
        'profit': ('CumulativeReturns', 'Profit Optimised Portfolio'),
        'sharpe': ('SharpeRatio', 'Sharpe Ratio Optimised Portfolio'),
        'da': ('percentDA', 'Directional Accuracy Optimised Portfolio'),
    }
    # Reject an unknown target before any simulation or file output; otherwise the
    # failure only shows up at the very end as an unbound `tuned_threshold`.
    if optimise not in optimise_targets:
        raise ValueError("optimise must be one of 'profit', 'sharpe' or 'da', got %r" % (optimise,))

    # !!WHEN USING UN-NORMALISED UNCERTAINTY!!
    # The step is one twentieth of the observed certainty range
    step = (val_df['certainty'].max() - val_df['certainty'].min())/20
    certaintyThresholds = np.arange(val_df['certainty'].min(),val_df['certainty'].max()+step,step)

    results, correctDADFlens, certainpredsDFlens, percentDAlist = [], [], [], []

    for certaintyThreshold in certaintyThresholds:

        val_df_mod = skip_day(val_df, certaintyThreshold, days=days)

        # Generating a portfolio from the validation data
        portfolio_valueDf = tradingStrategy(val_df_mod,
                                           portfolio_value = initialCapital,
                                           useCertainty = useCertainty,
                                           usetransactionFee = False,
                                           transactionFee = 0.0001*0.2, #fee from IBRKs
                                           useSlippage = False,
                                           slippageChange = 0.1,
                                           certaintyThreshold = certaintyThreshold,
                                           short = short,
                                           skip = False)

        # Calculating the portfolio metrics
        metrics = calculate_portfolio_metrics(portfolio_valueDf, plot=False)
        results.append(metrics)

        # Number of correct predictions under the threshold level
        correctDADF = val_df_mod.query('((predClass==1 & actualClass==1) | (predClass==0 & actualClass==0)) & certainty < %f & skip == 0' % (float(certaintyThreshold)))
        correctDADFlens.append(len(correctDADF.index))

        # Number of predictions under the threshold level
        certainpredsDF = val_df_mod.query('certainty < %f & skip == 0' % (float(certaintyThreshold)))
        certainpredsDFlens.append(len(certainpredsDF.index))

        # Directional accuracy is undefined when no prediction passes the filter
        if len(certainpredsDF.index) != 0:
            percentDAlist.append((len(correctDADF.index)/len(certainpredsDF.index))*100)
        else:
            percentDAlist.append(np.nan)

    results_df = pd.DataFrame(data=results, columns=['CumulativeReturns', 'Annualized Volatility', 'SharpeRatio', 'Sortino Ratio', 'Maximum Drawdown', 'beta', 'alpha'])

    # Each insert goes to position 0, so the final column order starts with
    # percentDA, numberofpredictions, correctpredictions, threshold
    results_df.insert(0, "threshold", certaintyThresholds)
    results_df.insert(0, "correctpredictions", correctDADFlens)
    results_df.insert(0, "numberofpredictions", certainpredsDFlens)
    results_df.insert(0, "percentDA", percentDAlist)

    # Naming pieces shared by every output of this call
    file_prefix = 'TEST_' if test else ''
    data_label = 'Test Data' if test else 'Validation Data'
    name_suffix = '_days'+str(days)+'_optimise'+optimise+'_short'+str(short)

    # (x, y, x label, y label, title, file name stem) for each diagnostic plot
    plot_specs = [
        (val_df['tradedate'], val_df['certainty'], 'Date', 'Certainty',
         'Certainty Plot', 'Certaintyvstradedate'),
        (results_df['threshold'], results_df['percentDA'], 'Threshold', 'DA (%)',
         'DA vs Threshold', 'DAvsThreshold'),
        (results_df['threshold'], results_df['SharpeRatio'], 'Threshold', 'Sharpe Ratio',
         'Sharpe Ratio vs Threshold', 'SharpeRatiovsThreshold'),
        (results_df['threshold'], results_df['CumulativeReturns'], 'Threshold', 'Returns',
         'Cumulative Returns vs Threshold', 'CumulativeReturnsvsThreshold'),
    ]
    for x, y, xlabel, ylabel, title, stem in plot_specs:
        _save_tuning_plot(x, y, xlabel, ylabel,
                          '%s (%s)' % (title, data_label),
                          file_prefix + stem + name_suffix,
                          folder_name)

    results_df.to_csv(os.path.join(folder_name, file_prefix + 'Tuning_Outcome.csv'), index=False)

    # Finding the best threshold for the required optimisation
    target_column, heading = optimise_targets[optimise]
    optimisedPortfolio = results_df[results_df[target_column] == results_df[target_column].max()]
    print(heading)
    print(optimisedPortfolio)
    # When several thresholds tie for the maximum the first one is used
    tuned_threshold = optimisedPortfolio['threshold'].values[0]
    tuned_parameter = optimisedPortfolio[target_column].values[0]

    tuned_thres = {'tuned_threshold':[tuned_threshold],'tuned_parameter':tuned_parameter}
    tuned_df = pd.DataFrame(tuned_thres)

    tuned_df.to_csv(os.path.join(folder_name, file_prefix + 'Tuned_Threshold_optimise' + optimise + '.csv'), index=False)

    return tuned_threshold
            

*Testing all the possible trading methods*

def tradingStrategy(df,portfolio_value = 1000, useCertainty = True, usetransactionFee = True, transactionFee = 0.005, useSlippage = True, slippageChange = 0.1, certaintyThreshold = 0.1, short=False, skip=True):
    """Simulate trading on the predictions in ``df``, one trading date at a time.

    On every unique ``tradedate`` the rows with ``certainty`` strictly below
    ``certaintyThreshold`` are traded: only ``predClass == 1`` rows when
    ``short`` is False, rows with ``predClass`` 1 or 0 when it is True (class 0
    is traded as a short position). The current portfolio value is split
    equally over the selected rows; with ``useCertainty`` every share is scaled
    by the row's ``certainty`` and the remainder is kept liquid. The return of
    a trade is the relative change from ``tradeactualPrice`` to ``actualPrice``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``tradedate``, ``predClass``, ``certainty``,
        ``tradeactualPrice`` and ``actualPrice``; ``skip`` as well when
        ``skip`` is True.
    portfolio_value : float
        Starting capital.
    useCertainty : bool
        Scale each position by the row's certainty.
    usetransactionFee : bool
        Subtract ``transactionFee`` twice per traded row (buy and sell).
    transactionFee : float
        Flat fee per transaction.
    useSlippage : bool
        Add uniform noise in ``[-slippageChange, slippageChange]`` to both prices.
    slippageChange : float
        Half-width of the slippage noise.
    certaintyThreshold : float
        Rows are traded only when their certainty is below this value.
    short : bool
        Whether class-0 predictions are traded as short positions.
    skip : bool
        When True, a date is not traded unless the ``skip`` value of its first
        row equals 0.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``portfolio_value``; one row per trading date
        holding the value recorded before that date's trades.
    """
    value_history = []
    remainingLiquid = 0

    # Walk through the trading dates in order of first appearance
    for tradingDate in df.tradedate.unique():

        # The value is logged before the day's trades are applied
        value_history.append([tradingDate, portfolio_value])

        # A day is skipped only when skipping is enabled and its flag is not 0
        day_is_skipped = skip and not (df.query('tradedate=="%s"' % (tradingDate))['skip'].values[0] == 0)

        if day_is_skipped:
            # No trades shall occur: use a frame without rows
            dayDf = pd.DataFrame(columns=['index', 'tradedate', 'date', 'tradeactualPrice', 'actualPrice', 'actualClass', 'predClass', 'certainty', 'skip'])
        elif short:
            # Sell signals are kept when short positions are allowed
            dayDf = df.query('tradedate=="%s" & (predClass==1 | predClass==0) & certainty < %f' % (tradingDate, float(certaintyThreshold)))
        else:
            dayDf = df.query('tradedate=="%s" & predClass==1 & certainty < %f' % (tradingDate, float(certaintyThreshold)))

        trade_count = dayDf.shape[0]
        if trade_count == 0:
            # Nothing to trade today; the portfolio value is carried over
            continue

        # +1 for a long position (class 1), -1 for a short position (anything else)
        dayDf['LongShort'] = dayDf['predClass'].apply(lambda signal: 1 if signal == 1 else -1)

        # Optional slippage: perturb the entry price first, then the exit price
        if useSlippage:
            entry_noise = np.random.uniform(low=-slippageChange, high=slippageChange, size=len(dayDf))
            dayDf['tradeactualPrice'] = dayDf['tradeactualPrice'] + entry_noise
            exit_noise = np.random.uniform(low=-slippageChange, high=slippageChange, size=len(dayDf))
            dayDf['actualPrice'] = dayDf['actualPrice'] + exit_noise

        # Relative price change from entry to exit
        price_pair = dayDf[['tradeactualPrice', 'actualPrice']]
        dayDf['pct_change'] = price_pair.pct_change(axis=1)['actualPrice']

        # Equal split of the capital over today's trades
        investmentPerStock = np.full(trade_count, portfolio_value / trade_count)

        # With certainty sizing part of the capital stays uninvested
        if useCertainty:
            investmentPerStock = investmentPerStock*dayDf['certainty'] # dayDf['certainty'] ranges from 0 to 1
            remainingLiquid = portfolio_value - np.sum(investmentPerStock)

        # Profit or loss per trade, sign-flipped for short positions
        dayDf['profitLoss'] = dayDf['pct_change']*investmentPerStock*dayDf['LongShort']

        # New portfolio value: invested capital + result + uninvested capital
        invested_total = np.sum(investmentPerStock)
        realised_total = np.sum(np.array(dayDf['profitLoss']))
        portfolio_value = invested_total + realised_total + remainingLiquid

        # Two transactions (buy and sell) are charged per traded row
        if usetransactionFee:
            portfolio_value -= transactionFee*trade_count*2

    # Storing the data in a df and returning it
    portfolio_valueDf = pd.DataFrame(value_history)
    portfolio_valueDf.columns = ['Date', 'portfolio_value']
    return portfolio_valueDf

def trade_on_test(df, tuned_threshold, initialCapital, folder_name):
    """Back-test six strategy variations that use the tuned certainty threshold.

    ``df`` is passed through ``skip_day`` with ``tuned_threshold`` and
    ``days=4``; every variation is then simulated with ``tradingStrategy``
    (skip flags honoured, transaction fee 0.005, no slippage). The equity curve
    is saved as ``portfolio_valueDf_<description>.csv`` and the mean / standard
    deviation of the portfolio metrics are printed and saved as
    ``test_portfolio_metrics<ind>.csv`` inside ``folder_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Test data handed to ``skip_day``.
    tuned_threshold : float
        Certainty threshold used for ``skip_day`` and for the two
        "thresholded certainty" variations.
    initialCapital : float
        Portfolio value at the start of every simulation.
    folder_name : str
        Directory that receives the CSV outputs.

    Returns
    -------
    None
    """
    # Hard-coding the settings for the trading strategy variations.
    # Each entry is [useCertainty, certaintyThreshold, short].
    settings = [[False, 100, False],[True, 100, False], [True, tuned_threshold, False],[False, 100, True],[True, 100, True], [True, tuned_threshold, True]]
    description = ['Long','Long using certainty','Long using thresholded certainty','Short','Short using certainty','Short using thresholded certainty']

    # Labels in the order in which calculate_portfolio_metrics is unpacked
    # elsewhere in this notebook (seven values).
    metric_names = ['Cumulative Returns', 'Annualized Volatility', 'Sharpe Ratio',
                    'Sortino Ratio', 'Maximum Drawdown', 'beta', 'alpha']

    # Number of simulations per variation whose metrics are averaged
    num_runs = 1

    # The skip flags depend only on the data and the tuned threshold, so they are
    # computed once and shared by all variations.
    # NOTE(review): assumes skip_day returns the same result on every call - confirm.
    df_mod = skip_day(df, tuned_threshold, days=4)

    for ind, setting in enumerate(settings):

        results = []

        for _ in range(num_runs):

            portfolio_valueDf = tradingStrategy(df_mod,
                                               portfolio_value = initialCapital,
                                               useCertainty = setting[0],
                                               usetransactionFee = True,
                                               transactionFee = 0.005,
                                               useSlippage = False,
                                               slippageChange = 0.1,
                                               certaintyThreshold = setting[1],
                                               short = setting[2],
                                               skip = True)

            metrics = calculate_portfolio_metrics(portfolio_valueDf, plot=False)
            results.append(metrics)

        # Only the equity curve of the last run is stored
        portfolio_valueDf.to_csv(os.path.join(folder_name, f"portfolio_valueDf_{description[ind].replace(' ', '')}.csv"), index=False)

        # Mean and standard deviation of every metric across the runs
        metrics_mean = np.mean(results, axis=0)
        metrics_std = np.std(results, axis=0)

        # A fixed list of five labels does not fit the seven returned metrics and
        # makes the column assignment fail with a length mismatch; the labels are
        # therefore cut to the number of metrics that were actually returned.
        metrics_df = pd.DataFrame(columns=['Metric', 'Mean', 'Standard Deviation'])
        metrics_df['Metric'] = metric_names[:len(metrics_mean)]

        # Scientific notation with 3 decimals
        metrics_df['Mean'] = [f'{x:.3e}' for x in metrics_mean]
        metrics_df['Standard Deviation'] = [f'{x:.3e}' for x in metrics_std]

        # Printing the metrics
        print(description[ind])
        print(metrics_df)
        print('\n')

        # Saving the metrics to a csv file
        metrics_df.to_csv(os.path.join(folder_name, f"{'test_portfolio_metrics'+str(ind)}.csv"), index=False)

*Plotting the result*

In [None]:
def plot_equity_curve(df, initialCapital, folder_name):
    """Plot the equity curves of the four strategy variations in a single figure.

    Every variation is simulated with ``tradingStrategy`` on ``df`` starting
    from ``initialCapital``. The figure is saved in ``folder_name`` as
    ``portfolio_growth_with_all_strategies.png`` and then shown.
    """
    # Variations as [useCertainty, certaintyThreshold, short] with their legend labels
    settings = [[False, 100, False], [True, 100, False],[False, 100, True], [True, 100, True]]
    description = ['Long','Long using thresholded certainty','Short','Short using thresholded certainty']

    sns.set(style='whitegrid')
    plt.figure(figsize=(12, 6))

    # One line per variation, drawn in the order of `settings`
    for use_certainty, threshold, go_short in settings:
        portfolio_valueDf = tradingStrategy(
            df,
            portfolio_value=initialCapital,
            useCertainty=use_certainty,
            usetransactionFee=True,
            transactionFee=0.0001*0.2,  # fee from IBRKs
            useSlippage=False,
            slippageChange=0.1,
            certaintyThreshold=threshold,
            short=go_short,
            skip=False,
        )
        portfolio_valueDf['Date'] = pd.to_datetime(portfolio_valueDf['Date'])
        dates = portfolio_valueDf['Date']
        plt.plot(dates, portfolio_valueDf['portfolio_value'], linewidth=2)

    plt.xlabel('Date')
    plt.ylabel('Portfolio Value ($)')
    plt.title('Portfolio Growth Using an Different Trading Techniques')
    plt.xticks(rotation=45)
    # The x-range is taken from the last simulated variation
    first_date = portfolio_valueDf['Date'].iloc[0]
    last_date = portfolio_valueDf['Date'].iloc[-1]
    plt.xlim(first_date, last_date)
    plt.legend(description)

    plt.tight_layout()
    sns.despine()
    plt.savefig(os.path.join(folder_name, "portfolio_growth_with_all_strategies.png"))
    plt.show()

def plot_equity_curve(df, tuned_threshold, initialCapital, folder_name):
    """Save the test directional accuracy and plot six equity curves.

    ``df`` is passed through ``skip_day`` with ``tuned_threshold`` and
    ``days=4``. The share of correct predictions among the rows with
    ``certainty`` below the threshold and ``skip == 0`` is written to
    ``Test_Percent_DA.csv``. Each strategy variation is then simulated with
    ``tradingStrategy`` and drawn in one figure, which is saved in
    ``folder_name`` as ``portfolio_growth_with_all_strategies.png`` and shown.
    """
    df_mod = skip_day(df, tuned_threshold, days=4)

    # Directional accuracy on the test data.
    # Correct predictions under the threshold level on non-skipped days
    hits = df_mod.query('((predClass==1 & actualClass==1) | (predClass==0 & actualClass==0)) & certainty < %f & skip == 0' % (float(tuned_threshold)))

    # All predictions under the threshold level on non-skipped days
    candidates = df_mod.query('certainty < %f & skip == 0' % (float(tuned_threshold)))

    # Accuracy is undefined when no prediction passes the filter
    n_candidates = len(candidates.index)
    accuracy = np.nan if n_candidates == 0 else (len(hits.index)/n_candidates)*100

    percent_da_df = pd.DataFrame({'test_percent_da':[accuracy]})
    percent_da_df.to_csv(os.path.join(folder_name, f"{'Test_Percent_DA'}.csv"), index=False)

    # Variations as [useCertainty, certaintyThreshold, short] with their legend labels
    settings = [[False, 100, False],[True, 100, False], [True, tuned_threshold, False],[False, 100, True],[True, 100, True], [True, tuned_threshold, True]]
    description = ['Long','Long using certainty','Long using thresholded certainty','Short','Short using certainty','Short using thresholded certainty']

    sns.set(style='whitegrid')
    plt.figure(figsize=(12, 6))

    # One line per variation, drawn in the order of `settings`
    for use_certainty, threshold, go_short in settings:
        portfolio_valueDf = tradingStrategy(
            df_mod,
            portfolio_value=initialCapital,
            useCertainty=use_certainty,
            usetransactionFee=True,
            transactionFee=0.005,
            useSlippage=False,
            slippageChange=0.1,
            certaintyThreshold=threshold,
            short=go_short,
            skip=True,
        )
        portfolio_valueDf['Date'] = pd.to_datetime(portfolio_valueDf['Date'])
        dates = portfolio_valueDf['Date']
        plt.plot(dates, portfolio_valueDf['portfolio_value'], linewidth=2)

    plt.xlabel('Date')
    plt.ylabel('Portfolio Value ($)')
    plt.title('Portfolio Growth Using an Different Trading Techniques')
    plt.xticks(rotation=45)
    # The x-range is taken from the last simulated variation
    first_date = portfolio_valueDf['Date'].iloc[0]
    last_date = portfolio_valueDf['Date'].iloc[-1]
    plt.xlim(first_date, last_date)
    plt.legend(description)

    plt.tight_layout()
    sns.despine()
    plt.savefig(os.path.join(folder_name, "portfolio_growth_with_all_strategies.png"))
    plt.show()

In [None]:
def superimpose_tuned_data(folder_name, short, days, optimise):
    """Overlay the validation and test tuning curves, one figure per metric.

    Reads ``Tuning_Outcome.csv`` and ``TEST_Tuning_Outcome.csv`` from
    ``folder_name`` and plots directional accuracy, Sharpe ratio and cumulative
    returns against the threshold. Every figure is saved in ``folder_name``
    (``short``, ``days`` and ``optimise`` only appear in the file names) and shown.
    """
    # Tuning tables for the validation and the test data
    val_tuning_data = read_csv_as_df("Tuning_Outcome.csv",folder_name)
    test_tuning_data = read_csv_as_df("TEST_Tuning_Outcome.csv",folder_name)

    # (column to plot, y-axis label, file name prefix, figure title)
    panels = [
        ('percentDA', 'DA (%)',
         'TESTVAL_DAvsThreshold_days', 'DA vs Threshold (Test & Val Data)'),
        ('SharpeRatio', 'Sharpe Ratio',
         'TESTVAL_SharpeRatiovsThreshold_days', 'Sharpe Ratio vs Threshold (Test & Val Data)'),
        ('CumulativeReturns', 'Returns',
         'TESTVAL_CumulativeReturnsvsThreshold_days', 'Cumulative Returns vs Threshold (Test & Val Data)'),
    ]

    for column, y_label, name_prefix, title in panels:
        plt.plot(val_tuning_data['threshold'], val_tuning_data[column], label='Val')
        plt.plot(test_tuning_data['threshold'], test_tuning_data[column], label='Test')
        plt.xlabel('Threshold')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid()
        plot_name = name_prefix+str(days)+'_optimise'+optimise+'_short'+str(short)
        plt.title(title)
        plt.savefig(os.path.join(folder_name, plot_name))
        plt.show()

In [None]:
def collate_data(folder_name, optimise, days, short, s, n_strategies=4, n_metrics=7):
    """Collect the run configuration and the saved test metrics into one flat row.

    The row starts with ``folder_name, optimise, days, short, s`` (stored as
    given). For every file ``test_portfolio_metrics<i>.csv`` in ``folder_name``
    with ``i = 0 .. n_strategies - 1`` it then holds, for each of the first
    ``n_metrics`` rows, the ``Mean`` value followed by the
    ``Standard Deviation`` value.

    Parameters
    ----------
    folder_name : str
        Directory holding the metric CSV files; also the first row entry.
    optimise, days, short, s
        Run settings copied into the row unchanged.
    n_strategies : int, default 4
        Number of ``test_portfolio_metrics<i>.csv`` files to read.
    n_metrics : int, default 7
        Number of metric rows taken from every file.

    Returns
    -------
    list
        ``5 + 2 * n_strategies * n_metrics`` entries.
    """
    # Run configuration comes first
    data = [folder_name, optimise, days, short, s]

    for i in range(n_strategies):

        df = read_csv_as_df('test_portfolio_metrics'+str(i)+'.csv',folder_name)

        mean_values = df['Mean']
        std_values = df['Standard Deviation']

        # Interleave mean and standard deviation metric by metric
        for j in range(n_metrics):
            data.append(mean_values[j])
            data.append(std_values[j])

    return data

*Setting the initial Capital*

In [None]:
# Starting capital used by every back-test in this notebook
initialCapital = 1_000_000

In [None]:
# Forecasting models whose predictions are traded
models = ['arima', 'arima_garch', 'transformer_mcdropout', 'transformer_garch', 'transformer_enbpi']

# Alpha values forwarded to load_data for every model
alphas = [0.15, 0.1, 0.05, 0.01]

# Only used to label the result folders and the collated rows in the loop below
optimises = ['sharpe']

# One row per (data folder, model, s, alpha) run is appended to this list
collated_data = list()

# Column names for the collated results: five identifying columns followed by
# Mean/Std pairs of every metric for each of the four trading strategies.
# The names are generated (instead of hand-typed) so the order is guaranteed to
# be strategy -> metric -> statistic.
collated_strategies = ['Long', 'Long using thresholded certainty', 'Short', 'Short using thresholded certainty']
collated_metrics = ['Cumulative Returns', 'Annualized Volatility', 'Sharpe Ratio', 'Sortino Ratio', 'Maximum Drawdown', 'Beta', 'Alpha']
collated_stats = ['Mean', 'Std']

collated_columns = ['folder_name', 'optimise', 'days', 'short', 's'] + [
    strategy + ' -> ' + metric + ' -> ' + stat
    for strategy in collated_strategies
    for metric in collated_metrics
    for stat in collated_stats
]

# Raw strings: '\m' is not a valid escape sequence, so the non-raw spelling only
# worked by accident and emits a warning on recent Python versions.
# The leading backslash is kept because the names are concatenated onto a base path.
data_folders = [r'\model_data_1', r'\model_data_2', r'\model_data_3', r'\model_data_5', r'\model_data_6', r'\model_data_OT']


In [None]:
import csv

# Directory that contains the per-dataset folders with the original Transformer predictions.
# NOTE(review): machine-specific absolute path; point this at your own data location.
predictions_root = r'C:\Users\porte\Downloads'

# NOTE(review): `days` and `short` are passed to collate_data() below but are never
# assigned in this cell -- they must already exist in the kernel. Confirm they are
# defined in an earlier cell before running on a fresh kernel.

for data_folder in data_folders:

    # Creating a folder where to store the trading data (leading backslash stripped from the name)
    main_folder = create_unique_folder('trading_'+data_folder[1:])

    for optimise in optimises:

        for model in models:

            # Only the EnbPI model is run for several values of s; every other model runs once with s=None
            ss = [5, 10, 20, 25, 50, 100] if model == 'transformer_enbpi' else [None]

            for s in ss:

                # Step passed to get_lin_reg_dataset: 6*s when s is given, otherwise 30
                model_step = s*6 if s is not None else 30

                for alpha in alphas:

                    print(f"Optimise: {optimise}, Model: {model}, s: {s}, Alpha: {alpha}, Data Folder: {data_folder}")

                    # Creating a folder for the results
                    sub_folder_name = model+'_alpha'+str(alpha)+'_s'+str(s)+'_optimise'+optimise

                    # Create the directory for the results
                    folder_name = os.path.join(main_folder, sub_folder_name)
                    os.makedirs(folder_name, exist_ok=True)

                    # Loading data
                    y_true, y_pred, y_pred_lower, y_pred_upper, val_y_true, val_y_pred, val_y_pred_lower, val_y_pred_upper, og_data_true, og_data_date = load_data(model=model, alpha=alpha, s=s, data_folder=data_folder)

                    # Preparing the data
                    index, tradedate, date, tradeactualPrice, actualPrice, val_index, val_tradedate, val_date, val_tradeactualPrice, val_actualPrice = prepare_data(y_true, val_y_true, og_data_date, og_data_true)

                    # Classifying the predictions
                    actualClass, predClass = convert_to_class(index, actualPrice, tradeactualPrice, y_pred)
                    val_actualClass, val_predClass = convert_to_class(val_index, val_actualPrice, val_tradeactualPrice, val_y_pred)

                    # Normalising the uncertainty
                    norm_val_uncert, norm_test_uncert = normalise_test_uncert(val_y_pred_upper, val_y_pred_lower, y_pred_upper, y_pred_lower)

                    # Formatting the dates
                    tradedate, date, val_tradedate, val_date = format_dates(tradedate, date, val_tradedate, val_date)

                    # Creating a df that is acceptable by the trading algo
                    df = create_df(index, tradedate, date, tradeactualPrice, actualPrice, actualClass, predClass, norm_test_uncert)
                    val_df = create_df(val_index, val_tradedate, val_date, val_tradeactualPrice, val_actualPrice, val_actualClass, val_predClass, norm_val_uncert)

                    # Building the datasets for the model that maps uncertainty to threshold / position size
                    X_train, y_train = get_lin_reg_dataset(val_df, step=model_step)
                    X_test, y_test = get_lin_reg_dataset(df, test=True, val_df=val_df, step=model_step)

                    # Train the model
                    model_tuner = train_model(X_train, y_train)

                    # Test the model
                    y_proba, accuracy = test_model_with_threshold(model_tuner, X_test, y_test)

                    # Probabilities below 0.5 are zeroed; the rest are kept as the certainty
                    df['certainty'] = np.where(y_proba < 0.5, 0, y_proba)

                    if model in ('transformer_enbpi', 'transformer_mcdropout'):

                        # Path to the CSV holding the original Transformer predictions for this data folder
                        csv_file_path = predictions_root + data_folder + r'\preds_run_0_ns_Transformer_1_' + data_folder.split('_')[-1] + '.csv'

                        # Read every cell of the CSV into one flat list of floats
                        with open(csv_file_path, newline='') as csvfile:
                            og_pred_data = [float(item) for row in csv.reader(csvfile) for item in row]

                        # Replicate df so the original predictions can be traded alongside it
                        df_og_data = df.copy()

                        # Replace predClass with the classes derived from the original predictions.
                        # Fix: the raw float predictions (og_pred_data) used to be stored here,
                        # leaving the computed new_predClass unused.
                        actualClass_NotUSED, new_predClass = convert_to_class(index, actualPrice, tradeactualPrice, og_pred_data)
                        df_og_data['predClass'] = new_predClass

                        trade_on_test(df, initialCapital, folder_name, df_og_data=df_og_data)

                    else:
                        trade_on_test(df, initialCapital, folder_name, df_og_data=None)

                    # Disabled threshold-tuning workflow, kept for reference:
                    # Tuning the threshold on the validation data
                    ##tuned_threshold = tune_threshold(val_df, initialCapital, folder_name, useCertainty=True, short=short, optimise=optimise, days=days)

                    # Tuning on the test data to check how different the threshold is
                    ##tuned_threshold_test = tune_threshold(df, initialCapital, folder_name, useCertainty=True, short=short, optimise=optimise, days=days, test=True)

                    # Plotting the superimposed thresholded parameters
                    ##superimpose_tuned_data(folder_name, short, days, optimise)

                    # Trading on the test data using the threshold obtained from the validation data
                    #trade_on_test(df, tuned_threshold, initialCapital, folder_name)
                    #trade_on_test(df, initialCapital, folder_name)

                    # Plotting the equity curve for all the different methods
                    plot_equity_curve(df, initialCapital, folder_name)

                    collated_data.append(collate_data(folder_name, optimise, days, short, s))


    # Written once per data folder with everything collated so far, so the CSV is
    # overwritten with a growing table (presumably as a checkpoint of partial results)
    collated_data_df = pd.DataFrame(collated_data, columns=collated_columns)
    collated_data_df.to_csv('collated_data_df_new.csv', index=False)

In [None]:
# Display `df` as left by the final iteration of the trading loop above
df

*Checking folder paths are correct*

In [None]:
for data_folder in data_folders:
    
    # Creating a folder where to store the trading data
    main_folder = create_unique_folder('trading_'+data_folder[1:])
    
    for optimise in optimises:

        for model in models:

            if model == 'transformer_enbpi':
                ss = [5, 10, 20, 25, 50, 100]
            else:
                ss = [None]

            for s in ss:

                if s != None:
                    model_step = s*6
                else:
                    model_step = 30

                for alpha in alphas:

                    print(f"Optimise: {optimise}, Model: {model}, s: {s}, Alpha: {alpha}, Data Folder: {data_folder}")

                    # Creating a folder for the results
                    sub_folder_name = model+'_alpha'+str(alpha)+'_s'+str(s)+'_optimise'+optimise

                    # Create the directory for the results
                    folder_name = os.path.join(main_folder, sub_folder_name)
                    os.makedirs(folder_name, exist_ok=True)
                    
                    # Loading data
                    y_true, y_pred, y_pred_lower, y_pred_upper, val_y_true, val_y_pred, val_y_pred_lower, val_y_pred_upper, og_data_true, og_data_date = load_data(model=model, alpha=alpha, s=s, data_folder=data_folder)

                    