In [1]:
import sys
import os
import pandas as pd
import numpy as np
from datetime import date
import talib
from sklearn.linear_model import *
from sktime.forecasting.base import ForecastingHorizon
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, mean_squared_error
from sklearn.metrics import accuracy_score
from sktime.forecasting.model_selection import SlidingWindowSplitter
from joblib import Parallel, delayed
from itertools import islice
import json
import warnings

In [2]:
# --- Input locations ---------------------------------------------------------
root_data_dir = "/projects/genomic-ml/da2343/ml_project_2/data"
config_path = "/projects/genomic-ml/da2343/ml_project_2/settings/config.json"
dataset_name = "EURUSD_H1"
dataset_path = f"{root_data_dir}/EURUSD/EURUSD_H1_200702210000_202304242100_Update.csv"

# --- Trading settings --------------------------------------------------------
# Parse the JSON settings file, then pull this dataset's take-profit and
# stop-loss values out of its "trading_settings" section.
with open(config_path) as f:
    config = json.load(f)

tp, sl = (
    config["trading_settings"][dataset_name][setting]
    for setting in ("take_profit", "stop_loss")
)

# --- Price / indicator table -------------------------------------------------
# The first CSV column becomes the frame's index.
df = pd.read_csv(dataset_path, index_col=0)

# Single-column view of the closing prices; its first index label is kept as
# `offset` for cells that turn a 0-based position into an index label.
y = df[['Close']]
offset = y.index[0]

## GENERATE SEQUENTIAL TRADE DATA

In [3]:
def create_trade_order(row, position, tp, sl):
    """Build the record for a trade opened on the bar described by ``row``.

    Args:
        row: A row of the price/indicator frame (supports ``row["col"]`` and
            ``row.name``). Its ``Close`` value is used as the entry price.
        position: ``1`` places take-profit above and stop-loss below the entry
            price; any other value places them the other way round.
        tp: Distance from the entry price to the take-profit level.
        sl: Distance from the entry price to the stop-loss level.

    Returns:
        dict: Entry details, the indicator values copied from ``row``, and
        ``close_time`` / ``label`` set to ``None`` until the trade is closed.
    """
    entry_price = row["Close"]
    if position == 1:
        target_price = entry_price + tp
        stop_price = entry_price - sl
    else:
        target_price = entry_price - tp
        stop_price = entry_price + sl

    order = {
        "index": row.name,
        "ask_price": entry_price,
        "take_profit_price": target_price,
        "stop_loss_price": stop_price,
        "position": position,
    }

    # Copied straight from the row, in the order they appear in the output.
    copied_columns = (
        "MACD",
        "MACD_Signal",
        "MACD_Hist",
        "MACD_Crossover_Change",
        "RSI",
        "ATR",
        "ADX",
        "AROON_Oscillator",
        "WILLR",
        "OBV",
        "CCI",
        "PSAR",
        "AD",
        "ADOSC",
        "VOLUME_RSI",
        "MFI",
        "Date_Time",
    )
    for column in copied_columns:
        order[column] = row[column]

    # Filled in later, once the trade hits take-profit or stop-loss.
    order["close_time"] = None
    order["label"] = None
    return order


# Sequential trade generation: at most one trade is open at any time.
# Bars are visited in frame order. While the latest trade is still open, each
# bar is only used to test that trade's exit; a new trade can only be opened
# on a later bar, once the latest trade carries a label.
trades = []
for index, row in df.iterrows():
    open_trade = trades[-1] if trades else None

    if open_trade is not None and open_trade["label"] is None:
        close_price = row["Close"]
        if open_trade["position"] == 1:
            # Long: profit when price rises to the target, loss when it falls to the stop.
            hit_take_profit = close_price >= open_trade["take_profit_price"]
            hit_stop_loss = close_price <= open_trade["stop_loss_price"]
        else:
            # Short: the two levels sit on the opposite sides of the entry price.
            hit_take_profit = close_price <= open_trade["take_profit_price"]
            hit_stop_loss = close_price >= open_trade["stop_loss_price"]

        # Take-profit is tested first, so it wins if both conditions hold.
        if hit_take_profit:
            open_trade["label"] = 1
            open_trade["close_time"] = row["Date_Time"]
        elif hit_stop_loss:
            open_trade["label"] = 0
            open_trade["close_time"] = row["Date_Time"]

        # Whether the trade just closed or is still open, this bar never
        # opens a new trade.
        continue

    # No open trade: a non-zero crossover change opens one (positive -> long,
    # negative -> short). Zero and NaN fail both comparisons and are skipped.
    macd_crossover_change = row["MACD_Crossover_Change"]
    if macd_crossover_change > 0 or macd_crossover_change < 0:
        current_position = 1 if macd_crossover_change > 0 else 0
        trades.append(create_trade_order(row, current_position, tp, sl))

trades_df = pd.DataFrame(trades)

In [4]:
# Display the sequential trades table built by the loop above.
trades_df

Unnamed: 0,index,ask_price,take_profit_price,stop_loss_price,position,MACD,MACD_Signal,MACD_Hist,MACD_Crossover_Change,RSI,...,OBV,CCI,PSAR,AD,ADOSC,VOLUME_RSI,MFI,Date_Time,close_time,label
0,209,1.30870,1.31270,1.30620,1,-0.001955,-0.001962,0.000007,2.0,33.833860,...,2987.0,-59.563728,1.310000,-3.518072e+03,-314.606376,43.118997,27.493133,2007.03.05 22:00:00,2007.03.06 22:00:00,1
1,244,1.31130,1.30730,1.31380,0,0.000262,0.000342,-0.000080,-2.0,46.186253,...,4903.0,-138.344595,1.313834,-3.619358e+03,-94.962257,52.352590,46.481357,2007.03.07 09:00:00,2007.03.07 15:00:00,0
2,265,1.31710,1.31310,1.31960,0,0.001249,0.001289,-0.000040,-2.0,60.753873,...,6683.0,29.626780,1.318460,-2.647393e+03,9.642566,50.225690,45.481383,2007.03.08 06:00:00,2007.03.08 15:00:00,1
3,285,1.31430,1.31830,1.31180,1,-0.000365,-0.000393,0.000028,2.0,50.934218,...,6143.0,74.769797,1.311746,-2.825622e+03,134.357644,47.591747,44.034916,2007.03.09 02:00:00,2007.03.09 14:00:00,0
4,307,1.31210,1.31610,1.30960,1,-0.000718,-0.000724,0.000006,2.0,45.369438,...,4983.0,3.345281,1.313866,-2.560048e+03,135.647302,48.192723,45.661289,2007.03.12 01:00:00,2007.03.12 10:00:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4134,100000,1.10489,1.10889,1.10239,1,0.001867,0.001686,0.000181,2.0,75.531393,...,15049.0,177.792861,1.098382,-3.904942e+06,1065.826390,66.431461,62.992627,2023.04.13 15:00:00,2023.04.14 17:00:00,0
4135,100042,1.09868,1.10268,1.09618,1,-0.001327,-0.001374,0.000047,2.0,40.895697,...,-25520.0,24.394401,1.099470,-3.909506e+06,-281.560849,52.198844,48.281841,2023.04.17 09:00:00,2023.04.17 15:00:00,0
4136,100058,1.09241,1.09641,1.08991,1,-0.002050,-0.002051,0.000001,2.0,32.908571,...,-43695.0,-42.238600,1.096220,-3.925679e+06,-1665.883694,40.977494,28.144375,2023.04.18 01:00:00,2023.04.18 10:00:00,1
4137,100088,1.09686,1.09286,1.09936,0,0.000451,0.000463,-0.000012,-2.0,51.622283,...,-20871.0,-17.138055,1.098362,-3.917172e+06,781.721024,42.811481,56.587638,2023.04.19 07:00:00,2023.04.24 03:00:00,0


In [None]:
# Save the sequential trades to a CSV file in the working directory;
# index=False leaves the DataFrame's row labels out of the file.
# NOTE(review): the earlier "2359 rows × 24 columns" note looks stale — the
# preview above ends at row label 4137 (i.e. 4138 rows). The 24 columns match
# the keys built by create_trade_order. Re-check after re-running.
trades_df.to_csv(f"trades_seq_{dataset_name}_2007_2023.csv", index=False)

## GENERATE NON-SEQUENTIAL TRADE DATA

In [13]:

def _first_exit(closes, start, position, tp_price, sl_price):
    """Find the first bar from ``start`` onwards whose close ends the trade.

    Args:
        closes: Sequence of closing prices, addressed by 0-based position.
        start: First position to inspect (the bar after the entry bar).
        position: ``1`` for a long trade; any other value is treated as short.
        tp_price: Take-profit price level.
        sl_price: Stop-loss price level.

    Returns:
        tuple: ``(1, pos)`` if take-profit is reached first, ``(0, pos)`` if
        stop-loss is reached first, ``(None, None)`` if neither level is
        reached before the data ends.
    """
    is_long = position == 1
    for pos in range(start, len(closes)):
        close = closes[pos]
        if is_long:
            hit_take_profit = close >= tp_price
            hit_stop_loss = close <= sl_price
        else:
            hit_take_profit = close <= tp_price
            hit_stop_loss = close >= sl_price
        # Take-profit is tested first, so it wins if both conditions hold.
        if hit_take_profit:
            return 1, pos
        if hit_stop_loss:
            return 0, pos
    return None, None


# Columns copied unchanged from the signal bar into every trade record.
# Unlike create_trade_order, these records do not store MACD_Crossover_Change
# and place "label" before "close_time"; that layout is kept so the exported
# CSV has the same columns as before.
non_seq_copied_columns = (
    "MACD",
    "MACD_Signal",
    "MACD_Hist",
    "RSI",
    "ATR",
    "ADX",
    "AROON_Oscillator",
    "WILLR",
    "OBV",
    "CCI",
    "PSAR",
    "AD",
    "ADOSC",
    "VOLUME_RSI",
    "MFI",
    "Date_Time",
)

# Pull the columns used in the scans out once, so the inner search indexes a
# plain array by position and avoids a DataFrame label lookup on every bar.
close_prices = df["Close"].to_numpy()
crossover_changes = df["MACD_Crossover_Change"].to_numpy()
date_times = df["Date_Time"].to_numpy()

# Non-sequential trade generation: every crossover bar opens its own trade,
# regardless of whether earlier trades are still open.
trades = []
for pos in range(len(df)):
    crossover = crossover_changes[pos]
    if crossover > 0:
        current_position = 1
    elif crossover < 0:
        current_position = 0
    else:
        # Zero or NaN: no crossover on this bar, so no trade is opened.
        continue

    ask_price = close_prices[pos]
    if current_position == 1:
        tp_price = ask_price + tp
        sl_price = ask_price - sl
    else:
        tp_price = ask_price - tp
        sl_price = ask_price + sl

    signal_bar = df.iloc[pos]
    local_order = {
        # Positional lookup of the row label; equals the former
        # `position + offset` when the index is a contiguous integer range.
        "index": df.index[pos],
        "ask_price": ask_price,
        "take_profit_price": tp_price,
        "stop_loss_price": sl_price,
        "position": current_position,
    }
    for column in non_seq_copied_columns:
        local_order[column] = signal_bar[column]
    # Both stay None when the trade never reaches either level; initialising
    # close_time here guarantees the column exists even if no trade closes.
    local_order["label"] = None
    local_order["close_time"] = None

    label, exit_pos = _first_exit(
        close_prices, pos + 1, current_position, tp_price, sl_price
    )
    if label is not None:
        local_order["label"] = label
        local_order["close_time"] = date_times[exit_pos]
    trades.append(local_order)

trades_df = pd.DataFrame(trades)

In [14]:
# Display the non-sequential trades table built by the loop above.
trades_df

Unnamed: 0,index,ask_price,take_profit_price,stop_loss_price,position,MACD,MACD_Signal,MACD_Hist,RSI,ATR,...,OBV,CCI,PSAR,AD,ADOSC,VOLUME_RSI,MFI,Date_Time,label,close_time
0,209,1.30870,1.31270,1.30620,1,-0.001955,-0.001962,0.000007,33.833860,0.001827,...,2987.0,-59.563728,1.310000,-3.518072e+03,-314.606376,43.118997,27.493133,2007.03.05 22:00:00,1.0,2007.03.06 22:00:00
1,210,1.30800,1.30400,1.31050,0,-0.001967,-0.001963,-0.000004,31.687411,0.001839,...,2728.0,-77.611586,1.307000,-3.751172e+03,-355.228376,46.286378,29.501138,2007.03.05 23:00:00,0.0,2007.03.06 03:00:00
2,211,1.30870,1.31270,1.30620,1,-0.001898,-0.001950,0.000052,36.056116,0.001765,...,2922.0,-84.009466,1.307058,-3.612600e+03,-295.462070,44.135909,31.102673,2007.03.06 00:00:00,1.0,2007.03.06 22:00:00
3,244,1.31130,1.30730,1.31380,0,0.000262,0.000342,-0.000080,46.186253,0.001341,...,4903.0,-138.344595,1.313834,-3.619358e+03,-94.962257,52.352590,46.481357,2007.03.07 09:00:00,0.0,2007.03.07 15:00:00
4,248,1.31360,1.31760,1.31110,1,0.000325,0.000297,0.000029,60.534359,0.001250,...,5550.0,101.577287,1.310900,-3.351958e+03,-2.545212,50.850305,47.517985,2007.03.07 13:00:00,1.0,2007.03.07 20:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7736,100100,1.09657,1.10057,1.09407,1,-0.000231,-0.000265,0.000034,54.104304,0.001479,...,-30211.0,41.294539,1.092312,-3.920824e+06,-495.389051,46.954707,44.635653,2023.04.19 19:00:00,1.0,2023.04.24 11:00:00
7737,100127,1.09635,1.09235,1.09885,0,0.000258,0.000296,-0.000038,49.529907,0.001469,...,-32564.0,-96.149887,1.093891,-3.914997e+06,826.707713,46.609510,59.342942,2023.04.20 22:00:00,0.0,2023.04.21 22:00:00
7738,100143,1.09815,1.10215,1.09565,1,0.000015,-0.000064,0.000079,60.305810,0.001382,...,-25150.0,158.869872,1.093790,-3.913219e+06,790.528386,53.858577,56.312882,2023.04.21 14:00:00,0.0,2023.04.21 16:00:00
7739,100159,1.09816,1.09416,1.10066,0,0.000564,0.000577,-0.000013,52.410581,0.001190,...,-19404.0,-12.583156,1.096319,-3.912240e+06,-353.178936,44.699972,52.155783,2023.04.24 06:00:00,0.0,2023.04.24 11:00:00


In [15]:
# Save the non-sequential trades to a CSV file in the working directory;
# index=False leaves the DataFrame's row labels out of the file.
# NOTE(review): the earlier "2359 rows × 24 columns" note looks stale — the
# preview above ends at row label 7739 (i.e. 7740 rows) and the records built
# in the loop carry 23 keys. Re-check after re-running.
# NOTE(review): the filename says 2011_2023, but the preview starts in 2007
# and the sequential export is named 2007_2023 — confirm which is intended.
# Left unchanged here because other code may read this exact filename.
trades_df.to_csv(f"trades_non_seq_{dataset_name}_2011_2023.csv", index=False)

In [None]:
# Win/loss summary for the trades currently held in trades_df.
# Trades that never reached take-profit or stop-loss have a missing label
# (None/NaN) and therefore no outcome yet, so they are left out. Keeping them
# would turn sum(outcomes) into NaN and pass NaN labels to accuracy_score,
# which scikit-learn's input validation is expected to reject.
outcomes = trades_df["label"].dropna().astype(int).tolist()

no_of_trades = len(outcomes)
no_of_wins = sum(outcomes)

accuracy_df = pd.DataFrame({
    # Every trade is compared against an all-wins reference, so this is the
    # share of closed trades that hit take-profit. NaN when nothing has closed.
    'accuracy': accuracy_score([1] * no_of_trades, outcomes) if no_of_trades else float("nan"),
    'no_of_trades': no_of_trades,
    'no_of_wins': no_of_wins,
    'no_of_losses': no_of_trades - no_of_wins,
}, index=[0])