In [1]:
import sys
import os
import pandas as pd
import numpy as np
from datetime import date
import talib
from sklearn.linear_model import *
from sktime.forecasting.base import ForecastingHorizon
# from sktime.forecasting.compose import make_reduction
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, mean_squared_error
from sklearn.metrics import accuracy_score
from sktime.forecasting.model_selection import SlidingWindowSplitter
from joblib import Parallel, delayed
from itertools import islice
import json
import warnings


# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# One experiment configuration per row of the parameter grid.
params_df = pd.read_csv("params.csv")

# The row used to be selected from sys.argv (one task id per batch job);
# in the notebook the first row is pinned instead.
param_row = 0
param_dict = dict(params_df.iloc[param_row])

# Unpack the selected configuration into module-level settings.
dataset_name = param_dict["dataset_name"]
forecast_horizon = param_dict["fh"]
window_size = param_dict["window_size"]
algorithm = param_dict["algorithm"]
train_size = param_dict["train_size"]
timeperiod = param_dict["sma"]  # period used in the SMA_<period> column name

slope_threshold = param_dict["slope_threshold"]
year = str(param_dict["year"])
step_length = param_dict["step_length"]

root_data_dir = "/projects/genomic-ml/da2343/ml_project_2/data"

# (pair, "<start>_<end>" stamp embedded in the export file name) for every
# hourly (H1) dataset that is available.
_h1_exports = [
    ("EURUSD", "200702210000_202304242100"),
    ("USDJPY", "200705290000_202307282300"),
    ("GBPUSD", "200704170000_202307282300"),
    ("AUDUSD", "200704170000_202307282300"),
    ("USDCAD", "200705300000_202307282300"),
    ("USDCHF", "200704170000_202307282300"),
    # ("NZDUSD", "200704170000_202307282300"),
    ("EURJPY", "200705300000_202307282300"),
    ("EURGBP", "200703270000_202307282300"),
]
dataset_dict = {
    f"{pair}_H1": f"{root_data_dir}/{pair}/{pair}_H1_{span}_Update.csv"
    for pair, span in _h1_exports
}

dataset_path = dataset_dict[dataset_name]

# Trading settings (take-profit / stop-loss distances) are stored per dataset
# in a shared JSON config file.
config_path = "/projects/genomic-ml/da2343/ml_project_2/settings/config.json"
with open(config_path) as config_file:
    config = json.load(config_file)

_pair_settings = config["trading_settings"][dataset_name]
tp = _pair_settings["take_profit"]
sl = _pair_settings["stop_loss"]

# First CSV column becomes the frame index.
df = pd.read_csv(dataset_path, index_col=0)

In [None]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

In [2]:
# Single-column frame of closing prices (list selector keeps it a DataFrame).
y = df.loc[:, ['Close']]
# First index label of the loaded data; y shares df's index.
offset = df.index[0]

In [3]:
def create_trade_order(row, position, tp, sl, timeperiod):
    """Build the record for a trade opened at ``row``'s closing price.

    Parameters
    ----------
    row : mapping-like with a ``name`` attribute
        Must provide ``"Close"``, ``"Date_Time"``, ``f"SMA_{timeperiod}"`` and
        the indicator columns copied below; ``row.name`` is stored as
        ``"index"``.
    position : int
        ``1`` places the take-profit above and the stop-loss below the entry
        price; any other value places them the other way round.
    tp, sl : number
        Take-profit and stop-loss distances added to / subtracted from the
        entry price.
    timeperiod : int or str
        Selects which ``SMA_<timeperiod>`` column is copied.

    Returns
    -------
    dict
        Entry price (``"ask_price"``), exit levels, position, a snapshot of
        the indicator values, and ``"close_time"`` / ``"label"`` set to
        ``None`` for the caller to fill in when the trade closes.
    """
    entry_price = row["Close"]
    if position == 1:
        target_price = entry_price + tp
        stop_price = entry_price - sl
    else:
        target_price = entry_price - tp
        stop_price = entry_price + sl

    order = {
        "index": row.name,
        "ask_price": entry_price,
        "take_profit_price": target_price,
        "stop_loss_price": stop_price,
        "position": position,
    }

    # Indicator snapshot, copied verbatim in this column order.
    snapshot_columns = (
        f"SMA_{timeperiod}",
        "MACD",
        "MACD_Signal",
        "MACD_Hist",
        "MACD_Crossover_Change",
        "RSI",
        "ATR",
        "ADX",
        "AROON_Oscillator",
        "WILLR",
        "OBV",
        "CCI",
        "PSAR",
        "AD",
        "ADOSC",
        "VOLUME_RSI",
        "MFI",
        "Date_Time",
    )
    for column in snapshot_columns:
        order[column] = row[column]

    # Filled in later, once the trade is closed.
    order["close_time"] = None
    order["label"] = None
    return order


def _trade_outcome(trade, close_price):
    """Return 1 if ``close_price`` reached the trade's take-profit level,
    0 if it reached the stop-loss level, and None if neither was reached.

    For ``position == 1`` the take-profit is reached from below and the
    stop-loss from above; for any other position the directions are mirrored.
    Take-profit is checked first, so it wins if both conditions hold.
    """
    if trade["position"] == 1:
        hit_take_profit = close_price >= trade["take_profit_price"]
        hit_stop_loss = close_price <= trade["stop_loss_price"]
    else:
        hit_take_profit = close_price <= trade["take_profit_price"]
        hit_stop_loss = close_price >= trade["stop_loss_price"]

    if hit_take_profit:
        return 1
    if hit_stop_loss:
        return 0
    return None


# Sequential simulation: at most one trade is open at any time.
trades = []
for _, row in df.iterrows():
    # While the latest trade is still open, this bar can only close it
    # (or leave it open); a new trade is never opened on the same bar.
    if trades and trades[-1]["label"] is None:
        open_trade = trades[-1]
        outcome = _trade_outcome(open_trade, row["Close"])
        if outcome is not None:
            open_trade["label"] = outcome
            open_trade["close_time"] = row["Date_Time"]
        continue

    # No open trade: enter on a non-zero crossover value
    # (positive -> position 1, negative -> position 0).
    # The two explicit comparisons deliberately skip NaN, which `!= 0` would not.
    macd_crossover_change = row["MACD_Crossover_Change"]
    if macd_crossover_change > 0 or macd_crossover_change < 0:
        current_position = 1 if macd_crossover_change > 0 else 0
        local_order = create_trade_order(row, current_position, tp, sl, timeperiod)
        trades.append(local_order)

# NOTE: if the data ends while a trade is still open, that last trade keeps
# label=None / close_time=None in the resulting frame.
trades_df = pd.DataFrame(trades)

In [4]:
# Bare expression: the notebook renders the simulated trade log as this cell's output.
trades_df

Unnamed: 0,index,ask_price,take_profit_price,stop_loss_price,position,SMA_200,MACD,MACD_Signal,MACD_Hist,MACD_Crossover_Change,...,OBV,CCI,PSAR,AD,ADOSC,VOLUME_RSI,MFI,Date_Time,close_time,label
0,209,1.30870,1.31270,1.30620,1,1.316625,-0.001955,-0.001962,0.000007,2.0,...,2987.0,-59.563728,1.310000,-3.518072e+03,-314.606376,43.118997,27.493133,2007.03.05 22:00:00,2007.03.06 22:00:00,1
1,244,1.31130,1.30730,1.31380,0,1.316453,0.000262,0.000342,-0.000080,-2.0,...,4903.0,-138.344595,1.313834,-3.619358e+03,-94.962257,52.352590,46.481357,2007.03.07 09:00:00,2007.03.07 15:00:00,0
2,265,1.31710,1.31310,1.31960,0,1.316768,0.001249,0.001289,-0.000040,-2.0,...,6683.0,29.626780,1.318460,-2.647393e+03,9.642566,50.225690,45.481383,2007.03.08 06:00:00,2007.03.08 15:00:00,1
3,285,1.31430,1.31830,1.31180,1,1.316485,-0.000365,-0.000393,0.000028,2.0,...,6143.0,74.769797,1.311746,-2.825622e+03,134.357644,47.591747,44.034916,2007.03.09 02:00:00,2007.03.09 14:00:00,0
4,307,1.31210,1.31610,1.30960,1,1.315822,-0.000718,-0.000724,0.000006,2.0,...,4983.0,3.345281,1.313866,-2.560048e+03,135.647302,48.192723,45.661289,2007.03.12 01:00:00,2007.03.12 10:00:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4134,100000,1.10489,1.10889,1.10239,1,1.091827,0.001867,0.001686,0.000181,2.0,...,15049.0,177.792861,1.098382,-3.904942e+06,1065.826390,66.431461,62.992627,2023.04.13 15:00:00,2023.04.14 17:00:00,0
4135,100042,1.09868,1.10268,1.09618,1,1.094480,-0.001327,-0.001374,0.000047,2.0,...,-25520.0,24.394401,1.099470,-3.909506e+06,-281.560849,52.198844,48.281841,2023.04.17 09:00:00,2023.04.17 15:00:00,0
4136,100058,1.09241,1.09641,1.08991,1,1.094418,-0.002050,-0.002051,0.000001,2.0,...,-43695.0,-42.238600,1.096220,-3.925679e+06,-1665.883694,40.977494,28.144375,2023.04.18 01:00:00,2023.04.18 10:00:00,1
4137,100088,1.09686,1.09286,1.09936,0,1.095222,0.000451,0.000463,-0.000012,-2.0,...,-20871.0,-17.138055,1.098362,-3.917172e+06,781.721024,42.811481,56.587638,2023.04.19 07:00:00,2023.04.24 03:00:00,0


In [44]:
# Persist the trade log as CSV, leaving out the positional index column.
# NOTE(review): an earlier note here recorded "2359 rows × 24 columns"; the
# shape depends on the dataset and settings, so re-check it before relying on it.
# The "2011_2023" part of the file name is fixed text, not derived from the data.
trades_csv_path = f"trades_seq_fixed_{dataset_name}_2011_2023_(1).csv"
trades_df.to_csv(trades_csv_path, index=False)

In [45]:
# Win/loss summary over closed trades only.
# A trade that is still open when the data ends has no label (None/NaN); it is
# dropped here so it cannot poison the sums below. An empty trade log (frame
# without a "label" column) is treated as zero trades.
if "label" in trades_df.columns:
    closed_labels = trades_df["label"].dropna()
else:
    closed_labels = pd.Series(dtype="float64")
outcomes = closed_labels.astype(int).tolist()

no_of_trades = len(outcomes)
no_of_wins = sum(outcomes)

accuracy_df = pd.DataFrame({
    # Share of trades labelled 1 (take-profit hit); same value as scoring the
    # labels against an all-ones target. NaN when there are no closed trades.
    'accuracy': no_of_wins / no_of_trades if no_of_trades else float("nan"),
    'no_of_trades': no_of_trades,
    'no_of_wins': no_of_wins,
    'no_of_losses': no_of_trades - no_of_wins,
}, index=[0])
accuracy_df

Unnamed: 0,accuracy,no_of_trades,no_of_wins,no_of_losses
0,0.410244,4139,1698,2441
