In [35]:
import sys
import os
import pandas as pd
import numpy as np
from datetime import date
import talib
from sklearn.linear_model import *
from sktime.forecasting.base import ForecastingHorizon
# from sktime.forecasting.compose import make_reduction
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, mean_squared_error
from sklearn.metrics import accuracy_score
from sktime.forecasting.model_selection import SlidingWindowSplitter
from joblib import Parallel, delayed
from itertools import islice
import json
import warnings


# Suppress all warnings for the remainder of the session.
warnings.filterwarnings("ignore")

# One row of params.csv is selected as the active parameter set.
params_df = pd.read_csv("params.csv")

# Disabled command-line mode: the row number used to come from sys.argv.
# if len(sys.argv) == 2:
#     prog_name, task_str = sys.argv
#     param_row = int(task_str)
# else:
#     print("len(sys.argv)=%d so trying first param" % len(sys.argv))
#     param_row = 0

param_row = 0
param_dict = dict(params_df.iloc[param_row])

# Unpack the parameters that are used as-is, in one pass over their column names.
(
    dataset_name,
    forecast_horizon,
    window_size,
    algorithm,
    train_size,
    timeperiod,
    slope_threshold,
    step_length,
) = (
    param_dict[column]
    for column in (
        "dataset_name",
        "fh",
        "window_size",
        "algorithm",
        "train_size",
        "sma",
        "slope_threshold",
        "step_length",
    )
)
# The year is the only parameter converted to a string.
year = str(param_dict["year"])

# Map each dataset name to the CSV file that holds its data.
root_data_dir = "/projects/genomic-ml/da2343/ml_project_2/data"
dataset_dict = {
    "EURUSD_H1": f"{root_data_dir}/EURUSD/EURUSD_H1_200702210000_202304242100_Update.csv",
    "USDJPY_H1": f"{root_data_dir}/USDJPY/USDJPY_H1_200705290000_202307282300_Update.csv",
    "GBPUSD_H1": f"{root_data_dir}/GBPUSD/GBPUSD_H1_200704170000_202307282300_Update.csv",
    "AUDUSD_H1": f"{root_data_dir}/AUDUSD/AUDUSD_H1_200704170000_202307282300_Update.csv",
    "USDCAD_H1": f"{root_data_dir}/USDCAD/USDCAD_H1_200705300000_202307282300_Update.csv",
    "USDCHF_H1": f"{root_data_dir}/USDCHF/USDCHF_H1_200704170000_202307282300_Update.csv",
    # "NZDUSD_H1" : f"{root_data_dir}/NZDUSD/NZDUSD_H1_200704170000_202307282300_Update.csv",
    "EURJPY_H1": f"{root_data_dir}/EURJPY/EURJPY_H1_200705300000_202307282300_Update.csv",
    "EURGBP_H1": f"{root_data_dir}/EURGBP/EURGBP_H1_200703270000_202307282300_Update.csv",
}
dataset_path = dataset_dict[dataset_name]

# Read the JSON settings file.
config_path = "/projects/genomic-ml/da2343/ml_project_2/settings/config.json"
with open(config_path) as f:
    config = json.load(f)

# Take-profit and stop-loss levels configured for the selected dataset.
tp, sl = (
    config["trading_settings"][dataset_name][level]
    for level in ("take_profit", "stop_loss")
)

# Load the selected dataset, using its first CSV column as the index.
df = pd.read_csv(dataset_path, index_col=0)

In [None]:
# Display the frame read from dataset_path.
df

In [36]:
# Double brackets keep the closing prices as a one-column DataFrame rather than a Series.
y = df[["Close"]]
# First index label of the price data (column selection leaves the index unchanged).
offset = df.index[0]

In [37]:
def create_trade_order(row, position, tp, sl, timeperiod):
    """Build the record for a trade opened at the ``Close`` price of ``row``.

    Args:
        row: Mapping-like row (e.g. a pandas Series) exposing ``.name`` and the
            price/indicator columns copied below.
        position: ``1`` places take-profit above and stop-loss below the entry
            price; any other value mirrors the two levels.
        tp: Distance from the entry price to the take-profit level.
        sl: Distance from the entry price to the stop-loss level.
        timeperiod: Suffix selecting which ``SMA_<timeperiod>`` column to copy.

    Returns:
        dict with the entry data, the two price levels, a snapshot of the
        indicator columns and ``"label"`` set to ``None`` (not yet resolved).
    """
    entry_price = row["Close"]

    if position == 1:
        take_profit_price = entry_price + tp
        stop_loss_price = entry_price - sl
    else:
        take_profit_price = entry_price - tp
        stop_loss_price = entry_price + sl

    order = {
        "index": row.name,
        "ask_price": entry_price,
        "take_profit_price": take_profit_price,
        "stop_loss_price": stop_loss_price,
        "position": position,
    }

    # Columns copied verbatim from the row; their order fixes the key order of the record.
    snapshot_columns = (
        f"SMA_{timeperiod}",
        "MACD",
        "MACD_Signal",
        "MACD_Hist",
        "MACD_Crossover_Change",
        "RSI",
        "ATR",
        "ADX",
        "AROON_Oscillator",
        "WILLR",
        "OBV",
        "CCI",
        "PSAR",
        "AD",
        "ADOSC",
        "VOLUME_RSI",
        "MFI",
        "Date_Time",
    )
    for column in snapshot_columns:
        order[column] = row[column]

    # The outcome is filled in later, once a price level is reached.
    order["label"] = None
    return order


def _resolve_open_trade(trade, close_price):
    """Label ``trade`` in place if ``close_price`` reached one of its price levels.

    Sets ``trade["label"]`` to 1 when the take-profit level is reached and to 0
    when the stop-loss level is reached; leaves it as ``None`` otherwise.
    For ``position == 1`` take-profit is reached from below and stop-loss from
    above; for any other position the comparisons are mirrored.
    """
    if trade["position"] == 1:
        hit_take_profit = close_price >= trade["take_profit_price"]
        hit_stop_loss = close_price <= trade["stop_loss_price"]
    else:
        hit_take_profit = close_price <= trade["take_profit_price"]
        hit_stop_loss = close_price >= trade["stop_loss_price"]

    # Take-profit is checked first, so it wins if both conditions ever hold.
    if hit_take_profit:
        trade["label"] = 1
    elif hit_stop_loss:
        trade["label"] = 0


trades = []
# Walk the rows in order, keeping at most one unresolved trade at a time.
for _, row in df.iterrows():
    # While the latest trade is unlabeled, a row can only close it. Whether or
    # not it closes, no new trade is opened on that same row.
    if trades and trades[-1]["label"] is None:
        _resolve_open_trade(trades[-1], row["Close"])
        continue

    macd_crossover_change = row["MACD_Crossover_Change"]
    # Explicit > / < tests (rather than != 0) so that NaN opens no trade.
    if macd_crossover_change > 0:
        current_position = 1
    elif macd_crossover_change < 0:
        current_position = 0
    else:
        continue

    trades.append(create_trade_order(row, current_position, tp, sl, timeperiod))

# One row per trade; the final trade keeps label None if the data ends before
# either of its price levels is reached.
trades_df = pd.DataFrame(trades)

In [38]:
# Display the table of trades built from the `trades` list.
trades_df

Unnamed: 0,index,ask_price,take_profit_price,stop_loss_price,position,SMA_200,MACD,MACD_Signal,MACD_Hist,MACD_Crossover_Change,...,WILLR,OBV,CCI,PSAR,AD,ADOSC,VOLUME_RSI,MFI,Date_Time,label
0,209,1.30870,1.31620,1.30370,1,1.316625,-0.001955,-0.001962,0.000007,2.0,...,-79.268293,2987.0,-59.563728,1.310000,-3.518072e+03,-314.606376,43.118997,27.493133,2007.03.05 22:00:00,1
1,265,1.31710,1.30960,1.32210,0,1.316768,0.001249,0.001289,-0.000040,-2.0,...,-28.571429,6683.0,29.626780,1.318460,-2.647393e+03,9.642566,50.225690,45.481383,2007.03.08 06:00:00,0
2,383,1.32260,1.31510,1.32760,0,1.315208,0.000851,0.000864,-0.000013,-2.0,...,-54.545455,8530.0,55.555556,1.321268,-1.671061e+03,74.140303,40.513145,46.926024,2007.03.15 05:00:00,0
3,418,1.33060,1.32310,1.33560,0,1.317753,0.002163,0.002276,-0.000113,-2.0,...,-55.172414,11798.0,-0.528501,1.329741,-1.966231e+03,-221.232351,49.742860,52.782133,2007.03.16 16:00:00,0
4,505,1.33770,1.33020,1.34270,0,1.325615,0.001901,0.001993,-0.000091,-2.0,...,-30.000000,13009.0,-91.895856,1.341000,7.805687e+02,125.965586,53.999797,63.818681,2007.03.22 08:00:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2354,99938,1.08645,1.09395,1.08145,1,1.089366,-0.001259,-0.001306,0.000047,2.0,...,-61.124122,-37172.0,-5.314856,1.087136,-3.913988e+06,708.196367,45.890507,31.766623,2023.04.11 01:00:00,1
2355,99990,1.09928,1.09178,1.10428,0,1.090874,0.001977,0.002030,-0.000053,-2.0,...,-26.282051,5601.0,47.857849,1.098790,-3.909382e+06,-63.695507,46.010719,64.222409,2023.04.13 05:00:00,0
2356,100008,1.10473,1.09723,1.10973,0,1.092657,0.002136,0.002166,-0.000030,-2.0,...,-23.644752,17386.0,41.144980,1.102329,-3.904528e+06,499.994868,41.641713,66.445762,2023.04.13 23:00:00,1
2357,100042,1.09868,1.10618,1.09368,1,1.094480,-0.001327,-0.001374,0.000047,2.0,...,-33.244681,-25520.0,24.394401,1.099470,-3.909506e+06,-281.560849,52.198844,48.281841,2023.04.17 09:00:00,0


In [34]:
# Size noted from an earlier run: 2359 rows × 24 columns.
# Save the trades dataframe to a CSV file (relative path, so it lands in the
# current working directory); index=False leaves out the DataFrame row index.
# NOTE(review): the file name says 2011_2023, but the EURUSD source file name and
# the first trade in the displayed output start in 2007 — confirm the intended range.
trades_df.to_csv(f"trades_seq_fixed_{dataset_name}_2011_2023.csv", index=False)

In [28]:
# Get the outcomes from the label column, scoring only trades that were closed.
# A trade still open when the price data ends keeps label None (NaN in the
# DataFrame); left in, it would corrupt the win/loss counts and the accuracy input.
outcomes = trades_df["label"].dropna().astype(int).tolist()
no_of_trades = len(outcomes)
no_of_wins = sum(outcomes)

accuracy_df = pd.DataFrame({
    # Against an all-ones reference, accuracy equals the share of winning trades.
    # With no closed trades there is nothing to score, so report NaN.
    'accuracy': accuracy_score([1] * no_of_trades, outcomes) if no_of_trades else np.nan,
    'no_of_trades': no_of_trades,
    'no_of_wins': no_of_wins,
    'no_of_losses': no_of_trades - no_of_wins,
}, index=[0])
accuracy_df

Unnamed: 0,accuracy,no_of_trades,no_of_wins,no_of_losses
0,0.413735,2359,976,1383
