In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import os
import shutil
import sys
import json

# TRAINING HYPER-PARAMETERS

In [None]:
# Kept so that any other cell referring to this name keeps working; it is not
# used anywhere in this cell.
params_df_list = []

# Fixed seed used for every BIRCH run.
BIRCH_SEED = 42

# Seed of the generator that draws the k-means seeds below. Drawing from a
# dedicated, seeded generator (instead of NumPy's unseeded global state) makes
# the grid identical on every "Restart & Run All", so instruments that are
# submitted in separate batches are evaluated with the same set of seeds.
SEED_DRAW_SEED = 0
NUM_DYNAMIC_SEEDS = 10

# Distinct seeds in [0, 1000) for the non-BIRCH algorithms.
DYNAMIC_SEEDS = np.random.default_rng(SEED_DRAW_SEED).choice(
    1000, size=NUM_DYNAMIC_SEEDS, replace=False
)

# Base grid shared by all algorithms; each list holds the candidate values for
# one hyper-parameter.
params_dict = {
    "instrument": [
        # "EUR_USD_M15", "GBP_USD_M15", "USD_JPY_M15", "USD_CHF_M15", 
        # "USD_CAD_M15", "AUD_USD_M15", "AUD_JPY_M15", "AUD_CAD_M15", "EUR_GBP_M15", 
        
        "EUR_JPY_M15", "GBP_CHF_M15", "GBP_JPY_M15", 
        "EUR_CHF_M15", "AUD_NZD_M15", "CAD_JPY_M15", "NZD_USD_M15", 
        "EUR_CAD_M15"
    ],
    "price_history_length": [24],
    "num_perceptually_important_points": [4],
    "num_clusters": [5, 6, 7, 8, 9],
    "clustering_algorithm": ["kmeans", "birch"],
    "train_period": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    "test_period": [1],
    "reverse_test": [True, False],
}


def _expand_grid(grid):
    """Return a DataFrame with one row per combination of the values in ``grid``.

    Parameters
    ----------
    grid : dict
        Maps a column name to the iterable of candidate values for it.

    Returns
    -------
    pandas.DataFrame
        One column per key of ``grid`` (in insertion order) and one row per
        element of the Cartesian product of the values, with a fresh
        0..n-1 index.
    """
    # Materialise the dict views so pandas receives plain lists.
    combos = pd.MultiIndex.from_product(list(grid.values()), names=list(grid.keys()))
    return combos.to_frame(index=False)


# BIRCH grid: single algorithm, single fixed seed.
birch_params = {
    **params_dict,
    "clustering_algorithm": ["birch"],
    "random_seed": [BIRCH_SEED],
}

# Grid for the other algorithms (currently k-means): every dynamic seed is tried.
other_params = {
    **params_dict,
    "clustering_algorithm": ["kmeans"],
    "random_seed": DYNAMIC_SEEDS,
}

birch_df = _expand_grid(birch_params)
other_df = _expand_grid(other_params)

# Stack both grids into the final task table (BIRCH rows first).
params_concat_df = pd.concat([birch_df, other_df], ignore_index=True)
params_concat_df

# TESTING BEST HYPER-PARAMETERS

In [None]:
import pandas as pd

# Column order of the final parameter table.
_best_param_columns = (
    "instrument",
    "num_clusters",
    "clustering_algorithm",
    "train_period",
    "test_period",
    "price_history_length",
    "num_perceptually_important_points",
    "reverse_test",
    "random_seed",
)

# One row per instrument, fields in the order of `_best_param_columns`.
# test_period (1), price_history_length (24) and
# num_perceptually_important_points (4) are the same for every instrument.
_best_param_rows = [
    ("EUR_USD_M15", 5, "kmeans", 4, 1, 24, 4, False, 60),
    ("GBP_USD_M15", 5, "kmeans", 8, 1, 24, 4, True, 655),
    ("USD_JPY_M15", 5, "kmeans", 9, 1, 24, 4, True, 60),
    ("USD_CHF_M15", 5, "birch", 11, 1, 24, 4, True, 42),
    ("USD_CAD_M15", 5, "kmeans", 14, 1, 24, 4, True, 60),
    ("AUD_USD_M15", 5, "kmeans", 6, 1, 24, 4, True, 655),
    ("AUD_JPY_M15", 5, "birch", 9, 1, 24, 4, True, 42),
    ("AUD_CAD_M15", 5, "birch", 6, 1, 24, 4, False, 42),
    ("EUR_GBP_M15", 5, "kmeans", 5, 1, 24, 4, True, 279),
    ("EUR_JPY_M15", 5, "kmeans", 4, 1, 24, 4, True, 886),
    ("GBP_CHF_M15", 5, "birch", 14, 1, 24, 4, True, 42),
    ("GBP_JPY_M15", 5, "birch", 9, 1, 24, 4, True, 42),
    ("EUR_CHF_M15", 5, "birch", 4, 1, 24, 4, False, 42),
    ("AUD_NZD_M15", 5, "birch", 8, 1, 24, 4, True, 42),
    ("CAD_JPY_M15", 5, "birch", 4, 1, 24, 4, False, 42),
    ("NZD_USD_M15", 5, "kmeans", 8, 1, 24, 4, False, 22),
    ("EUR_CAD_M15", 5, "kmeans", 7, 1, 24, 4, True, 673),
]

# Transpose the row table into the column -> list-of-values mapping.
best_params = {
    column: list(values)
    for column, values in zip(_best_param_columns, zip(*_best_param_rows))
}

params_concat_df = pd.DataFrame(best_params)
params_concat_df

In [2]:
import pandas as pd

# Tuned settings per instrument: (num_clusters, train_period, reverse_test).
# The remaining columns are identical for every instrument and are filled in
# when the column mapping is built below.
_tuned_by_instrument = {
    "EUR_USD_M15": (5, 11, False),
    "GBP_USD_M15": (9, 5, False),
    "USD_JPY_M15": (5, 10, True),
    "USD_CHF_M15": (6, 10, True),
    "USD_CAD_M15": (5, 9, False),
    "AUD_USD_M15": (6, 9, True),
    "AUD_JPY_M15": (5, 9, True),
    "AUD_CAD_M15": (6, 12, False),
    "EUR_GBP_M15": (7, 9, False),
    "EUR_JPY_M15": (9, 6, False),
    "GBP_CHF_M15": (5, 14, True),
    "GBP_JPY_M15": (5, 9, True),
    "EUR_CHF_M15": (6, 4, False),
    "AUD_NZD_M15": (5, 8, True),
    "CAD_JPY_M15": (5, 4, False),
    "NZD_USD_M15": (8, 7, True),
    "EUR_CAD_M15": (5, 7, True),
}

# Instrument names in table order, and how many there are.
INSTRUMENTS = list(_tuned_by_instrument)
NUM_PAIRS = len(INSTRUMENTS)

# Split the per-instrument tuples into one list per tuned column.
_cluster_counts, _train_periods, _reverse_flags = (
    list(column) for column in zip(*_tuned_by_instrument.values())
)

# Column -> values mapping; every instrument uses BIRCH with seed 42.
best_params = dict(
    instrument=INSTRUMENTS,
    num_clusters=_cluster_counts,
    clustering_algorithm=["birch"] * NUM_PAIRS,
    train_period=_train_periods,
    test_period=[1] * NUM_PAIRS,
    price_history_length=[24] * NUM_PAIRS,
    num_perceptually_important_points=[4] * NUM_PAIRS,
    reverse_test=_reverse_flags,
    random_seed=[42] * NUM_PAIRS,
)

params_concat_df = pd.DataFrame(best_params)
params_concat_df

Unnamed: 0,instrument,num_clusters,clustering_algorithm,train_period,test_period,price_history_length,num_perceptually_important_points,reverse_test,random_seed
0,EUR_USD_M15,5,birch,11,1,24,4,False,42
1,GBP_USD_M15,9,birch,5,1,24,4,False,42
2,USD_JPY_M15,5,birch,10,1,24,4,True,42
3,USD_CHF_M15,6,birch,10,1,24,4,True,42
4,USD_CAD_M15,5,birch,9,1,24,4,False,42
5,AUD_USD_M15,6,birch,9,1,24,4,True,42
6,AUD_JPY_M15,5,birch,9,1,24,4,True,42
7,AUD_CAD_M15,6,birch,12,1,24,4,False,42
8,EUR_GBP_M15,7,birch,9,1,24,4,False,42
9,EUR_JPY_M15,9,birch,6,1,24,4,False,42


In [3]:

# Build a self-contained SLURM array job: one array task per row of
# `params_concat_df`. The job directory receives the parameter table, the
# batch script and a copy of the worker script.
n_tasks, ncol = params_concat_df.shape
if n_tasks < 1:
    # With no rows the array spec below would read "0--1", which is not a
    # valid range, so stop before anything is written to disk.
    raise ValueError("params_concat_df is empty; there are no tasks to submit")

# The job name (and therefore its directory) is stamped to the minute.
date_time = datetime.now().strftime("%Y-%m-%d_%H:%M")
job_name = f"ml_project_2_{date_time}"
job_dir = "/scratch/da2343/" + job_name
results_dir = os.path.join(job_dir, "results")
# os.makedirs creates job_dir and results_dir in one call and raises on
# failure, instead of shelling out to `mkdir -p` and ignoring its exit status.
os.makedirs(results_dir, exist_ok=True)
params_concat_df.to_csv(os.path.join(job_dir, "params.csv"), index=False)

print(f"created {n_tasks} tasks in {job_dir}")

# Batch script: array indices 0..n_tasks-1; the task id is passed to
# run_one.py as its only argument. stdout and stderr share one log file per
# array task.
run_one_contents = f"""#!/bin/bash
#SBATCH --array=0-{n_tasks-1}
#SBATCH --time=24:00:00
#SBATCH --mem=4GB
#SBATCH --cpus-per-task=1
#SBATCH --error={job_dir}/slurm-%A_%a.out
#SBATCH --output={job_dir}/slurm-%A_%a.out
#SBATCH --job-name={job_name}
cd {job_dir}
python run_one.py $SLURM_ARRAY_TASK_ID
"""
run_one_sh = os.path.join(job_dir, "run_one.sh")
with open(run_one_sh, "w") as run_one_f:
    run_one_f.write(run_one_contents)

# Copy the worker script into the job directory under the name the batch
# script expects.
run_orig_py = "demo_run_gfd.py"
run_one_py = os.path.join(job_dir, "run_one.py")
shutil.copyfile(run_orig_py, run_one_py)

# Mirror the results directory and params.csv next to the original worker
# script so it can also be run outside the job directory (see `msg` below).
# For a bare filename os.path.dirname returns "", so these paths are relative
# to the current working directory.
orig_dir = os.path.dirname(run_orig_py)
orig_results = os.path.join(orig_dir, "results")
os.makedirs(orig_results, exist_ok=True)
orig_csv = os.path.join(orig_dir, "params.csv")
params_concat_df.to_csv(orig_csv, index=False)

msg = f"""created params CSV files and job scripts, test with
python {run_orig_py}
SLURM_ARRAY_TASK_ID=0 bash {run_one_sh}"""
print(msg)


created 17 tasks in /scratch/da2343/ml_project_2_2025-01-04_09:44
created params CSV files and job scripts, test with
python demo_run_gfd.py
SLURM_ARRAY_TASK_ID=0 bash /scratch/da2343/ml_project_2_2025-01-04_09:44/run_one.sh


EUR_USD_M15
num_clusters                          5
clustering_algorithm              birch
train_period                         11
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.326983
test_sharpe_ratio              0.093925
test_win_ratio                 0.532197
test_num_trades               14.314394
test_avg_trades_per_window    14.314394
combined_score                  0.72761
Name: 14, dtype: object


GBP_USD_M15
num_clusters                         9
clustering_algorithm             birch
train_period                         5
test_period                          1
reverse_test                     False
random_seed                         42
test_profit_factor             1.60982
test_sharpe_ratio             0.154343
test_win_ratio                0.551495
test_num_trades               7.541528
test_avg_trades_per_window    7.541528
combined_score                0.752724
Name: 970, dtype: object


USD_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                         10
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.605567
test_sharpe_ratio               0.15664
test_win_ratio                 0.542601
test_num_trades               30.497758
test_avg_trades_per_window    30.497758
combined_score                 0.863033
Name: 13, dtype: object


USD_CHF_M15
num_clusters                          6
clustering_algorithm              birch
train_period                         10
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.607189
test_sharpe_ratio              0.155362
test_win_ratio                 0.530769
test_num_trades               26.430769
test_avg_trades_per_window    26.430769
combined_score                 0.811322
Name: 255, dtype: object


USD_CAD_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.314086
test_sharpe_ratio               0.09431
test_win_ratio                 0.517717
test_num_trades               31.240157
test_avg_trades_per_window    31.240157
combined_score                 0.734611
Name: 10, dtype: object



AUD_USD_M15
num_clusters                         6
clustering_algorithm             birch
train_period                         9
test_period                          1
reverse_test                      True
random_seed                         42
test_profit_factor            1.368309
test_sharpe_ratio             0.102226
test_win_ratio                0.523077
test_num_trades                   24.9
test_avg_trades_per_window        24.9
combined_score                0.723956
Name: 253, dtype: object

AUD_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.364074
test_sharpe_ratio               0.10697
test_win_ratio                 0.533898
test_num_trades               28.919492
test_avg_trades_per_window    28.919492
combined_score                 0.756113
Name: 11, dtype: object


AUD_CAD_M15
num_clusters                         6
clustering_algorithm             birch
train_period                        12
test_period                          1
reverse_test                     False
random_seed                         42
test_profit_factor            1.543154
test_sharpe_ratio             0.148903
test_win_ratio                0.541045
test_num_trades               26.55597
test_avg_trades_per_window    26.55597
combined_score                 0.79178
Name: 258, dtype: object


EUR_GBP_M15
num_clusters                          7
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.371698
test_sharpe_ratio              0.097822
test_win_ratio                 0.517422
test_num_trades               22.038328
test_avg_trades_per_window    22.038328
combined_score                 0.680034
Name: 494, dtype: object

EUR_JPY_M15
num_clusters                         9
clustering_algorithm             birch
train_period                         6
test_period                          1
reverse_test                     False
random_seed                         42
test_profit_factor            1.739721
test_sharpe_ratio             0.187485
test_win_ratio                0.574751
test_num_trades               17.23588
test_avg_trades_per_window    17.23588
combined_score                0.807402
Name: 972, dtype: object

GBP_CHF_M15
num_clusters                          5
clustering_algorithm              birch
train_period                         14
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.513867
test_sharpe_ratio              0.144154
test_win_ratio                 0.540404
test_num_trades               33.323232
test_avg_trades_per_window    33.323232
combined_score                 0.804119
Name: 21, dtype: object

GBP_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.303039
test_sharpe_ratio              0.085823
test_win_ratio                 0.513944
test_num_trades               32.187251
test_avg_trades_per_window    32.187251
combined_score                 0.729744
Name: 11, dtype: object

EUR_CHF_M15
num_clusters                          6
clustering_algorithm              birch
train_period                          4
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.766591
test_sharpe_ratio               0.19252
test_win_ratio                 0.602694
test_num_trades               26.289562
test_avg_trades_per_window    26.289562
combined_score                 0.902183
Name: 242, dtype: object


AUD_NZD_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          8
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.499242
test_sharpe_ratio              0.114585
test_win_ratio                 0.556641
test_num_trades               32.144531
test_avg_trades_per_window    32.144531
combined_score                 0.791286
Name: 9, dtype: object

CAD_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          4
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.481216
test_sharpe_ratio              0.130124
test_win_ratio                 0.543624
test_num_trades               31.402685
test_avg_trades_per_window    31.402685
combined_score                 0.827545
Name: 0, dtype: object

NZD_USD_M15
num_clusters                          8
clustering_algorithm              birch
train_period                          7
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.675593
test_sharpe_ratio              0.131398
test_win_ratio                    0.545
test_num_trades               13.993333
test_avg_trades_per_window    13.993333
combined_score                 0.766732
Name: 733, dtype: object


EUR_CAD_M15
num_clusters                         5
clustering_algorithm             birch
train_period                         7
test_period                          1
reverse_test                      True
random_seed                         42
test_profit_factor             1.29914
test_sharpe_ratio             0.091114
test_win_ratio                0.551724
test_num_trades               13.32069
test_avg_trades_per_window    13.32069
combined_score                0.722921
Name: 7, dtype: object