In [10]:
from datetime import datetime
import pandas as pd
import numpy as np
import os
import shutil
import sys
import json

# TRAINING HYPER-PARAMETERS

In [3]:
# Not referenced anywhere below in this cell; kept so that any other cell
# relying on the name keeps working.
params_df_list = []

# Define fixed seed for BIRCH
BIRCH_SEED = 42  # You can choose any constant value

# Seed of the generator used to *draw* the per-run seeds below.  Drawing from
# a seeded generator makes the grid identical after "Restart & Run All";
# change this constant to sample a different set of seeds.
SEED_SAMPLER_SEED = 0
NUM_DYNAMIC_SEEDS = 10

# Generate random seeds for other algorithms: distinct integers in [0, 1000).
DYNAMIC_SEEDS = np.random.default_rng(SEED_SAMPLER_SEED).choice(
    1000, size=NUM_DYNAMIC_SEEDS, replace=False
)

params_dict = {
    "instrument": [
        # "EUR_USD_M15", "GBP_USD_M15", "USD_JPY_M15", "USD_CHF_M15", 
        # "USD_CAD_M15", "AUD_USD_M15", "AUD_JPY_M15", "AUD_CAD_M15", "EUR_GBP_M15", 
        
        "EUR_JPY_M15", "GBP_CHF_M15", "GBP_JPY_M15", 
        "EUR_CHF_M15", "AUD_NZD_M15", "CAD_JPY_M15", "NZD_USD_M15", 
        "EUR_CAD_M15"
    ],
    "price_history_length": [24],
    "num_perceptually_important_points": [4],
    "num_clusters": [5, 6, 7, 8, 9],
    # Overridden per algorithm below; each algorithm gets its own seed list.
    "clustering_algorithm": ["kmeans", "birch"],
    "train_period": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    "test_period": [1],
    "reverse_test": [True, False],
}


def _expand_grid(grid):
    """Return a DataFrame with one row per combination of the values in ``grid``.

    Parameters
    ----------
    grid : dict
        Maps a column name to the list-like of values that column may take.

    Returns
    -------
    pandas.DataFrame
        Cartesian product of all value lists, columns in ``grid`` key order,
        with a fresh 0..n-1 index.
    """
    return (
        pd.MultiIndex.from_product(list(grid.values()), names=list(grid.keys()))
        .to_frame()
        .reset_index(drop=True)
    )


# BIRCH is run with the single fixed seed; k-means is repeated once per drawn
# seed.  Building new dicts (instead of mutating copies) keeps the base grid
# untouched and preserves the column order, with "random_seed" appended last.
birch_params = {
    **params_dict,
    "clustering_algorithm": ["birch"],
    "random_seed": [BIRCH_SEED],
}
other_params = {
    **params_dict,
    "clustering_algorithm": ["kmeans"],
    "random_seed": DYNAMIC_SEEDS,
}

# Generate parameter combinations for each algorithm family
birch_df = _expand_grid(birch_params)
other_df = _expand_grid(other_params)

# Combine both dataframes (BIRCH rows first, then the k-means rows)
params_concat_df = pd.concat([birch_df, other_df], ignore_index=True)
params_concat_df

Unnamed: 0,instrument,price_history_length,num_perceptually_important_points,num_clusters,clustering_algorithm,train_period,test_period,reverse_test,random_seed
0,EUR_JPY_M15,24,4,5,birch,4,1,True,42
1,EUR_JPY_M15,24,4,5,birch,4,1,False,42
2,EUR_JPY_M15,24,4,5,birch,5,1,True,42
3,EUR_JPY_M15,24,4,5,birch,5,1,False,42
4,EUR_JPY_M15,24,4,5,birch,6,1,True,42
...,...,...,...,...,...,...,...,...,...
9675,EUR_CAD_M15,24,4,9,kmeans,14,1,False,913
9676,EUR_CAD_M15,24,4,9,kmeans,14,1,False,681
9677,EUR_CAD_M15,24,4,9,kmeans,14,1,False,252
9678,EUR_CAD_M15,24,4,9,kmeans,14,1,False,585


# TESTING BEST HYPER-PARAMETERS

In [11]:
import pandas as pd

# Best configuration per instrument, stored as ONE record per pair so the
# fields of a pair cannot drift out of alignment (as they can with several
# parallel 17-element lists).
# Record layout: (instrument, clustering_algorithm, train_period,
#                 reverse_test, random_seed)
BEST_CONFIGS = [
    ("EUR_USD_M15", "kmeans", 4, False, 60),
    ("GBP_USD_M15", "kmeans", 11, True, 233),
    ("USD_JPY_M15", "kmeans", 11, True, 421),
    ("USD_CHF_M15", "birch", 11, True, 42),
    ("USD_CAD_M15", "kmeans", 13, True, 231),
    ("AUD_USD_M15", "kmeans", 11, True, 640),
    ("AUD_JPY_M15", "birch", 9, True, 42),
    ("AUD_CAD_M15", "birch", 6, False, 42),
    ("EUR_GBP_M15", "kmeans", 5, True, 279),
    ("EUR_JPY_M15", "kmeans", 4, True, 886),
    ("GBP_CHF_M15", "kmeans", 7, True, 623),
    ("GBP_JPY_M15", "birch", 9, True, 42),
    ("EUR_CHF_M15", "birch", 4, False, 42),
    ("AUD_NZD_M15", "kmeans", 10, True, 594),
    ("CAD_JPY_M15", "birch", 4, False, 42),
    # NOTE(review): previously entered as train_period=8, reverse_test=False.
    # The best row recorded for NZD_USD_M15 in this notebook's results is
    # train_period=4, reverse_test=True, random_seed=370, so the entry now
    # matches it. Confirm if the old values were intentional.
    ("NZD_USD_M15", "kmeans", 4, True, 370),
    ("EUR_CAD_M15", "kmeans", 7, True, 673),
]

# Constants for repeated values
INSTRUMENTS = [config[0] for config in BEST_CONFIGS]
NUM_PAIRS = len(INSTRUMENTS)

# Column-oriented view of the records; key order defines the CSV column order.
best_params = {
    "instrument": INSTRUMENTS,
    "num_clusters": [5] * NUM_PAIRS,  # All pairs use 5 clusters
    "clustering_algorithm": [config[1] for config in BEST_CONFIGS],
    "train_period": [config[2] for config in BEST_CONFIGS],
    "test_period": [1] * NUM_PAIRS,
    "price_history_length": [24] * NUM_PAIRS,  # Keeping this constant parameter
    "num_perceptually_important_points": [4] * NUM_PAIRS,  # Keeping this constant parameter
    "reverse_test": [config[3] for config in BEST_CONFIGS],
    "random_seed": [config[4] for config in BEST_CONFIGS],
}

# One row per instrument; this frame is what the job-setup cell writes out.
params_concat_df = pd.DataFrame(best_params)
params_concat_df

Unnamed: 0,instrument,num_clusters,clustering_algorithm,train_period,test_period,price_history_length,num_perceptually_important_points,reverse_test,random_seed
0,EUR_USD_M15,5,kmeans,4,1,24,4,False,60
1,GBP_USD_M15,5,kmeans,11,1,24,4,True,233
2,USD_JPY_M15,5,kmeans,11,1,24,4,True,421
3,USD_CHF_M15,5,birch,11,1,24,4,True,42
4,USD_CAD_M15,5,kmeans,13,1,24,4,True,231
5,AUD_USD_M15,5,kmeans,11,1,24,4,True,640
6,AUD_JPY_M15,5,birch,9,1,24,4,True,42
7,AUD_CAD_M15,5,birch,6,1,24,4,False,42
8,EUR_GBP_M15,5,kmeans,5,1,24,4,True,279
9,EUR_JPY_M15,5,kmeans,4,1,24,4,True,886


In [12]:

# Build a SLURM array job: one array task per row of params_concat_df.
n_tasks, ncol = params_concat_df.shape
if n_tasks == 0:
    # An empty table would render "--array=0--1", which is not a valid range.
    raise ValueError("params_concat_df is empty; there are no tasks to schedule")

# The job directory is named after the current time (minute resolution).
date_time = datetime.now().strftime("%Y-%m-%d_%H:%M")
job_name = f"ml_project_2_{date_time}"
job_dir = "/scratch/da2343/" + job_name
results_dir = os.path.join(job_dir, "results")
# os.makedirs avoids spawning a shell and raises if the directory cannot be
# created, instead of silently ignoring a non-zero exit status.
os.makedirs(results_dir, exist_ok=True)
params_concat_df.to_csv(os.path.join(job_dir, "params.csv"), index=False)

print(f"created {n_tasks} tasks in {job_dir}")

# Batch script: each array task runs run_one.py with its own task id, from
# inside the job directory where params.csv was written.
run_one_contents = f"""#!/bin/bash
#SBATCH --array=0-{n_tasks-1}
#SBATCH --time=5:00:00
#SBATCH --mem=4GB
#SBATCH --cpus-per-task=1
#SBATCH --error={job_dir}/slurm-%A_%a.out
#SBATCH --output={job_dir}/slurm-%A_%a.out
#SBATCH --job-name={job_name}
cd {job_dir}
python run_one.py $SLURM_ARRAY_TASK_ID
"""
run_one_sh = os.path.join(job_dir, "run_one.sh")
with open(run_one_sh, "w") as run_one_f:
    run_one_f.write(run_one_contents)

# Snapshot the worker script into the job directory as run_one.py.
run_orig_py = "demo_run_gfd.py"
run_one_py = os.path.join(job_dir, "run_one.py")
shutil.copyfile(run_orig_py, run_one_py)

# Mirror params.csv and a results/ directory next to the original script so
# it can also be run directly for a local test.
orig_dir = os.path.dirname(run_orig_py)
orig_results = os.path.join(orig_dir, "results")
os.makedirs(orig_results, exist_ok=True)
orig_csv = os.path.join(orig_dir, "params.csv")
params_concat_df.to_csv(orig_csv, index=False)

msg = f"""created params CSV files and job scripts, test with
python {run_orig_py}
SLURM_ARRAY_TASK_ID=0 bash {run_one_sh}"""
print(msg)


created 17 tasks in /scratch/da2343/ml_project_2_2025-01-06_17:52
created params CSV files and job scripts, test with
python demo_run_gfd.py
SLURM_ARRAY_TASK_ID=0 bash /scratch/da2343/ml_project_2_2025-01-06_17:52/run_one.sh


EUR_USD_M15
num_clusters                         5
clustering_algorithm            kmeans
train_period                         4
test_period                          1
reverse_test                     False
random_seed                         60
test_profit_factor            1.229126
test_sharpe_ratio             0.070113
test_win_ratio                0.511706
test_num_trades                   16.0
test_avg_trades_per_window        16.0
combined_score                0.729995
Name: 23, dtype: object


GBP_USD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                         11
test_period                           1
reverse_test                       True
random_seed                         233
test_profit_factor             1.242649
test_sharpe_ratio              0.066503
test_win_ratio                 0.503571
test_num_trades               14.992857
test_avg_trades_per_window    14.992857
combined_score                  0.70229
Name: 175, dtype: object


USD_JPY_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                         11
test_period                           1
reverse_test                       True
random_seed                         421
test_profit_factor             1.295778
test_sharpe_ratio              0.089168
test_win_ratio                 0.599119
test_num_trades               32.656388
test_avg_trades_per_window    32.656388
combined_score                   0.7403
Name: 177, dtype: object


USD_CHF_M15
num_clusters                          5
clustering_algorithm              birch
train_period                         11
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.489901
test_sharpe_ratio              0.140226
test_win_ratio                 0.517467
test_num_trades               33.275109
test_avg_trades_per_window    33.275109
combined_score                 0.805263
Name: 15, dtype: object

USD_CAD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                         13
test_period                           1
reverse_test                       True
random_seed                         231
test_profit_factor              1.30815
test_sharpe_ratio              0.094283
test_win_ratio                 0.527273
test_num_trades               34.877273
test_avg_trades_per_window    34.877273
combined_score                 0.738475
Name: 214, dtype: object


AUD_USD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                         11
test_period                           1
reverse_test                       True
random_seed                         640
test_profit_factor             1.246068
test_sharpe_ratio                0.0721
test_win_ratio                 0.522523
test_num_trades               31.202703
test_avg_trades_per_window    31.202703
combined_score                  0.71181
Name: 178, dtype: object


AUD_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.364074
test_sharpe_ratio               0.10697
test_win_ratio                 0.533898
test_num_trades               28.919492
test_avg_trades_per_window    28.919492
combined_score                 0.756113
Name: 11, dtype: object

AUD_CAD_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          6
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.387117
test_sharpe_ratio              0.114995
test_win_ratio                 0.574913
test_num_trades               32.473868
test_avg_trades_per_window    32.473868
combined_score                 0.790693
Name: 4, dtype: object

EUR_GBP_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                          5
test_period                           1
reverse_test                       True
random_seed                         279
test_profit_factor             1.458896
test_sharpe_ratio              0.126515
test_win_ratio                 0.545775
test_num_trades               35.919014
test_avg_trades_per_window    35.919014
combined_score                 0.829106
Name: 56, dtype: object


EUR_JPY_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                          4
test_period                           1
reverse_test                       True
random_seed                         886
test_profit_factor             1.477576
test_sharpe_ratio              0.135705
test_win_ratio                 0.512195
test_num_trades               36.083624
test_avg_trades_per_window    36.083624
combined_score                 0.847799
Name: 41, dtype: object


GBP_CHF_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                          7
test_period                           1
reverse_test                       True
random_seed                         623
test_profit_factor             1.408548
test_sharpe_ratio              0.115437
test_win_ratio                 0.561798
test_num_trades               36.719101
test_avg_trades_per_window    36.719101
combined_score                 0.799919
Name: 97, dtype: object


GBP_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.303039
test_sharpe_ratio              0.085823
test_win_ratio                 0.513944
test_num_trades               32.187251
test_avg_trades_per_window    32.187251
combined_score                 0.729744
Name: 11, dtype: object

EUR_CHF_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          4
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor              1.60884
test_sharpe_ratio              0.170725
test_win_ratio                 0.597973
test_num_trades               31.993243
test_avg_trades_per_window    31.993243
combined_score                 0.879885
Name: 0, dtype: object

GBP_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          9
test_period                           1
reverse_test                       True
random_seed                          42
test_profit_factor             1.303039
test_sharpe_ratio              0.085823
test_win_ratio                 0.513944
test_num_trades               32.187251
test_avg_trades_per_window    32.187251
combined_score                 0.729744
Name: 11, dtype: object


EUR_CHF_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          4
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor              1.60884
test_sharpe_ratio              0.170725
test_win_ratio                 0.597973
test_num_trades               31.993243
test_avg_trades_per_window    31.993243
combined_score                 0.879885
Name: 0, dtype: object


AUD_NZD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                         10
test_period                           1
reverse_test                       True
random_seed                         594
test_profit_factor             1.405762
test_sharpe_ratio              0.123455
test_win_ratio                 0.528037
test_num_trades               41.392523
test_avg_trades_per_window    41.392523
combined_score                 0.799129
Name: 156, dtype: object

CAD_JPY_M15
num_clusters                          5
clustering_algorithm              birch
train_period                          4
test_period                           1
reverse_test                      False
random_seed                          42
test_profit_factor             1.481216
test_sharpe_ratio              0.130124
test_win_ratio                 0.543624
test_num_trades               31.402685
test_avg_trades_per_window    31.402685
combined_score                 0.827545
Name: 0, dtype: object

NZD_USD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                          4
test_period                           1
reverse_test                       True
random_seed                         370
test_profit_factor             1.341276
test_sharpe_ratio              0.087563
test_win_ratio                 0.483108
test_num_trades               30.584459
test_avg_trades_per_window    30.584459
combined_score                 0.780953
Name: 35, dtype: object


EUR_CAD_M15
num_clusters                          5
clustering_algorithm             kmeans
train_period                          7
test_period                           1
reverse_test                       True
random_seed                         673
test_profit_factor             1.363988
test_sharpe_ratio              0.108049
test_win_ratio                 0.542553
test_num_trades               15.783688
test_avg_trades_per_window    15.783688
combined_score                 0.781375
Name: 98, dtype: object