In [2]:
from datetime import datetime
import pandas as pd
import numpy as np
import os
import shutil
import sys
import json

In [None]:
# Build the full hyper-parameter grid: one row per combination of every option
# listed in `params_dict` (Cartesian product), columns in the dict's key order.

# The two `random_seed` grid values are drawn from a locally seeded generator so
# that Restart & Run All regenerates exactly the same grid (and params.csv)
# instead of a fresh, unrecorded pair of seeds on every kernel start.
GRID_SEED = 0
grid_rng = np.random.default_rng(GRID_SEED)

params_df_list = []
params_dict = {
    "instrument": [
        "EUR_USD_M15",
        "GBP_USD_M15",
        "USD_JPY_M15",
        "USD_CHF_M15",
        "USD_CAD_M15",
        "AUD_USD_M15",
        "AUD_JPY_M15",
        "AUD_CAD_M15",
        "EUR_GBP_M15",
        "EUR_JPY_M15",
        "GBP_CHF_M15",
        "GBP_JPY_M15",
        "EUR_CHF_M15",
        "AUD_NZD_M15",
        "CAD_JPY_M15",
        "NZD_USD_M15",
        "EUR_CAD_M15",
    ],
    "price_history_length": [24],  # alternative grid: [24, 36]
    "num_perceptually_important_points": [4],
    "num_clusters": [4, 5, 6, 7, 8, 9, 10],
    # "gaussian_mixture" is currently left out of the grid.
    "clustering_algorithm": ["kmeans", "birch"],
    # Two distinct seeds in [0, 1000), reproducible thanks to GRID_SEED.
    "random_seed": grid_rng.choice(1000, size=2, replace=False).tolist(),
    "train_period": [4, 5, 6, 7, 8, 9, 10, 11, 12, 14],  # weeks
    "test_period": [1],  # weeks
    "reverse_test": [True, False],
}

# from_product enumerates every combination; to_frame(index=False) yields a
# plain 0..n-1 index directly, with one column per parameter.
params_df = pd.MultiIndex.from_product(
    list(params_dict.values()), names=list(params_dict.keys())
).to_frame(index=False)

# The list/concat pattern allows further grids to be appended before concatenating.
params_df_list.append(params_df)
params_concat_df = pd.concat(params_df_list, ignore_index=True)
params_concat_df

In [3]:
# Per-instrument settings stored as `best_params`, written one row per instrument:
# (instrument, num_clusters, clustering_algorithm, train_period [weeks], reverse_test)
best_rows = [
    ("EUR_USD_M15", 6, "kmeans", 8, False),
    ("GBP_USD_M15", 4, "birch", 9, True),
    ("USD_JPY_M15", 5, "kmeans", 5, True),
    ("USD_CHF_M15", 5, "kmeans", 14, True),
    ("USD_CAD_M15", 4, "birch", 11, True),
    ("AUD_USD_M15", 8, "birch", 9, True),
    ("AUD_JPY_M15", 4, "kmeans", 12, False),
    ("AUD_CAD_M15", 4, "kmeans", 14, True),
    ("EUR_GBP_M15", 4, "kmeans", 5, True),
    ("EUR_JPY_M15", 5, "birch", 4, True),
    ("GBP_CHF_M15", 4, "birch", 11, True),
    ("GBP_JPY_M15", 4, "birch", 10, True),
    ("EUR_CHF_M15", 7, "kmeans", 7, False),
    ("AUD_NZD_M15", 4, "kmeans", 10, True),
    ("CAD_JPY_M15", 5, "kmeans", 4, False),
    ("NZD_USD_M15", 7, "birch", 7, True),
    ("EUR_CAD_M15", 6, "kmeans", 5, True),
]
n_instruments = len(best_rows)

# Transpose the row-wise table back into one list per column.
instruments, cluster_counts, algorithms, train_weeks, reverse_flags = (
    list(column) for column in zip(*best_rows)
)

# Column order here determines the column order of the resulting frame.
best_params = {
    "instrument": instruments,
    "num_clusters": cluster_counts,
    "clustering_algorithm": algorithms,
    "train_period": train_weeks,
    "test_period": [1] * n_instruments,  # every instrument: 1 week test period
    "price_history_length": [24] * n_instruments,  # every instrument: 24
    "num_perceptually_important_points": [4] * n_instruments,  # every instrument: 4
    "reverse_test": reverse_flags,
    "random_seed": [388] * n_instruments,  # same seed for every instrument
}

params_concat_df = pd.DataFrame(best_params)
params_concat_df

Unnamed: 0,instrument,num_clusters,clustering_algorithm,train_period,test_period,price_history_length,num_perceptually_important_points,reverse_test,random_seed
0,EUR_USD_M15,6,kmeans,8,1,24,4,False,388
1,GBP_USD_M15,4,birch,9,1,24,4,True,388
2,USD_JPY_M15,5,kmeans,5,1,24,4,True,388
3,USD_CHF_M15,5,kmeans,14,1,24,4,True,388
4,USD_CAD_M15,4,birch,11,1,24,4,True,388
5,AUD_USD_M15,8,birch,9,1,24,4,True,388
6,AUD_JPY_M15,4,kmeans,12,1,24,4,False,388
7,AUD_CAD_M15,4,kmeans,14,1,24,4,True,388
8,EUR_GBP_M15,4,kmeans,5,1,24,4,True,388
9,EUR_JPY_M15,5,birch,4,1,24,4,True,388


In [4]:

# Materialise a SLURM array job for the parameter table in `params_concat_df`:
#   * a timestamped job directory (with a results/ sub-directory) under /scratch,
#   * params.csv holding one row per array task,
#   * run_one.sh, the sbatch script that runs run_one.py with the task id,
#   * run_one.py, a copy of the local demo script,
# plus a params.csv / results/ next to the demo script for a local test run.
n_tasks, ncol = params_concat_df.shape
if n_tasks == 0:
    # An empty table would render "#SBATCH --array=0--1" below; fail before
    # touching the filesystem instead of writing an unusable job script.
    raise ValueError("params_concat_df is empty: there are no tasks to submit")

date_time = datetime.now().strftime("%Y-%m-%d_%H:%M")
job_name = f"ml_project_2_{date_time}"
job_dir = "/scratch/da2343/" + job_name
results_dir = os.path.join(job_dir, "results")
# os.makedirs instead of shelling out to `mkdir -p`: no shell parsing of the
# path, and a failure raises OSError rather than being silently ignored.
os.makedirs(results_dir, exist_ok=True)
params_concat_df.to_csv(os.path.join(job_dir, "params.csv"), index=False)

print(f"created {n_tasks} tasks in {job_dir}")

# One array index per row of params.csv; stdout and stderr share a single
# per-task log file inside the job directory.
run_one_contents = f"""#!/bin/bash
#SBATCH --array=0-{n_tasks-1}
#SBATCH --time=24:00:00
#SBATCH --mem=4GB
#SBATCH --cpus-per-task=1
#SBATCH --error={job_dir}/slurm-%A_%a.out
#SBATCH --output={job_dir}/slurm-%A_%a.out
#SBATCH --job-name={job_name}
cd {job_dir}
python run_one.py $SLURM_ARRAY_TASK_ID
"""
run_one_sh = os.path.join(job_dir, "run_one.sh")
with open(run_one_sh, "w") as run_one_f:
    run_one_f.write(run_one_contents)

# The per-task worker is a copy of the demo script, so each job directory keeps
# the copy of the script that was current when the job was created.
run_orig_py = "demo_run_gfd.py"
run_one_py = os.path.join(job_dir, "run_one.py")
shutil.copyfile(run_orig_py, run_one_py)

# Mirror params.csv and results/ next to the demo script so it can be run
# locally. dirname() is "" for a bare filename, which makes these paths
# relative to the current working directory.
orig_dir = os.path.dirname(run_orig_py)
orig_results = os.path.join(orig_dir, "results")
os.makedirs(orig_results, exist_ok=True)
orig_csv = os.path.join(orig_dir, "params.csv")
params_concat_df.to_csv(orig_csv, index=False)

msg = f"""created params CSV files and job scripts, test with
python {run_orig_py}
SLURM_ARRAY_TASK_ID=0 bash {run_one_sh}"""
print(msg)


created 17 tasks in /scratch/da2343/ml_project_2_2024-12-14_04:20
created params CSV files and job scripts, test with
python demo_run_gfd.py
SLURM_ARRAY_TASK_ID=0 bash /scratch/da2343/ml_project_2_2024-12-14_04:20/run_one.sh
