In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import os
import shutil
import sys
import json

# TRAINING HYPER-PARAMETERS

In [2]:
# Build the full hyper-parameter grid: one row per task, BIRCH rows first,
# then k-means rows, each with a "random_seed" column.

# Accumulator list; it is not populated anywhere in this cell.
params_df_list = []

# BIRCH always runs with this single constant seed (any constant works).
BIRCH_SEED = 42

# k-means gets three distinct seeds drawn without replacement from 0..999.
# The draw is unseeded, so the values change every time this cell runs.
DYNAMIC_SEEDS = np.random.choice(1000, size=3, replace=False)

# Shared grid. Every value is a list of candidate settings; the cartesian
# product of all lists yields the tasks.
params_dict = {
    # Symbol universe: long-term uptrending and volatile equities.
    "instrument": [
        "AAPL_M15", "MSFT_M15", "AMZN_M15", "GOOGL_M15", "NVDA_M15",
        "TSLA_M15", "META_M15", "AMD_M15", "NFLX_M15", "INTC_M15",
        "CSCO_M15", "PEP_M15", "COST_M15", "QCOM_M15", "TXN_M15",
        "SBUX_M15", "AMGN_M15", "AVGO_M15", "ADP_M15", "ISRG_M15",
        "MU_M15", "MDLZ_M15", "BKNG_M15", "GILD_M15", "FISV_M15",
        "ATVI_M15", "ADI_M15", "LRCX_M15", "KLAC_M15", "MAR_M15",
        "MCHP_M15", "EA_M15", "EXC_M15", "ILMN_M15", "IDXX_M15",
        "MNST_M15", "PAYX_M15", "LULU_M15", "ORLY_M15", "VRTX_M15",
        "REGN_M15", "ASML_M15", "CSX_M15", "SNPS_M15", "CDNS_M15",
        "DXCM_M15", "KDP_M15", "MTD_M15", "SWKS_M15", "WDAY_M15",
    ],
    "price_history_length": [24],
    "num_perceptually_important_points": [4],
    # Alternative wider sweep (disabled): [5, 6, 7, 8, 9]
    "num_clusters": [5, 7],
    # Placeholder only: each per-algorithm grid below overrides this entry.
    "clustering_algorithm": ["kmeans", "birch"],
    "train_period": list(range(1, 15)),  # 1 through 14 inclusive
    "test_period": [1],
    "reverse_test": [False],
}

# Per-algorithm grids. Unpacking into a new dict leaves params_dict untouched,
# keeps "clustering_algorithm" in its original column position, and appends
# "random_seed" as the last column.
birch_params = {
    **params_dict,
    "clustering_algorithm": ["birch"],
    "random_seed": [BIRCH_SEED],
}
other_params = {
    **params_dict,
    "clustering_algorithm": ["kmeans"],
    "random_seed": DYNAMIC_SEEDS,
}

# Expand each grid into a frame with one row per parameter combination and a
# plain 0..N-1 index.
birch_df, other_df = (
    pd.MultiIndex.from_product(list(grid.values()), names=list(grid))
    .to_frame(index=False)
    for grid in (birch_params, other_params)
)

# Stack BIRCH rows on top of the k-means rows and renumber the index.
params_concat_df = pd.concat([birch_df, other_df], ignore_index=True)
params_concat_df

Unnamed: 0,instrument,price_history_length,num_perceptually_important_points,num_clusters,clustering_algorithm,train_period,test_period,reverse_test,random_seed
0,AAPL_M15,24,4,5,birch,1,1,False,42
1,AAPL_M15,24,4,5,birch,2,1,False,42
2,AAPL_M15,24,4,5,birch,3,1,False,42
3,AAPL_M15,24,4,5,birch,4,1,False,42
4,AAPL_M15,24,4,5,birch,5,1,False,42
...,...,...,...,...,...,...,...,...,...
5595,WDAY_M15,24,4,7,kmeans,13,1,False,450
5596,WDAY_M15,24,4,7,kmeans,13,1,False,829
5597,WDAY_M15,24,4,7,kmeans,14,1,False,163
5598,WDAY_M15,24,4,7,kmeans,14,1,False,450


In [3]:
# Create a timestamped job directory, write the parameter grid and a SLURM
# array script into it, copy the runner script there, and stage a local
# params.csv / results directory so the runner can be tried without SLURM.

n_tasks, ncol = params_concat_df.shape

# NOTE(review): the ":" from %H:%M ends up in the directory and job name.
# Kept as-is so existing naming stays stable — confirm downstream tools accept it.
date_time = datetime.now().strftime("%Y-%m-%d_%H:%M")
job_name = f"ml_project_2_{date_time}"
job_dir = "/scratch/da2343/" + job_name
results_dir = os.path.join(job_dir, "results")

# os.makedirs replaces the former shell "mkdir -p": no command string is
# built, and a failure (e.g. permissions) raises here instead of being ignored
# and surfacing later as a confusing write error.
os.makedirs(results_dir, exist_ok=True)
params_concat_df.to_csv(os.path.join(job_dir, "params.csv"), index=False)

print(f"created {n_tasks} tasks in {job_dir}")

# SLURM batch script: array ids run 0..n_tasks-1 and each id is passed to
# run_one.py as its only argument. stdout and stderr share one log file per task.
# Presumably run_one.py uses the id as a row index into params.csv — confirm.
# NOTE(review): a large n_tasks may exceed the cluster's MaxArraySize — confirm.
run_one_contents = f"""#!/bin/bash
#SBATCH --array=0-{n_tasks-1}
#SBATCH --time=5:00:00
#SBATCH --mem=4GB
#SBATCH --cpus-per-task=1
#SBATCH --error={job_dir}/slurm-%A_%a.out
#SBATCH --output={job_dir}/slurm-%A_%a.out
#SBATCH --job-name={job_name}
cd {job_dir}
python run_one.py $SLURM_ARRAY_TASK_ID
"""
run_one_sh = os.path.join(job_dir, "run_one.sh")
with open(run_one_sh, "w") as run_one_f:
    run_one_f.write(run_one_contents)

# Copy the runner into the job directory under the name the batch script calls.
run_orig_py = "demo_run_stock.py"
run_one_py = os.path.join(job_dir, "run_one.py")
shutil.copyfile(run_orig_py, run_one_py)

# Mirror params.csv and a results directory next to the original script.
# run_orig_py has no directory component, so orig_dir is "" and these paths
# resolve relative to the current working directory.
orig_dir = os.path.dirname(run_orig_py)
orig_results = os.path.join(orig_dir, "results")
os.makedirs(orig_results, exist_ok=True)
orig_csv = os.path.join(orig_dir, "params.csv")
params_concat_df.to_csv(orig_csv, index=False)

msg = f"""created params CSV files and job scripts, test with
python {run_orig_py}
SLURM_ARRAY_TASK_ID=0 bash {run_one_sh}"""
print(msg)


created 5600 tasks in /scratch/da2343/ml_project_2_2025-01-24_18:13
created params CSV files and job scripts, test with
python demo_run_stock.py
SLURM_ARRAY_TASK_ID=0 bash /scratch/da2343/ml_project_2_2025-01-24_18:13/run_one.sh
