In [3]:
import numpy as np
import pandas as pd
from itertools import combinations
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearnex import patch_sklearn
import ray
from ray import tune
import os

# CPU count reported by the OS; passed to Ray Tune below as the per-trial CPU request.
num_cpus = os.cpu_count()


# Load the dataset (path is relative to the current working directory).
df = pd.read_csv('../standard_kline.csv')

# Separate the target column from the feature columns.
target_column = "color_change"
X = df.drop(columns=[target_column])
y = df[target_column]

# Hold out 30% of the rows as a test set; random_state fixes the shuffle so
# the split is reproducible.
# NOTE(review): train_test_split is already imported at the top of the file,
# so the import below is redundant.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Drop the reference to the full frame; only X / y and their splits are used
# from here on.
del df

# Names of all feature columns; train_model enumerates subsets of this list.
features = X_train.columns.tolist()
# Shut down any Ray runtime that is already running before ray.init() below
# (presumably so the cell can be re-run in the same kernel -- confirm).
ray.shutdown()
# Patch scikit-learn with scikit-learn-intelex.
# NOTE(review): the sklearn estimators were imported before this call; the
# sklearnex docs recommend patching before importing sklearn -- confirm the
# patch actually takes effect for the classes used here.
patch_sklearn()
ray.init()
# Custom training function
def train_model(config):
    """Ray Tune trainable: exhaustive feature-subset search for one model config.

    Builds the estimator named by ``config["model"]`` using the keyword
    arguments in ``config["params"]``, evaluates it with 5-fold
    cross-validation on every non-empty subset of the module-level
    ``features`` list (columns of ``X_train``), and reports the best mean
    score together with the subset that produced it.

    Args:
        config: Mapping with two keys: ``"model"`` (one of the supported
            estimator class names) and ``"params"`` (keyword arguments for
            that estimator).

    Raises:
        ValueError: If ``config["model"]`` is not a supported model name.
    """
    model_name = config["model"]
    params = config["params"]

    if model_name == "LogisticRegression":
        model = LogisticRegression(**params)
    elif model_name == "DecisionTreeClassifier":
        model = DecisionTreeClassifier(**params)
    elif model_name == "GaussianNB":
        model = GaussianNB(**params)
    elif model_name == "LinearSVC":
        model = LinearSVC(**params)
    elif model_name == "SGDClassifier":
        model = SGDClassifier(**params)
    else:
        # Fail early with a clear message instead of handing ``None`` to
        # cross_val_score further down.
        raise ValueError(f"Unsupported model: {model_name!r}")

    # Lazily enumerate every non-empty subset of the feature columns.
    # NOTE: this is 2**len(features) - 1 subsets, each costing a 5-fold CV,
    # so it is only feasible for a small number of features.
    all_feature_combinations = (
        comb for r in range(1, len(features) + 1) for comb in combinations(features, r)
    )

    best_score = -np.inf
    # Stays None when no subset yields a comparable score (no features, or
    # every score is NaN), so the report below never hits an unbound name.
    best_combination = None
    for feature_combination in all_feature_combinations:
        X_train_subset = X_train[list(feature_combination)]
        score = np.mean(cross_val_score(model, X_train_subset, y_train, cv=5))
        # A NaN score compares False here and is therefore skipped.
        if score > best_score:
            best_score = score
            best_combination = feature_combination

    tune.report(mean_score=best_score, best_combination=best_combination)


# Hyperparameter search space and model configurations
def search_space_for_model(model_name):
    """Return the Ray Tune hyperparameter search space for ``model_name``.

    Args:
        model_name: One of ``"LogisticRegression"``,
            ``"DecisionTreeClassifier"``, ``"GaussianNB"``, ``"LinearSVC"``
            or ``"SGDClassifier"``.

    Returns:
        A dict mapping estimator keyword-argument names to Ray Tune sampling
        objects.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == "LogisticRegression":
        # NOTE(review): solver and penalty are sampled independently, but
        # scikit-learn only accepts certain pairs (see the LogisticRegression
        # docs), so some sampled configs will fail at fit time.
        return {
            "solver": tune.choice(["newton-cg", "lbfgs", "liblinear", "sag", "saga"]),
            "C": tune.loguniform(1e-5, 100),
            "penalty": tune.choice(["l1", "l2", "elasticnet", "none"]),
            "max_iter": tune.randint(100, 1000),
        }
    if model_name == "DecisionTreeClassifier":
        return {
            "criterion": tune.choice(["gini", "entropy"]),
            "max_depth": tune.randint(1, 100),
            "min_samples_split": tune.uniform(0.1, 1),
            "min_samples_leaf": tune.uniform(0.1, 0.5),
        }
    if model_name == "GaussianNB":
        return {"var_smoothing": tune.loguniform(1e-10, 1e-2)}
    # The name must match the spelling used by the caller and by train_model
    # ("LinearSVC"); the previous "LinearSvc" never matched, so this branch
    # was unreachable and the function returned None for that model.
    if model_name == "LinearSVC":
        # NOTE(review): penalty and loss are sampled independently; not every
        # pair is supported by LinearSVC -- confirm against the sklearn docs.
        return {
            "C": tune.loguniform(1e-5, 100),
            "penalty": tune.choice(["l1", "l2"]),
            "loss": tune.choice(["hinge", "squared_hinge"]),
            "max_iter": tune.randint(100, 1000),
        }
    if model_name == "SGDClassifier":
        return {
            "loss": tune.choice(
                ["hinge", "log", "modified_huber", "squared_hinge", "perceptron"]
            ),
            "penalty": tune.choice(["l1", "l2", "elasticnet"]),
            "alpha": tune.loguniform(1e-5, 1),
            "max_iter": tune.randint(100, 1000),
        }
    # Unknown names used to fall through and return None, which only surfaced
    # later as a confusing TypeError when the params were unpacked.
    raise ValueError(f"No search space defined for model: {model_name!r}")


# Iterate through the models
# Models to tune. Each name must be handled by both train_model and
# search_space_for_model.
model_names = [
    "LogisticRegression",
    "DecisionTreeClassifier",
    "GaussianNB",
    "LinearSVC",
    "SGDClassifier",
]
# model name -> {"config", "score", "combination"} of its best trial.
best_configs = {}

for model_name in model_names:
    # Set up the model-specific search space
    search_space = {"model": model_name, "params": search_space_for_model(model_name)}

    # Run Ray Tune for the current model
    analysis = tune.run(
        train_model,
        # Every trial requests all CPUs, so trials run one at a time.
        resources_per_trial={"cpu": num_cpus, "gpu": 0},
        config=search_space,
        num_samples=50,
        local_dir="tune_results",
        name=f"tune_hyperparameters_{model_name}",
        metric="mean_score",
        mode="max",
        # train_model reports a single result per trial, so this cap is not
        # expected to be reached.
        stop={"training_iteration": 10},
        progress_reporter=tune.JupyterNotebookReporter(overwrite=True),
        verbose=1,
        # Some sampled hyperparameter combinations are rejected by
        # scikit-learn and fail their trial; do not let those failures abort
        # the sweep over the remaining trials and models.
        raise_on_failed_trial=False,
    )

    # Get the best trial for the current model
    best_trial = analysis.get_best_trial("mean_score", "max", "last")
    if best_trial is None:
        # No trial reported the metric (e.g. every trial errored out).
        print(f"Model: {model_name}: no trial completed successfully, skipping.\n")
        continue
    best_config = best_trial.config
    best_score = best_trial.last_result["mean_score"]
    best_combination = best_trial.last_result["best_combination"]

    # Store the best configuration for the current model
    best_configs[model_name] = {
        "config": best_config,
        "score": best_score,
        "combination": best_combination,
    }

# Print the best configurations for each model
for model_name, config_info in best_configs.items():
    print(f"Model: {model_name}")
    print(f"  Best configuration: {config_info['config']}")
    print(f"  Best feature combination: {config_info['combination']}")
    print(f"  Best score: {config_info['score']}\n")



0,1
Current time:,2023-04-22 15:51:16
Running for:,00:00:16.87
Memory:,5.8/15.8 GiB

Trial name,# failures,error file
train_model_2626b_00000,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00000_0_C=0.0013,max_iter=783,penalty=l1,solver=lbfgs_2023-04-22_15-50-59\error.txt"
train_model_2626b_00001,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00001_1_C=0.0698,max_iter=989,penalty=elasticnet,solver=liblinear_2023-04-22_15-51-01\error.txt"
train_model_2626b_00002,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00002_2_C=0.0001,max_iter=860,penalty=elasticnet,solver=sag_2023-04-22_15-51-04\error.txt"
train_model_2626b_00003,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00003_3_C=0.0029,max_iter=797,penalty=elasticnet,solver=liblinear_2023-04-22_15-51-07\error.txt"
train_model_2626b_00004,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00004_4_C=78.0728,max_iter=267,penalty=none,solver=liblinear_2023-04-22_15-51-10\error.txt"
train_model_2626b_00005,1,"c:\Users\jnorm\Projects\websocket_trading\notebooks\tune_results\tune_hyperparameters_LogisticRegression\train_model_2626b_00005_5_C=0.0005,max_iter=374,penalty=elasticnet,solver=liblinear_2023-04-22_15-51-13\error.txt"

Trial name,status,loc,params/C,params/max_iter,params/penalty,params/solver
train_model_2626b_00006,PENDING,,0.252034,106,elasticnet,lbfgs
train_model_2626b_00007,PENDING,,4.50948,135,l1,saga
train_model_2626b_00008,PENDING,,1.66616,446,elasticnet,newton-cg
train_model_2626b_00009,PENDING,,2.58676,175,l1,lbfgs
train_model_2626b_00010,PENDING,,0.0595497,458,l2,newton-cg
train_model_2626b_00011,PENDING,,1.0051,926,elasticnet,liblinear
train_model_2626b_00012,PENDING,,0.00017985,120,l1,newton-cg
train_model_2626b_00013,PENDING,,0.0180063,634,l1,lbfgs
train_model_2626b_00014,PENDING,,0.0100368,376,elasticnet,sag
train_model_2626b_00015,PENDING,,0.00015752,516,l1,sag


2023-04-22 15:51:01,383	ERROR trial_runner.py:1062 -- Trial train_model_2626b_00000: Error processing event.
ray.exceptions.RayTaskError(ValueError): [36mray::ImplicitFunc.train()[39m (pid=22900, ip=127.0.0.1, repr=train_model)
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "c:\Users\jnorm\Projects\websocket_trading\.venv\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\jnorm\Projects\websocket_trading\.venv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\jnorm\Projects\websocket_trading\.venv\lib\site-packages\ray\tune\trainable\trainable.py", line 368, in train
    raise skipped from exception_cause(s