In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier


from scipy.stats import randint, uniform # Utilities for defining hyperparameter search spaces.


# --- Data loading and preprocessing ---
# Load the feature table from disk.
df = pd.read_csv(r'/content/final_min_features_filtered.csv')

# 'label' is the prediction target; 'file', 'run' and 'onset_s' are dropped
# together with it so that only the remaining columns are used as features.
y = df['label']
X = df.drop(columns=['file', 'run', 'label', 'onset_s'])

# Map the class labels to integer codes (the fitted encoder is kept in `le`).
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 30% of the rows for testing, stratified on the encoded label so
# both splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

# Standardize features using statistics learned from the training split only,
# then apply the same transformation to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Data Preprocessing Complete.\n")

# --- Model definitions and hyperparameter search spaces ---
# Maps a display name to (unfitted estimator, parameter distributions).
# scipy conventions: uniform(loc, scale) samples from [loc, loc + scale];
# randint(low, high) samples integers from [low, high).
models_to_tune = {
    'SVM': (
        SVC(probability=True, random_state=42),
        {'C': uniform(0.1, 10), 'gamma': uniform(0.001, 0.1)},
    ),
    'Decision Tree': (
        DecisionTreeClassifier(random_state=42),
        {'max_depth': randint(5, 50)},
    ),
    'Random Forest': (
        RandomForestClassifier(random_state=42, n_jobs=-1),
        {'n_estimators': randint(100, 200), 'max_depth': randint(10, 50)},
    ),
    'AdaBoost': (
        AdaBoostClassifier(random_state=42),
        {'n_estimators': randint(50, 200), 'learning_rate': uniform(0.01, 1.0)},
    ),
    'CatBoost': (
        CatBoostClassifier(random_state=42, silent=True),
        {'iterations': randint(100, 200), 'learning_rate': uniform(0.01, 0.3)},
    ),
    'XGBoost': (
        XGBClassifier(eval_metric='mlogloss', random_state=42, use_label_encoder=False),
        {'n_estimators': randint(100, 200), 'max_depth': randint(3, 10)},
    ),
    'Gaussian NB': (
        GaussianNB(),
        {'var_smoothing': uniform(1e-10, 1e-7)},
    ),
    'MLP Classifier': (
        MLPClassifier(random_state=42, max_iter=500),
        {'hidden_layer_sizes': [(50,), (100,)], 'alpha': uniform(0.0001, 0.01)},
    ),
}

# --- Randomized hyperparameter search ---
# Settings shared by every search: 5 sampled candidates, 2-fold
# cross-validation, a fixed seed, and all CPU cores for parallel fits.
search_settings = dict(n_iter=5, cv=2, random_state=42, n_jobs=-1)

# Display name -> best estimator found by the search (refit by the search).
best_models = {}
for name, (model, params) in models_to_tune.items():
    print(f"Rapidly tuning {name}...")
    random_search = RandomizedSearchCV(
        model, param_distributions=params, **search_settings
    ).fit(X_train_scaled, y_train)
    best_models[name] = random_search.best_estimator_
    print(f"{name} tuning complete.")


# --- Evaluation of the tuned models on both splits ---
print("\nStep 3: Evaluating All Tuned Models...")

# Keys are "<model name>_Train" / "<model name>_Test"; values hold the
# accuracy and macro-averaged F1 for that model on that split.
results = {}
for name, model in best_models.items():
    y_train_pred, y_test_pred = (
        model.predict(features) for features in (X_train_scaled, X_test_scaled)
    )
    # Train is recorded before Test so the report rows keep that order.
    for split, truth, predicted in (
        ('Train', y_train, y_train_pred),
        ('Test', y_test, y_test_pred),
    ):
        results[f"{name}_{split}"] = {
            'Accuracy': accuracy_score(truth, predicted),
            'F1-score': f1_score(truth, predicted, average='macro'),
        }



# --- Performance report ---
# Turn the metrics dict (one entry per "<model name>_<dataset>" key) into a
# table with one row per key and one column per metric.
results_df = pd.DataFrame.from_dict(results, orient='index')

# Replace the flat string index with a (Model, Dataset) MultiIndex.
# The key is split on its LAST underscore only: splitting on every underscore
# and taking parts [0] and [1] would mislabel any model whose name itself
# contains an underscore (e.g. "Random_Forest_Test" -> ("Random", "Forest")).
results_df.index = pd.MultiIndex.from_tuples(
    [tuple(key.rsplit('_', 1)) for key in results_df.index],
    names=['Model', 'Dataset']
)
print("\n--- RAPID PERFORMANCE REPORT ---")
print(results_df.round(3))

Step 1: Loading and Preprocessing Data...
Data Preprocessing Complete.

Step 2: Defining Models and Hyperparameter Search Spaces...
Rapidly tuning SVM...
SVM tuning complete.
Rapidly tuning Decision Tree...
Decision Tree tuning complete.
Rapidly tuning Random Forest...
Random Forest tuning complete.
Rapidly tuning AdaBoost...
AdaBoost tuning complete.
Rapidly tuning CatBoost...
CatBoost tuning complete.
Rapidly tuning XGBoost...
XGBoost tuning complete.
Rapidly tuning Gaussian NB...
Gaussian NB tuning complete.
Rapidly tuning MLP Classifier...
MLP Classifier tuning complete.

Step 3: Evaluating All Tuned Models...

--- RAPID PERFORMANCE REPORT ---
                        Accuracy  F1-score
Model          Dataset                    
SVM            Train       0.964     0.951
               Test        0.544     0.340
Decision Tree  Train       0.849     0.812
               Test        0.468     0.305
Random Forest  Train       1.000     1.000
               Test        0.553     0.326
