Hyperparameter Tuning

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [2]:
# Load the dataset (presumably already cleaned and scaled, going by the
# file name -- confirm against the preprocessing notebook).
df_clean = pd.read_csv("cleaned_scaled.csv")

# Features are every column except the label column "target".
X = df_clean.drop(columns="target")
y = df_clean["target"]

# Hold out 20% of the rows for evaluation. The split is seeded for
# reproducibility and stratified on y so both partitions keep the same
# class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

Baseline Models

In [3]:
# Baseline random forest: library defaults apart from the fixed seed.
rf_base = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf_base = rf_base.predict(X_test)

# Baseline SVM with the same seed.
# NOTE(review): probability=True is not needed for the predict() call below
# and, per the scikit-learn docs, makes fit() slower -- confirm whether
# predict_proba is used elsewhere before dropping it.
svm_base = SVC(probability=True, random_state=42).fit(X_train, y_train)
y_pred_svm_base = svm_base.predict(X_test)

In [4]:
# Report held-out accuracy and macro-averaged F1 for each baseline model,
# in the order RF then SVM.
baseline_reports = (
    ("Baseline RF Accuracy:", "Baseline RF F1 (macro):", y_pred_rf_base),
    ("Baseline SVM Accuracy:", "Baseline SVM F1 (macro):", y_pred_svm_base),
)
for acc_label, f1_label, preds in baseline_reports:
    print(acc_label, accuracy_score(y_test, preds))
    print(f1_label, f1_score(y_test, preds, average="macro"))

Baseline RF Accuracy: 0.5489130434782609
Baseline RF F1 (macro): 0.3915145707405769
Baseline SVM Accuracy: 0.5489130434782609
Baseline SVM F1 (macro): 0.2798901035107403


Random Forest

In [5]:
# Search space for the random forest: 3 x 3 x 3 = 27 combinations.
rf_params = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
}

# Grid search over rf_params, scored by macro F1 with 3-fold
# cross-validation; n_jobs=-1 requests all available cores.
rf_grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1,
)


In [6]:
# Run the grid search on the training split, then predict the held-out split
# with the fitted search object.
rf_grid.fit(X_train, y_train)
y_pred_rf_grid = rf_grid.predict(X_test)

# Compute the test metrics first so the report below is just printing.
rf_grid_accuracy = accuracy_score(y_test, y_pred_rf_grid)
rf_grid_f1_macro = f1_score(y_test, y_pred_rf_grid, average="macro")

print("Best RF Params (GridSearch):", rf_grid.best_params_)
print("Tuned RF Accuracy:", rf_grid_accuracy)
print("Tuned RF F1 (macro):", rf_grid_f1_macro)

Best RF Params (GridSearch): {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}
Tuned RF Accuracy: 0.5652173913043478
Tuned RF F1 (macro): 0.31335593817560703


SVM

In [7]:
# Search space for the SVM: C log-spaced over 1e-2..1e2 and gamma over
# 1e-3..1e1 (10 values each), crossed with three kernels.
svm_params = {
    "C": np.logspace(start=-2, stop=2, num=10),
    "gamma": np.logspace(start=-3, stop=1, num=10),
    "kernel": ["rbf", "poly", "sigmoid"],
}

# Randomized search: 15 sampled candidates, scored by macro F1 with 3-fold
# cross-validation; seeded so the sampled candidates are reproducible.
# NOTE(review): probability=True is not required by the f1_macro scorer and,
# per the scikit-learn docs, slows every fit -- confirm whether predict_proba
# is needed downstream before removing it.
svm_rand = RandomizedSearchCV(
    estimator=SVC(probability=True, random_state=42),
    param_distributions=svm_params,
    n_iter=15,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1,
    random_state=42,
)

In [8]:
# Run the randomized search on the training split, then predict the held-out
# split with the fitted search object.
svm_rand.fit(X_train, y_train)
y_pred_svm_rand = svm_rand.predict(X_test)

# Compute the test metrics first so the report below is just printing.
svm_rand_accuracy = accuracy_score(y_test, y_pred_svm_rand)
svm_rand_f1_macro = f1_score(y_test, y_pred_svm_rand, average="macro")

print("Best SVM Params (RandomSearch):", svm_rand.best_params_)
print("Tuned SVM Accuracy:", svm_rand_accuracy)
print("Tuned SVM F1 (macro):", svm_rand_f1_macro)

Best SVM Params (RandomSearch): {'kernel': 'poly', 'gamma': np.float64(0.1668100537200059), 'C': np.float64(12.915496650148826)}
Tuned SVM Accuracy: 0.5054347826086957
Tuned SVM F1 (macro): 0.3261901964103739
