<h1><b>Importing Libraries</b></h1>

In [None]:
# Load the cuml.accel IPython extension (cuML's accelerator mode) into this kernel.
# NOTE(review): cuML's documentation says the accelerator should be loaded before
# scikit-learn is imported, so this cell is expected to stay ahead of the imports cell — confirm.
%load_ext cuml.accel

cuML: Accelerator installed.


In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from scipy.stats import randint
import pandas as pd
import gc

<h1><b>Training the Model</b></h1>

In [None]:
# Read the preprocessed features (X) and target (y) from their Parquet files,
# in that order; both come back as pandas DataFrames.
X, y = map(pd.read_parquet, ('X_processed.parquet', 'y_processed.parquet'))

In [None]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# seeded so that it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Only the split pieces are used from here on: drop the full frames and ask
# the garbage collector to reclaim their memory.
# (This makes the cell non-re-runnable without re-executing the load cell first.)
del X
del y
gc.collect()

In [None]:
# Base estimator handed to the hyperparameter search: a seeded decision tree
# with class_weight='balanced'.
dt = DecisionTreeClassifier(random_state=42, class_weight='balanced')

# Candidate values for each hyperparameter (None means "no limit" for
# max_depth and "all features" for max_features).
depth_choices = [5, 10, 15, 20, 30, None]
split_choices = [2, 5, 10, 20]
leaf_choices = [1, 2, 4, 8]
feature_choices = ['sqrt', 'log2', None]

# Search space sampled by the randomized search.
param_dist = {
    'max_depth': depth_choices,
    'min_samples_split': split_choices,
    'min_samples_leaf': leaf_choices,
    'max_features': feature_choices,
}

<h5><b>Applying Hyperparameter Tuning using Randomized Search CV</b></h5>

In [None]:
# Search configuration: 20 sampled candidates, each scored by ROC-AUC with
# 2-fold cross-validation (40 fits in total), seeded for reproducibility.
search_settings = {
    'n_iter': 20,
    'scoring': 'roc_auc',
    'cv': 2,
    'n_jobs': -1,
    'random_state': 42,
    'verbose': 2,
}

dt_search = RandomizedSearchCV(estimator=dt, param_distributions=param_dist, **search_settings)
dt_search.fit(X_train, y_train)

# Report the winning hyperparameter combination and its cross-validated score.
print("\n→  Best Parameters:", dt_search.best_params_)
print("→  Best CV ROC-AUC:", dt_search.best_score_)

Fitting 2 folds for each of 20 candidates, totalling 40 fits

→ Best Parameters: {'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 10}
→ Best CV ROC-AUC: 0.8303019600669159


<h1><b>Testing the Model</b></h1>

In [None]:
# Evaluate the best estimator found by the search on the held-out test set.
best_dt = dt_search.best_estimator_
y_pred = best_dt.predict(X_test)

# Column 1 of predict_proba is used as the score for ROC-AUC
# (presumably the positive class of a binary target — confirm).
class_probabilities = best_dt.predict_proba(X_test)
y_proba = class_probabilities[:, 1]

# ROC-AUC is computed from the probability scores; F1 and accuracy from the
# hard class predictions. Each metric is computed and printed in turn.
for label, metric, predicted in (
    ("\nTest ROC-AUC:", roc_auc_score, y_proba),
    ("Test F1 Score:", f1_score, y_pred),
    ("Test Accuracy:", accuracy_score, y_pred),
):
    print(label, metric(y_test, predicted))


Test ROC-AUC: 0.8422501559917137
Test F1 Score: 0.2556730856161287
Test Accuracy: 0.8605852270802994
