In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)

#-- Read Test and Train Files Generated in DataPrep.ipynb -------------------------
# Features stay as DataFrames so column names are kept.
X_train = pd.read_csv("../data/split/X_train.csv")
X_test = pd.read_csv("../data/split/X_test.csv")

# pd.read_csv always returns a 2-D DataFrame, even for a single label column.
# Squeeze the labels down to a 1-D Series so the estimator and the metric
# functions receive y with shape (n_samples,) rather than a column vector
# (a column-vector y makes scikit-learn emit a DataConversionWarning on fit).
# NOTE(review): assumes each y_*.csv holds exactly one column — confirm in DataPrep.ipynb.
y_train = pd.read_csv("../data/split/y_train.csv").squeeze("columns")
y_test = pd.read_csv("../data/split/y_test.csv").squeeze("columns")


# -----------------------------
# Random Forest: configure and train
# -----------------------------
# fit() returns the fitted estimator itself, so construction and training
# are chained and rf_model ends up bound to the trained forest.
rf_model = RandomForestClassifier(
    random_state=42,        # fixed seed for reproducible runs
    n_jobs=-1,              # use all CPU cores
    n_estimators=200,       # number of trees in the forest
    max_depth=None,         # no depth cap; trees grow freely (forest controls overfitting)
    min_samples_leaf=2,
    min_samples_split=5,
).fit(X_train, y_train)

#-- Save the trained model using pickle
import pickle
from pathlib import Path

# Path is relative to the current working directory of the notebook kernel.
trained_model_path = "trained_models/RandomForestModel.pkl"

# open(..., "wb") does not create missing parent folders, so make sure the
# output directory exists first (no-op when it is already there).
Path(trained_model_path).parent.mkdir(parents=True, exist_ok=True)

with open(trained_model_path, "wb") as f:
    pickle.dump(rf_model, f)

        
# -----------------------------
# Predict
# -----------------------------
y_pred_rf = rf_model.predict(X_test)         # hard class labels
y_prob_rf = rf_model.predict_proba(X_test)   # class probabilities, consumed by the AUC below

# -----------------------------
# Evaluation Metrics
# -----------------------------
# Precision, recall and F1 share the same call signature (macro average over
# classes), so they are produced by one comprehension; key order is kept as
# Accuracy, AUC, Precision, Recall, F1, MCC.
rf_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_rf),
    "AUC": roc_auc_score(y_test, y_prob_rf, multi_class="ovr", average="macro"),
    **{
        metric_name: metric_fn(y_test, y_pred_rf, average="macro")
        for metric_name, metric_fn in (
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        )
    },
    "MCC": matthews_corrcoef(y_test, y_pred_rf),
}

# Bare expression last so the notebook cell displays the metrics dict.
rf_metrics


  return fit_method(estimator, *args, **kwargs)


{'Accuracy': 0.9773333333333334,
 'AUC': 0.9941326707551654,
 'Precision': 0.9773496294727277,
 'Recall': 0.9773304319960959,
 'F1': 0.9773243390673758,
 'MCC': 0.9758243007988056}