In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)

#-- Read Test and Train Files Generated in DataPrep.ipynb -------------------------
X_train = pd.read_csv("../data/split/X_train.csv")
X_test = pd.read_csv("../data/split/X_test.csv")
# The target files are read as single-column DataFrames. Squeeze them to 1-D
# Series so scikit-learn receives the (n_samples,) shape it expects and does
# not emit a DataConversionWarning about a column-vector y during fit/scoring.
y_train = pd.read_csv("../data/split/y_train.csv").squeeze("columns")
y_test = pd.read_csv("../data/split/y_test.csv").squeeze("columns")

# -----------------------------
# Initialize and Train Model
# -----------------------------
# GaussianNB is created with its default settings; fit() returns the fitted
# estimator itself, so construction and training are chained in one step.
nb_model = GaussianNB().fit(X_train, y_train)

#-- Save the trained model using pickle
import pickle
from pathlib import Path

trained_model_path = "trained_models/GaussianNaiveBayesModel.pkl"
# Make sure the output directory exists; otherwise open(..., "wb") raises
# FileNotFoundError the first time this is run in a fresh checkout.
Path(trained_model_path).parent.mkdir(parents=True, exist_ok=True)
with open(trained_model_path, "wb") as f:
    pickle.dump(nb_model, f)

# -----------------------------
# Predict
# -----------------------------
# Hard class predictions feed the label-based metrics; the per-class
# probabilities are only needed for the AUC.
y_pred_nb, y_prob_nb = nb_model.predict(X_test), nb_model.predict_proba(X_test)

# -----------------------------
# Metrics
# -----------------------------
# Built step by step in the same key order as the final report:
# Accuracy, AUC, Precision, Recall, F1, MCC.
nb_metrics = {"Accuracy": accuracy_score(y_test, y_pred_nb)}

# One-vs-rest AUC, macro-averaged across classes.
nb_metrics["AUC"] = roc_auc_score(
    y_test, y_prob_nb, multi_class="ovr", average="macro"
)

# Precision, recall and F1 share the same call shape (macro average).
nb_metrics.update(
    {
        metric_name: scorer(y_test, y_pred_nb, average="macro")
        for metric_name, scorer in (
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        )
    }
)

nb_metrics["MCC"] = matthews_corrcoef(y_test, y_pred_nb)

# Bare expression so the notebook displays the metrics dict.
nb_metrics


  y = column_or_1d(y, warn=True)


{'Accuracy': 0.9114444444444444,
 'AUC': 0.9925038983950463,
 'Precision': 0.9118781314429911,
 'Recall': 0.9114071096982005,
 'F1': 0.911349700842268,
 'MCC': 0.9055798302897614}