In [1]:
import numpy as np
import pandas as pd
import joblib
import os

from scipy.sparse import load_npz
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)

import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
# Load the held-out evaluation set. Paths are relative to the notebook's
# working directory. `load_npz` returns a SciPy sparse matrix; the file name
# suggests these are TF-IDF features.
X_test = load_npz("../Models/Features/X_test_tfidf.npz")
y_test = np.load("../Models/Features/y_test.npy")

# The features and labels come from two separate files, so fail fast if they
# are out of sync rather than producing misleading metrics further down.
assert X_test.shape[0] == y_test.shape[0], (
    f"Feature/label mismatch: {X_test.shape[0]} feature rows "
    f"vs {y_test.shape[0]} labels"
)

In [4]:
# Locations of the previously trained classifiers (relative to the notebook).
lr_model_path = "../Models/logistic_regression_model.pkl"
svm_model_path = "../Models/linear_svm_model.pkl"

# NOTE: joblib files are pickle-based, so only load artifacts from a trusted
# source -- unpickling can execute arbitrary code.
lr_model = joblib.load(lr_model_path)
svm_model = joblib.load(svm_model_path)

In [8]:
def _plot_confusion_matrix(cm, model_name, path):
    """Render confusion matrix `cm` as an annotated heatmap and save it to `path`."""
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        # NOTE(review): tick labels are hard-coded, which assumes a two-class
        # problem whose sorted labels mean "No Drug" then "Drug" -- confirm.
        xticklabels=["No Drug", "Drug"],
        yticklabels=["No Drug", "Drug"],
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(f"Confusion Matrix - {model_name}")
    fig.tight_layout()
    fig.savefig(path, dpi=300)
    # Close explicitly so repeated calls do not accumulate open figures.
    plt.close(fig)


def _plot_metric_bars(metrics, model_name, path):
    """Draw a value-labelled bar chart of the four summary metrics and save it to `path`."""
    metric_names = ["Accuracy", "Precision", "Recall", "F1"]
    values = [
        metrics["accuracy"],
        metrics["precision"],
        metrics["recall"],
        metrics["f1_score"],
    ]

    fig, ax = plt.subplots(figsize=(6, 4))
    bars = ax.bar(metric_names, values)

    # Zoomed axis: keep the 0.95-1.0 window when every score fits inside it,
    # otherwise widen the window so that low-scoring bars remain visible.
    lowest = min(values)
    y_min = 0.95 if lowest > 0.95 else max(0.0, lowest - 0.05)
    ax.set_ylim(y_min, 1.0)
    ax.set_ylabel("Score")
    ax.set_title(f"Performance Metrics - {model_name}")

    # Add value labels on top of the bars.
    for bar, val in zip(bars, values):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            val,
            f"{val:.3f}",
            ha="center",
            va="bottom",
            fontsize=9,
        )

    fig.tight_layout()
    fig.savefig(path, dpi=300)
    plt.close(fig)


def evaluate_and_save(model, model_name, X=None, y=None,
                      output_dir="../Models/Evaluation"):
    """Evaluate a fitted classifier and persist its metrics, report and plots.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    model_name : str
        Label used in the returned dict, in plot titles and as the prefix of
        every output file name.
    X : optional
        Feature matrix to predict on. Defaults to the notebook-level
        ``X_test`` when omitted.
    y : optional
        True labels for ``X``. Defaults to the notebook-level ``y_test``
        when omitted.
    output_dir : str, optional
        Root directory for the artifacts. The sub-directories ``metrics``,
        ``reports``, ``confusion_matrices`` and ``plots`` are created below
        it if they do not exist yet.

    Returns
    -------
    dict
        Keys ``model``, ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``. The scores are computed with scikit-learn's default
        arguments.

    Side effects
    ------------
    Writes ``<model_name>_metrics.csv``,
    ``<model_name>_classification_report.csv``,
    ``<model_name>_confusion_matrix.png`` and ``<model_name>_metrics.png``
    into the matching sub-directories of ``output_dir``.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working. Compare against None because arrays have no unambiguous truth value.
    features = X_test if X is None else X
    labels = y_test if y is None else y

    # Predict
    y_pred = model.predict(features)

    # Compute metrics
    metrics = {
        "model": model_name,
        "accuracy": accuracy_score(labels, y_pred),
        "precision": precision_score(labels, y_pred),
        "recall": recall_score(labels, y_pred),
        "f1_score": f1_score(labels, y_pred),
    }

    # Make sure every target directory exists; otherwise the writes below
    # fail on a fresh checkout.
    metrics_dir = os.path.join(output_dir, "metrics")
    reports_dir = os.path.join(output_dir, "reports")
    cm_dir = os.path.join(output_dir, "confusion_matrices")
    plots_dir = os.path.join(output_dir, "plots")
    for directory in (metrics_dir, reports_dir, cm_dir, plots_dir):
        os.makedirs(directory, exist_ok=True)

    # Save metrics CSV (single row, no index column).
    pd.DataFrame([metrics]).to_csv(
        os.path.join(metrics_dir, f"{model_name}_metrics.csv"),
        index=False,
    )

    # Save classification report (rows are classes/averages after the transpose).
    report = classification_report(labels, y_pred, output_dict=True)
    pd.DataFrame(report).transpose().to_csv(
        os.path.join(reports_dir, f"{model_name}_classification_report.csv")
    )

    # Confusion matrix plot
    _plot_confusion_matrix(
        confusion_matrix(labels, y_pred),
        model_name,
        os.path.join(cm_dir, f"{model_name}_confusion_matrix.png"),
    )

    # Metric bar plot (zoomed axis + value labels)
    _plot_metric_bars(
        metrics,
        model_name,
        os.path.join(plots_dir, f"{model_name}_metrics.png"),
    )

    return metrics


In [9]:
# Score the logistic regression model; artifacts are written with the
# "Logistic_Regression" file-name prefix. The bare name on the last line
# displays the returned metrics dict as the cell output.
metrics_lr = evaluate_and_save(
    model=lr_model,
    model_name="Logistic_Regression",
)
metrics_lr

{'model': 'Logistic_Regression',
 'accuracy': 0.9853420195439739,
 'precision': 0.9911437246963563,
 'recall': 0.9794948737184296,
 'f1_score': 0.9852848698276946}

In [10]:
# Score the linear SVM model; artifacts are written with the "Linear_SVM"
# file-name prefix. The bare name on the last line displays the returned
# metrics dict as the cell output.
metrics_svm = evaluate_and_save(
    model=svm_model,
    model_name="Linear_SVM",
)
metrics_svm

{'model': 'Linear_SVM',
 'accuracy': 0.9859684289651717,
 'precision': 0.9921499113699671,
 'recall': 0.9797449362340586,
 'f1_score': 0.9859084046300957}