In [None]:
import os
import time
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split

In [None]:
# 📊 View Results
# Read the per-experiment metrics and rank the runs from best to worst test F1.
df = pd.read_csv("./experiment_results.csv")
df_sorted = (
    df
    .sort_values(by="Test_F1", ascending=False)
    .reset_index(drop=True)
)
print("\n✅ All Experiments Completed — Summary Table:\n")
display(df_sorted)


In [None]:
# Load preprocessed dataset and turn the textual class names into 0/1 ids.
# Any class value outside this mapping becomes NaN.
label_ids = {"Usability": 1, "Not Usability": 0}
df = pd.read_csv("labeled_cases_combined.csv")
df["class"] = df["class"].map(label_ids)


In [None]:
# Keep only the rows that still carry a class id.
df = df.dropna(subset=["class"])

In [None]:
# Print the frame's column summary as a quick sanity check before splitting.
df.info()

In [None]:
# Features are the free-text descriptions; targets are the 0/1 class ids.
X, y = df["description"], df["class"]

# Stratified 10% test set (fixed seed so the split is repeatable)
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    stratify=y,
    random_state=42,
)

# Plain Python lists so the dataset can index the test split positionally
X_test, y_test = X_test.tolist(), y_test.tolist()

In [None]:
# Tokenizer: loaded from the same "bert-base-uncased" checkpoint name that
# BertBinaryClassifier uses for its encoder.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Dataset
class TechSupportDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for binary classification.

    Args:
        texts: Sequence of raw strings (list, tuple, pandas Series, ...).
        labels: Sequence of numeric class ids, aligned with ``texts``.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=..., padding=..., max_length=...,
            return_tensors="pt")`` that returns a mapping with ``"input_ids"``
            and ``"attention_mask"`` tensors carrying a leading batch axis of 1.
        max_len: Length every example is padded/truncated to.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        # Materialize as lists so ``self.texts[idx]`` is always positional.
        # A pandas Series is indexed by label, so a Series with a non-default
        # index would raise KeyError (or return the wrong row) in __getitem__.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(self.texts)} and {len(self.labels)})"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized example at position ``idx`` as a dict of tensors."""
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            # Drop the leading batch axis added by return_tensors="pt";
            # the DataLoader adds its own batch axis when collating.
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            # Float label, as a scalar tensor.
            "label": torch.tensor(self.labels[idx], dtype=torch.float)
        }

# Batch the held-out test split for inference, 16 examples per batch;
# max_len is left at the TechSupportDataset default of 128.
test_loader = DataLoader(TechSupportDataset(X_test, y_test, tokenizer), batch_size=16)


In [None]:
# Model
class BertBinaryClassifier(nn.Module):
    """Pretrained BERT encoder followed by a dropout + single-logit linear head."""

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        hidden_dim = self.bert.config.hidden_size
        # Head emits one raw logit per example; no sigmoid is applied here.
        self.classifier = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, input_ids, attention_mask):
        """Encode the batch and return the classifier output for the pooled representation."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = encoded.pooler_output
        logits = self.classifier(pooled)
        return logits

In [None]:
# Plot single confusion matrix + ROC
def plot_confusion_and_roc(y_true, y_prob, y_pred, title_prefix, filename_prefix):
    """Save a two-panel figure: confusion matrix (left) and ROC curve (right).

    Args:
        y_true: Ground-truth binary labels (0 = Non-Usability, 1 = Usability).
        y_prob: Scores for the positive class, passed to ``roc_curve``.
        y_pred: Hard 0/1 predictions, used for the confusion matrix.
        title_prefix: Text placed in front of both panel titles.
        filename_prefix: Output path prefix; ``_confusion_roc.png`` is appended.

    Returns:
        Tuple ``(filename, fpr, tpr, roc_auc)`` so callers can reuse the ROC
        points without recomputing them.
    """
    labels = ["Non-Usability", "Usability"]
    # Pin the class order: without ``labels`` the matrix shrinks to 1x1 when
    # only one class shows up in y_true/y_pred and no longer matches the two
    # tick labels below.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=axs[0])
    axs[0].set_title(f"{title_prefix} - Confusion Matrix")
    axs[0].set_xlabel("Predicted")
    axs[0].set_ylabel("Actual")

    axs[1].plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
    # Diagonal reference line for comparison with the ROC curve.
    axs[1].plot([0, 1], [0, 1], linestyle='--', color='gray')
    axs[1].set_title(f"{title_prefix} - ROC Curve")
    axs[1].set_xlabel("False Positive Rate")
    axs[1].set_ylabel("True Positive Rate")
    axs[1].legend()

    # Operate on the explicit figure handle rather than pyplot's "current
    # figure" so the function cannot touch an unrelated open figure.
    fig.tight_layout()
    filename = f"{filename_prefix}_confusion_roc.png"
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    return filename, fpr, tpr, roc_auc

# Plot all confusion matrices in one image and all ROC curves in another.
# NOTE(review): a second ``plot_combined_confusion_and_roc`` is defined further
# down in this same cell and replaces this one at run time; keep only one.
def plot_combined_confusion_and_roc(models_outputs, save_prefix="combined"):
    """Save a row of confusion matrices and an overlaid ROC plot.

    Args:
        models_outputs: Iterable of tuples
            ``(layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc)``, one per model.
        save_prefix: Prefix for the two output file names.

    Returns:
        Tuple ``(cm_path, roc_path)`` with the paths of the saved PNG files.
    """
    models_outputs = list(models_outputs)
    # One panel per model instead of a hard-coded 3: more than three entries
    # used to raise IndexError and fewer left blank panels. At least one
    # column is kept so an empty input still yields a (blank) figure.
    n_panels = max(len(models_outputs), 1)
    # squeeze=False keeps a 2-D axes array even when there is one panel.
    fig_cm, axs_grid = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)
    axs_cm = axs_grid[0]
    fig_roc, ax_roc = plt.subplots(figsize=(6, 5))
    labels = ["Non-Usability", "Usability"]

    for ax_cm, (layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc) in zip(axs_cm, models_outputs):
        # Fixed class order keeps every matrix 2x2 and aligned with the tick labels.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax_cm)
        ax_cm.set_title(f"{layers} Layers")
        ax_cm.set_xlabel("Predicted")
        ax_cm.set_ylabel("Actual")

        ax_roc.plot(fpr, tpr, label=f"{layers} Layers (AUC={roc_auc:.2f})")

    # Diagonal reference line for comparison with the ROC curves.
    ax_roc.plot([0, 1], [0, 1], linestyle='--', color='gray')
    ax_roc.set_title("ROC Curves by Model")
    ax_roc.set_xlabel("False Positive Rate")
    ax_roc.set_ylabel("True Positive Rate")
    ax_roc.legend()

    fig_cm.suptitle("Confusion Matrices by Model", fontsize=14)
    fig_roc.tight_layout()
    fig_cm.tight_layout()

    cm_path = f"{save_prefix}_confusion_combined.png"
    roc_path = f"{save_prefix}_roc_combined.png"

    fig_cm.savefig(cm_path, dpi=300)
    fig_roc.savefig(roc_path, dpi=300)
    plt.close(fig_cm)
    plt.close(fig_roc)

    return cm_path, roc_path

def plot_confusion_only(y_true, y_pred, title_prefix, filename_prefix):
    """Save a standalone confusion-matrix heatmap.

    Args:
        y_true: Ground-truth binary labels (0 = Non-Usability, 1 = Usability).
        y_pred: Hard 0/1 predictions.
        title_prefix: Text placed in front of the plot title.
        filename_prefix: Output path prefix; ``_confusion_only.png`` is appended.

    Returns:
        The file name the figure was written to.
    """
    labels = ["Non-Usability", "Usability"]
    # Pin the class order so the matrix stays 2x2 (and matches the two tick
    # labels) even when only one class occurs in y_true/y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Explicit figure/axes handles instead of pyplot's implicit current figure.
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_title(f"{title_prefix} - Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    fig.tight_layout()

    filename = f"{filename_prefix}_confusion_only.png"
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    return filename


def plot_roc_only(y_true, y_prob, title_prefix, filename_prefix):
    """Draw a single ROC curve with its AUC in the legend, save it, and return the file name.

    The output file is ``f"{filename_prefix}_roc_only.png"``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_prob)
    area = auc(false_pos_rate, true_pos_rate)

    fig, ax = plt.subplots(figsize=(6, 5))
    ax.plot(false_pos_rate, true_pos_rate, label=f"AUC = {area:.2f}", color="blue")
    # Dashed diagonal as a visual reference.
    ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
    ax.set_title(f"{title_prefix} - ROC Curve")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend()
    fig.tight_layout()

    out_path = f"{filename_prefix}_roc_only.png"
    fig.savefig(out_path, dpi=300)
    plt.close(fig)
    return out_path


def plot_combined_confusion_and_roc(models_outputs, save_prefix="combined"):
    """
    Generates:
    - A single figure with one confusion matrix per model, side by side
    - A single ROC plot with all curves overlayed

    models_outputs: Iterable of tuples (layers, y_true, y_pred, y_prob, fpr, tpr, auc)
    save_prefix: Prefix for the two output file names.

    Returns (cm_path, roc_path): paths of the saved PNG files.
    """
    # Materialize once: the collection is walked twice below, which would
    # silently drop the ROC curves if a generator were passed in.
    models_outputs = list(models_outputs)

    # 📌 Confusion Matrix Grid
    # One panel per model instead of a hard-coded 3: more than three entries
    # used to raise IndexError and fewer left blank panels. At least one
    # column is kept so an empty input still yields a (blank) figure.
    n_panels = max(len(models_outputs), 1)
    # Width scales with the panel count (16 for three panels);
    # squeeze=False keeps a 2-D axes array even with a single panel.
    fig_cm, axs_grid = plt.subplots(1, n_panels, figsize=(n_panels * 16 / 3, 4), squeeze=False)
    axs_cm = axs_grid[0]
    labels = ["Non-Usability", "Usability"]

    for ax_cm, (layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc) in zip(axs_cm, models_outputs):
        # Fixed class order keeps every matrix 2x2 and aligned with the tick labels.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax_cm)
        ax_cm.set_title(f"{layers} Layers")
        ax_cm.set_xlabel("Predicted")
        ax_cm.set_ylabel("Actual")

    fig_cm.suptitle("Confusion Matrices for All Configurations", fontsize=14)
    # Leave headroom at the top so the suptitle does not overlap the panels.
    fig_cm.tight_layout(rect=[0, 0.03, 1, 0.95])
    cm_path = f"{save_prefix}_confusion_combined.png"
    fig_cm.savefig(cm_path, dpi=300)
    plt.close(fig_cm)

    # 📈 ROC Curve Overlay
    fig_roc, ax_roc = plt.subplots(figsize=(6, 5))
    for layers, _, _, _, fpr, tpr, roc_auc in models_outputs:
        ax_roc.plot(fpr, tpr, label=f"{layers} Layers (AUC = {roc_auc:.2f})")
    # Diagonal reference line for comparison with the ROC curves.
    ax_roc.plot([0, 1], [0, 1], linestyle='--', color='gray')
    ax_roc.set_title("ROC Curves for All Configurations")
    ax_roc.set_xlabel("False Positive Rate")
    ax_roc.set_ylabel("True Positive Rate")
    ax_roc.legend(loc="lower right")
    fig_roc.tight_layout()
    roc_path = f"{save_prefix}_roc_combined.png"
    fig_roc.savefig(roc_path, dpi=300)
    plt.close(fig_roc)

    return cm_path, roc_path


In [None]:

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluate each fine-tuned checkpoint on the held-out test set and collect
# (layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc) tuples for the combined plots.
results = []
for layers in [2, 4, 6]:
    model_path = f"unfreeze_{layers}_best_model.pt"
    if not os.path.exists(model_path):
        # Best effort: skip checkpoints that were never produced.
        print(f"❌ Missing: {model_path}")
        continue

    print(f"\n🔍 Evaluating: {model_path}")
    model = BertBinaryClassifier().to(device)
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source (consider weights_only=True if the installed
    # torch version supports it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_preds, all_labels, all_probs = [], [], []
    start_time = time.time()

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            # Labels are only recorded, never fed to the model, so they stay on the CPU.
            labels = batch["label"]

            outputs = model(input_ids, attention_mask)
            # The head ends in a single-unit linear layer; flatten to one
            # probability per example so the lists below hold plain Python
            # scalars. Collecting per-sample (1,)-shaped arrays and calling
            # int()/float() on them is deprecated since NumPy 1.25.
            probs = torch.sigmoid(outputs).reshape(-1)
            preds = (probs > 0.5).long()

            all_probs.extend(probs.cpu().tolist())
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.reshape(-1).tolist())

    elapsed = time.time() - start_time
    print(f"✅ Inference completed in {elapsed:.2f} seconds")

    y_true = [int(x) for x in all_labels]
    y_pred = [int(x) for x in all_preds]
    y_prob = [float(p) for p in all_probs]

    # Save confusion + ROC combined plot
    filename, fpr, tpr, roc_auc = plot_confusion_and_roc(
        y_true, y_prob, y_pred,
        title_prefix=f"Unfreeze {layers} Layers",
        filename_prefix=f"confusion_roc_unfreeze_{layers}"
    )
    print(f"📊 Saved: {filename}")

    # Save confusion-only plot
    filename_cm = plot_confusion_only(
        y_true=y_true,
        y_pred=y_pred,
        title_prefix=f"Unfreeze {layers} Layers",
        filename_prefix=f"confusion_unfreeze_{layers}"
    )
    print(f"🟦 Saved confusion-only plot: {filename_cm}")

    # Save ROC-only plot
    filename_roc = plot_roc_only(
        y_true=y_true,
        y_prob=y_prob,
        title_prefix=f"Unfreeze {layers} Layers",
        filename_prefix=f"roc_unfreeze_{layers}"
    )
    print(f"🟩 Saved ROC-only plot: {filename_roc}")

    # Store for combined plots
    results.append((layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc))

# Only draw the combined figures when at least one checkpoint was evaluated;
# otherwise the saved images would be blank.
if results:
    cm_path, roc_path = plot_combined_confusion_and_roc(results, save_prefix="final")
    print("🖼️ Combined confusion matrix saved to:", cm_path)
    print("📈 Combined ROC curves saved to:", roc_path)
else:
    print("⚠️ No checkpoints were evaluated; skipping combined plots.")

In [None]:
# Plot ROC curves for all models separately and merge them into one image
def plot_roc_curves_side_by_side(models_outputs, save_prefix="roc_curves_side_by_side"):
    """Save one ROC panel per model, laid out in a single row.

    Args:
        models_outputs: Iterable of tuples
            ``(layers, y_true, y_pred, y_prob, fpr, tpr, roc_auc)``; only
            ``layers``, ``fpr``, ``tpr`` and ``roc_auc`` are used here.
        save_prefix: Output path without extension; ``.png`` is appended.

    Returns:
        The file name the figure was written to.

    Raises:
        ValueError: If ``models_outputs`` is empty (there is nothing to draw).
    """
    models_outputs = list(models_outputs)
    if not models_outputs:
        # Fail with a clear message instead of matplotlib's own error about
        # a zero-column grid.
        raise ValueError("models_outputs is empty; nothing to plot")

    n_panels = len(models_outputs)
    # squeeze=False keeps a 2-D axes array, so a single model no longer
    # breaks on indexing a bare Axes object. Width scales with the panel
    # count (15 for three panels).
    fig, axs = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)

    for ax, (layers, _, _, _, fpr, tpr, roc_auc) in zip(axs[0], models_outputs):
        ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.4f}", color="blue")
        # Diagonal reference line for comparison with the ROC curve.
        ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
        ax.set_title(f"{layers} Layers - ROC Curve")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.legend()

    # Title previously read "ROC Curves for by Model" (stray "for").
    fig.suptitle("ROC Curves by Model", fontsize=16)
    # Leave headroom at the top so the suptitle does not overlap the panels.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    filename = f"{save_prefix}.png"
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    return filename

# Build the side-by-side ROC figure from the `results` list filled by the
# evaluation loop, then report where the image was written.
roc_curves_side_by_side_path = plot_roc_curves_side_by_side(results)
print("📈 Side-by-side ROC curves saved to:", roc_curves_side_by_side_path)
