In [None]:
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
import time
import csv
import os
import warnings
# Suppress every Python warning for the rest of the session (library
# deprecation notices included), keeping the training logs readable.
warnings.filterwarnings(action="ignore")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# 📝 Prepare CSV log
# Opening in "w" mode starts a fresh log: any existing file at this path is
# truncated and only the header row below is written.
RESULTS_FILE = "experiment_results.csv"
with open(RESULTS_FILE, mode="w", newline="") as log_file:
    csv.writer(log_file).writerow(
        [
            "RUN_ID",
            "Unfrozen_Layers",
            "Test_Accuracy",
            "Test_F1",
            "Test_Precision",
            "Test_Recall",
            "Best_Val_F1",
            "Training_Time_Seconds",
        ]
    )


In [None]:
# Load the labelled cases and encode the textual class as 1 / 0.
# Any value other than the two mapped strings becomes NaN under Series.map.
df = pd.read_csv("./llm/All_cases_labelled.csv").assign(
    **{"class": lambda frame: frame["class"].map({"Usability": 1, "Non-Usability": 0})}
)

In [None]:
# Summarise the frame (columns, dtypes, non-null counts) right after loading
# and label mapping, before any rows are dropped.
df.info()

In [None]:
# Keep only rows that have a class label (NaN labels are discarded).
df = df.dropna(subset=["class"])

In [None]:
# Summarise the frame again to see the row count and non-null counts after
# the rows with a missing class were dropped.
df.info()

In [None]:
class TechSupportDataset(Dataset):
    """Pairs each text with its binary label, tokenizing lazily per item."""

    def __init__(self, texts, labels, tokenizer, max_len=256):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Every sequence is padded/truncated to max_len so the default collate
        # function can stack items into a batch.
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        return {
            # return_tensors="pt" yields a leading batch dimension of 1; drop it.
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            # Float label because the loss used below is BCEWithLogitsLoss.
            "label": torch.tensor(self.labels[idx], dtype=torch.float),
        }


class BertBinaryClassifier(nn.Module):
    """BERT encoder followed by dropout + a linear head emitting one logit.

    All BERT parameters are frozen first; the last ``unfreeze_last_n_layers``
    encoder layers (if any) are then made trainable again. The classifier head
    is always trainable.
    """

    def __init__(self, unfreeze_last_n_layers=0):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        for param in self.bert.parameters():
            param.requires_grad = False
        # The > 0 guard matters: layer[-0:] would select *every* layer.
        if unfreeze_last_n_layers > 0:
            for layer in self.bert.encoder.layer[-unfreeze_last_n_layers:]:
                for param in layer.parameters():
                    param.requires_grad = True
        # NOTE(review): only encoder layers are unfrozen, so whatever produces
        # pooler_output on top of them stays frozen — confirm this is intended.
        self.classifier = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(self.bert.config.hidden_size, 1)
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.classifier(outputs.pooler_output)


def _predict_labels(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode and return ``(y_true, y_pred)``.

    Both results are flat lists of Python ints (0/1), so they can be handed to
    the sklearn metric functions directly without any array-to-scalar casts.
    """
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch in loader:
            logits = model(
                batch["input_ids"].to(device),
                batch["attention_mask"].to(device),
            )
            # Positive class when sigmoid(logit) exceeds 0.5.
            preds = (torch.sigmoid(logits) > 0.5).long()
            y_pred.extend(preds.reshape(-1).tolist())
            y_true.extend(batch["label"].long().reshape(-1).tolist())
    return y_true, y_pred


def run_experiment(unfreeze_last_n_layers, num_epochs=10, patience=2, seed=42):
    """Fine-tune BERT with the last N encoder layers unfrozen and log test metrics.

    Reads the module-level ``df`` (columns ``description`` and ``class``),
    ``device`` and ``RESULTS_FILE``.

    Args:
        unfreeze_last_n_layers: Number of final BERT encoder layers to train.
        num_epochs: Maximum number of training epochs.
        patience: Stop after this many consecutive epochs without a new best
            validation F1.
        seed: Seed passed to ``torch.manual_seed`` so head initialisation and
            batch shuffling are comparable across runs. The data split uses its
            own fixed seed so every run is evaluated on the same test set.

    Side effects:
        Sets the global torch seed, writes ``unfreeze_<N>_best_model.pt`` when
        validation F1 improves, appends one row to ``RESULTS_FILE`` and prints
        progress. If no epoch of *this* run improved on an F1 of 0, evaluation
        is skipped and nothing is logged.

    Returns:
        None.
    """
    RUN_ID = f"unfreeze_{unfreeze_last_n_layers}"
    model_path = f"{RUN_ID}_best_model.pt"
    print(f"\n===== Running with {unfreeze_last_n_layers} unfrozen layer(s) =====")

    # Seed before the model is built so the randomly initialised classifier
    # head and the DataLoader shuffle order do not differ between runs.
    # NOTE(review): GPU kernels may still be non-deterministic.
    torch.manual_seed(seed)

    # The tokenizer needs string input and stratification needs a label for
    # every row, so drop incomplete rows instead of failing mid-training.
    data = df.dropna(subset=["description", "class"])
    n_dropped = len(df) - len(data)
    if n_dropped:
        print(f"⚠️ Dropped {n_dropped} row(s) with missing description or class.")

    texts = data["description"].astype(str).tolist()
    labels = data["class"].astype(int).tolist()

    # 10% test, then 0.1111 of the remaining 90% (~10% of the total) for
    # validation, i.e. roughly an 80/10/10 split.
    X_temp, X_test, y_temp, y_test = train_test_split(texts, labels, test_size=0.10, stratify=labels, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.1111, stratify=y_temp, random_state=42)

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    train_loader = DataLoader(TechSupportDataset(X_train, y_train, tokenizer), batch_size=32, shuffle=True)
    val_loader = DataLoader(TechSupportDataset(X_val, y_val, tokenizer), batch_size=32)
    test_loader = DataLoader(TechSupportDataset(X_test, y_test, tokenizer), batch_size=32)

    model = BertBinaryClassifier(unfreeze_last_n_layers).to(device)
    # Only hand the optimizer the parameters that are actually trainable.
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=2e-5)
    loss_fn = nn.BCEWithLogitsLoss()

    best_f1 = 0
    # Tracks whether THIS run wrote a checkpoint. Checking the filesystem alone
    # could pick up a stale file left behind by an earlier session.
    saved_checkpoint = False
    epochs_without_improvement = 0
    training_start = time.time()

    for epoch in range(num_epochs):
        epoch_start = time.time()
        model.train()
        total_loss = 0.0
        for batch in train_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            # Shape (batch, 1) to match the single-logit model output.
            targets = batch["label"].unsqueeze(1).to(device)

            logits = model(input_ids, attention_mask)
            loss = loss_fn(logits, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        epoch_duration = time.time() - epoch_start
        train_loss = total_loss / len(train_loader)

        print(f"\n🔁 Epoch {epoch+1} - Training Loss: {train_loss:.4f} (⏱ {epoch_duration:.2f} sec)")

        val_true, val_pred = _predict_labels(model, val_loader, device)
        acc = accuracy_score(val_true, val_pred)
        f1 = f1_score(val_true, val_pred)

        print(f"✅ Val Accuracy: {acc:.4f} | F1 Score: {f1:.4f}")

        if f1 > best_f1:
            best_f1 = f1
            epochs_without_improvement = 0
            torch.save(model.state_dict(), model_path)
            saved_checkpoint = True
            print("💾 Saved new best model")
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print("⏹ Early stopping triggered")
                break

    training_time = time.time() - training_start
    # best_f1 starts at 0 and only ever increases, so it equals the maximum
    # validation F1 observed during training.
    best_val_f1 = best_f1

    # Test Set Evaluation
    if not saved_checkpoint or not os.path.exists(model_path):
        print(f"⚠️ Skipping evaluation for {RUN_ID} — model was never saved.")
        return

    # map_location keeps the load valid whichever device the tensors were
    # saved from.
    model.load_state_dict(torch.load(model_path, map_location=device))

    y_true, y_pred = _predict_labels(model, test_loader, device)

    test_acc = accuracy_score(y_true, y_pred)
    test_f1 = f1_score(y_true, y_pred)
    test_precision = precision_score(y_true, y_pred)
    test_recall = recall_score(y_true, y_pred)

    # Log results
    with open(RESULTS_FILE, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([
            RUN_ID,
            unfreeze_last_n_layers,
            test_acc,
            test_f1,
            test_precision,
            test_recall,
            best_val_f1,
            round(training_time, 2)
        ])

    print(f"\n📋 RUN SUMMARY [{RUN_ID}]")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test F1 Score: {test_f1:.4f}")
    print(f"Test Precision: {test_precision:.4f}")
    print(f"Test Recall: {test_recall:.4f}")
    print(f"Best Val F1: {best_val_f1:.4f}")
    print(f"Training Time: {training_time:.2f} seconds")


In [None]:
# 🔁 Run All Experiments
# One full fine-tuning run per setting: 2, 4 and 6 unfrozen encoder layers.
for n_unfrozen in (2, 4, 6):
    run_experiment(unfreeze_last_n_layers=n_unfrozen)


In [None]:
# 📊 View Results
# Load the log into its own name. Rebinding `df` here would replace the
# dataset frame that run_experiment reads as a global, so any later call to
# run_experiment would fail on the missing "description" column.
# The path must match the file the experiment log is written to.
results_df = pd.read_csv("experiment_results.csv")
# Best test F1 first; reset the index so rows are numbered by rank.
df_sorted = results_df.sort_values(by="Test_F1", ascending=False).reset_index(drop=True)
print("\n✅ All Experiments Completed — Summary Table:\n")
display(df_sorted)
