<a href="https://colab.research.google.com/github/541DeepLearning-Group8/datasets/blob/main/deberta_analysis_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers scikit-learn -q

In [None]:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# --- Evaluation configuration ---
from google.colab import drive

# Checkpoint name handed to `from_pretrained` for both the model and the tokenizer.
model_path = 'microsoft/deberta-v3-base'
# Number of output classes of the classification head.
num_labels = 5

# The saved weights live on Google Drive, so Drive has to be mounted first.
drive.mount('/content/drive')
save_path = '/content/drive/MyDrive/541project/deberta_model_14.pth'

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

In [None]:
# Build the classification architecture from the base checkpoint, then overwrite
# its parameters with the state dict stored at `save_path`.
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=num_labels)
# `weights_only=True` restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while it is being loaded.
# The file is only used as a state dict here, so nothing else is needed from it.
model.load_state_dict(torch.load(save_path, map_location=device, weights_only=True))
# Move the model to the selected device and switch it to evaluation mode.
model.to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
# Load the test split. Google Drive is already mounted by the configuration cell
# earlier in the notebook, so the import/mount is not repeated here.
test_df = (
    pd.read_csv('/content/drive/MyDrive/541project/test.csv')
    .loc[:, ['Clean Comments', 'Rating']]
    .dropna()
    # Ratings are 1-based; shift them to 0-based integer class ids. The explicit
    # int cast matters because pandas parses a column containing NaN as float.
    .assign(label=lambda frame: frame['Rating'].astype(int) - 1)
)
# Fail fast with a readable message instead of an opaque error inside the model's
# loss computation when a label falls outside the classifier's output range.
assert test_df['label'].between(0, num_labels - 1).all(), (
    f"labels must lie in [0, {num_labels - 1}]; check the 'Rating' column"
)

In [None]:
class CommentDataset(Dataset):
    """Map-style dataset pairing pre-tokenized texts with integer class labels.

    All texts are tokenized once in ``__init__`` (truncated / padded to
    ``max_len``), so ``__getitem__`` only slices the stored tensors.

    Args:
        texts: Raw texts, in whatever batch form ``tokenizer`` accepts.
        labels: One integer class id per text.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True,
            padding='max_length', max_length=max_len, return_tensors='pt')``
            that returns a mapping of field name -> tensor whose first
            dimension indexes the texts.
        max_len: Length every sequence is truncated / padded to.

    Raises:
        ValueError: If the number of encoded texts differs from the number
            of labels.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding='max_length',
            max_length=max_len,
            return_tensors='pt',
        )
        self.labels = torch.tensor(labels, dtype=torch.long)

        # __len__ is driven by the labels while __getitem__ indexes the encodings;
        # a size mismatch would otherwise surface late (IndexError mid-epoch) or
        # silently drop the surplus texts, so reject it up front.
        num_encoded = len(next(iter(self.encodings.values())))
        if num_encoded != len(self.labels):
            raise ValueError(
                f"got {num_encoded} encoded texts but {len(self.labels)} labels"
            )

    def __getitem__(self, idx):
        """Return the ``idx``-th example: every encoding field plus ``'labels'``."""
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

# Tokenize the whole test split once, then serve it in batches of 16.
test_dataset = CommentDataset(
    texts=test_df['Clean Comments'].tolist(),
    labels=test_df['label'].tolist(),
    tokenizer=tokenizer,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)


In [None]:
# Run the model over every test batch, collecting predictions next to the gold labels.
all_preds, all_labels = [], []

with torch.no_grad():  # inference only, so gradients are not tracked
    for cpu_batch in test_loader:
        # Move every tensor of the batch (inputs and labels) to the model's device.
        batch = {name: tensor.to(device) for name, tensor in cpu_batch.items()}
        logits = model(**batch).logits
        # Predicted class = index of the largest logit along the last axis.
        batch_preds = logits.argmax(dim=-1)
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch['labels'].cpu().numpy())


In [None]:
# Plain-text per-class summary, formatted to 4 decimal places.
# Class ids in the report are the zero-based labels (Rating - 1).
report = classification_report(y_true=all_labels, y_pred=all_preds, digits=4)
print("Classification Report:\n", report)


In [None]:
# Confusion matrix over all rating classes.
# Passing `labels` pins the matrix to num_labels x num_labels even when some class
# never appears in the labels or predictions; without it the matrix would shrink
# and no longer line up with the display labels.
class_ids = list(range(num_labels))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
# Show the original 1-based ratings rather than the 0-based class ids.
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[class_id + 1 for class_id in class_ids],
)
fig, ax = plt.subplots()
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix of DeBERTa Model")
plt.show()


In [None]:
# Per-class precision / recall / F1-score as a grouped bar chart.
class_ids = list(range(num_labels))
# Name each class by its original 1-based rating so the x-axis matches the
# labels shown in the confusion matrix.
rating_names = [str(class_id + 1) for class_id in class_ids]
report_dict = classification_report(
    all_labels,
    all_preds,
    labels=class_ids,          # always report every class, even one that never occurs
    target_names=rating_names,
    output_dict=True,
    zero_division=0,           # a class with no predictions scores 0 without a warning
)
df = pd.DataFrame(report_dict).transpose()

fig, ax = plt.subplots(figsize=(10, 6))
# Select the class rows by name: a positional slice would pull in the
# 'accuracy' / average rows whenever fewer classes than expected are reported.
df.loc[rating_names, ["precision", "recall", "f1-score"]].plot(kind="bar", ax=ax, rot=0)
ax.set_title("Per-Class Metrics")
ax.set_xlabel("Rating")
ax.set_ylabel("Score")
ax.set_ylim(0, 1)
ax.grid(axis='y')
plt.show()
