In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import DebertaForSequenceClassification, RobertaForSequenceClassification, BertForSequenceClassification

def evaluate_model(model, val_loader, criterion, device):
    """Run one evaluation pass of a single-logit binary classifier.

    Every batch produced by ``val_loader`` must unpack into
    ``(input_ids, attention_mask, token_type_ids, labels)``. The model's
    logits are passed through a sigmoid and thresholded at 0.5 to obtain
    hard 0/1 predictions, which are then scored with scikit-learn.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` keyword arguments; its output must expose a
            ``.logits`` attribute. It is switched to eval mode here.
        val_loader: Iterable of batches (each a sequence of four tensors).
        criterion: Loss callable invoked as ``criterion(logits, labels.float())``.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy, f1, precision, recall, all_labels,
        all_preds)``. ``avg_loss`` is the mean of the per-batch loss values;
        ``all_labels`` and ``all_preds`` are flat Python lists with one entry
        per evaluated example.

    Raises:
        ValueError: If ``val_loader`` yields no batches (previously this
            surfaced as a bare ``ZeroDivisionError``).
    """
    model.eval()
    all_labels = []
    all_preds = []

    total_loss = 0.0
    num_batches = 0  # counted while iterating so loaders without __len__ also work
    with torch.no_grad():
        for batch in val_loader:
            input_ids, attention_mask, token_type_ids, labels = [
                b.to(device, non_blocking=True) for b in batch
            ]

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
            )
            # Drop the trailing dimension of the logits before computing the
            # loss (presumably size 1 for a num_labels=1 head - confirm).
            logits = outputs.logits.squeeze(-1)
            loss = criterion(logits, labels.float())
            total_loss += loss.item()
            num_batches += 1

            # Sigmoid probability >= 0.5 counts as the positive class.
            probs = torch.sigmoid(logits).cpu().numpy()
            batch_preds = (probs >= 0.5).astype(int)
            batch_labels = labels.cpu().numpy()

            all_preds.extend(batch_preds.flatten())
            all_labels.extend(batch_labels.flatten())

    if num_batches == 0:
        raise ValueError("val_loader produced no batches; nothing to evaluate")

    avg_loss = total_loss / num_batches
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)

    return avg_loss, accuracy, f1, precision, recall, all_labels, all_preds

def load_data_from_pickle(train_file, val_file):
    """Read two pickled record lists and wrap each one in a ``TensorDataset``.

    Each pickle is expected to hold an iterable of mappings with the keys
    ``'input_ids'``, ``'attention_mask'``, ``'token_type_ids'`` and
    ``'label'``. The first three become ``torch.long`` tensors and the label
    becomes a ``torch.float`` tensor.

    Args:
        train_file: Path of the pickled training records.
        val_file: Path of the pickled validation records.

    Returns:
        Tuple ``(train_dataset, val_dataset)`` of ``TensorDataset`` objects
        whose items are ``(input_ids, attention_mask, token_type_ids, label)``.
    """
    # (record key, tensor dtype) in the order the dataset exposes them.
    columns = (
        ('input_ids', torch.long),
        ('attention_mask', torch.long),
        ('token_type_ids', torch.long),
        ('label', torch.float),
    )

    def process_data(records):
        tensors = [
            torch.tensor([record[key] for record in records], dtype=dtype)
            for key, dtype in columns
        ]
        return TensorDataset(*tensors)

    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    loaded = []
    for path in (train_file, val_file):
        with open(path, 'rb') as handle:
            loaded.append(pickle.load(handle))

    train_records, val_records = loaded
    return process_data(train_records), process_data(val_records)

def load_trained_model(model_path, device):
    """Build a single-logit BERT classifier and load fine-tuned weights into it.

    The architecture is instantiated from the ``'bert-base-uncased'``
    pretrained configuration with ``num_labels=1`` and moved to ``device``;
    the state dict stored at ``model_path`` then replaces its weights.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        device: Device the model lives on; the checkpoint tensors are mapped
            to this device while loading.

    Returns:
        The model, on ``device`` and in eval mode.
    """
    # The architecture must match the checkpoint. For a RoBERTa or DeBERTa
    # checkpoint, swap in RobertaForSequenceClassification ('roberta-base') or
    # DebertaForSequenceClassification ('microsoft/deberta-base'), again with
    # num_labels=1.
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels=1,
    ).to(device)

    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only
    # host (and vice versa) instead of failing during deserialization.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model

def plot_confusion_matrix(y_true, y_pred, class_names):
    """Show the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        class_names: Tick labels used on both the x (predicted) and y (true)
            axes.
    """
    counts = confusion_matrix(y_true, y_pred)

    _, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        counts,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title("Confusion Matrix")
    plt.show()


# Evaluate on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load_data_from_pickle returns (train_dataset, val_dataset); only the
# validation split (index 1) is needed for evaluation.
val_dataset = load_data_from_pickle('bert_train_data_binary.pkl', 'bert_dev_data_binary.pkl')[1]

# Pinned host memory only speeds up host-to-GPU copies, so request it only
# when running on CUDA (avoids a DataLoader warning on CPU-only machines).
val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False,
    pin_memory=(device.type == "cuda"),
)

model = load_trained_model('bert_f1_72_accuracy_87.pth', device)

# The model emits a single raw logit per example, so use the logit-space BCE loss.
criterion = torch.nn.BCEWithLogitsLoss()

avg_loss, accuracy, f1, precision, recall, all_labels, all_preds = evaluate_model(model, val_loader, criterion, device)


In [None]:
# Headline metrics from the evaluation pass, one per line, four decimals each.
print(
    "Evaluation Results:",
    f"  Loss: {avg_loss:.4f}",
    f"  Accuracy: {accuracy:.4f}",
    f"  F1 Score: {f1:.4f}",
    f"  Precision: {precision:.4f}",
    f"  Recall: {recall:.4f}",
    sep="\n",
)

In [None]:
# Per-class precision / recall / F1 table from scikit-learn.
print("\nClassification Report:")
report_text = classification_report(all_labels, all_preds)
print(report_text)

In [None]:
# Heatmap of true vs. predicted labels for the validation split.
plot_confusion_matrix(
    y_true=all_labels,
    y_pred=all_preds,
    class_names=["Negative", "Positive"],
)