In [1]:
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from safetensors.torch import load_file
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score



In [2]:
model_folder_path = r"C:\Users\aless\Desktop\final project 2.1\full_model"
model_path = r"C:\Users\aless\Desktop\final project 2.1\full_model\model.safetensors"

In [3]:
tokenizer = RobertaTokenizer.from_pretrained(model_folder_path)

In [4]:
# Load the safetensors model using safetensors library (not torch.load)
state_dict = load_file(model_path)  # Use safetensors to load model weights

In [5]:
# Load the model using the folder containing the config and other files
model = RobertaForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=model_folder_path,  # Path to folder, not the config file itself
    state_dict=state_dict,  # Loaded safetensors weights
)

Some weights of the model checkpoint at C:\Users\aless\Desktop\final project 2.1\full_model were not used when initializing RobertaForSequenceClassification: ['classifier.1.bias', 'classifier.1.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at C:\Users\aless\Desktop\final project 2.1\full_model and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You shoul

In [6]:
# Load the tokenized validation dataset (save the file to the path desired and link it here)
val_encodings, val_labels = torch.load(r'C:\Users\aless\Desktop\final project 2.1\val_encodings.pt')


  val_encodings, val_labels = torch.load(r'C:\Users\aless\Desktop\final project 2.1\val_encodings.pt')


In [7]:
# Define the device: Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to the GPU
model.to(device)

# Set batch size and number of workers
batch_size = 512
num_workers = 8

# Create the validation DataLoader with batch size 512 and workers set to 8
val_dataset = TensorDataset(
    val_encodings['input_ids'].to(device),  # Move input IDs to GPU
    val_encodings['attention_mask'].to(device),  # Move attention masks to GPU
    val_labels.to(device)  # Move labels to GPU
)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers)

# Set the model to evaluation mode
model.eval()

print("Model and data moved to GPU, DataLoader set.")


Model and data moved to GPU, DataLoader set.


In [8]:
model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(0.3),  # Dropout as per the trained model
    torch.nn.Linear(model.config.hidden_size, 2)  # Linear layer for binary classification
).to(device) 

In [9]:
# Example: Re-tokenizing the validation dataset (assuming you have the raw text data)
texts = ["Your sample text here", "Another sample text"]  # Replace with your dataset's text

# Tokenize the texts
tokenized_inputs = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors="pt")

# Move tokenized inputs to the device
input_ids = tokenized_inputs['input_ids'].to(device)
attention_mask = tokenized_inputs['attention_mask'].to(device)

# Debug: Print tokenized inputs
print(f"Tokenized Input IDs: {input_ids}")
print(f"Tokenized Attention Mask: {attention_mask}")


Tokenized Input IDs: tensor([[    0, 12861,  7728,  2788,   259,     2],
        [    0, 21518,  7728,  2788,     2,     1]], device='cuda:0')
Tokenized Attention Mask: tensor([[1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0]], device='cuda:0')


In [10]:

# Create the validation DataLoader
val_dataset = TensorDataset(val_encodings['input_ids'], val_encodings['attention_mask'], val_labels)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=8)  # adding workers to imrpove cpu usage to avoid bottleneck (8 core = testing with 8 workers )


In [11]:
import time
from torch.cuda.amp import autocast  # Import for mixed precision

# Start timer for inference
start_time = time.time()

# Initialize lists to store predictions and true labels
all_preds = []
all_labels = []
y_proba = []  # Store probabilities for ROC-AUC

# Set model to no_grad mode for inference
with torch.no_grad():
    # Loop through validation DataLoader
    for step, batch in enumerate(val_loader):
        # Move batch to the device (GPU or CPU)
        input_ids, attention_mask, labels = [x.to(device) for x in batch]

        # Enable mixed precision for faster computation
        with autocast():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            
            # Use only the logits for the [CLS] token (first token) for classification
            cls_logits = logits[:, 0, :]  # Extract the [CLS] token logits for each sequence
            
            # Predicted classes from [CLS] token
            predictions = torch.argmax(cls_logits, dim=-1)
            
            # Probabilities for ROC-AUC from [CLS] token
            probabilities = torch.softmax(cls_logits, dim=-1)[:, 1]  # Probabilities for class 1

        # Store predictions and true labels
        all_preds.extend(predictions.cpu().numpy())  # Store binary predictions for each sequence
        all_labels.extend(labels.cpu().numpy())  # Store actual labels
        y_proba.extend(probabilities.cpu().numpy())  # Store probabilities for ROC-AUC

# Total time taken for inference
total_time = time.time() - start_time
print(f"Total inference time: {total_time:.2f} seconds")


  with autocast():
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Total inference time: 264.83 seconds


In [12]:
# Check the shape and some example values
print(f"Predictions (all_preds): {all_preds[:5]}")  # Print first 5 predictions
print(f"Labels (all_labels): {all_labels[:5]}")  # Print first 5 labels


Predictions (all_preds): [1, 1, 1, 0, 1]
Labels (all_labels): [0, 0, 0, 0, 0]


In [13]:
# Print a few predictions and their corresponding labels
print("Predictions sample:", all_preds[:10])
print("True labels sample:", all_labels[:10])

# Check distribution of predictions (if the model is predicting only one class)
unique_preds, counts_preds = np.unique(all_preds, return_counts=True)
pred_class_distribution = dict(zip(unique_preds, counts_preds))
print(f"Predicted Class Distribution: {pred_class_distribution}")


Predictions sample: [1, 1, 1, 0, 1, 0, 1, 0, 0, 0]
True labels sample: [0, 0, 0, 0, 0, 1, 1, 1, 0, 0]
Predicted Class Distribution: {0: 242483, 1: 227240}


In [19]:
# test with concat instead of flattened (after testing all 3 scenarios iwth flattening)  sane bad result.
all_preds_concat = np.concatenate([np.atleast_1d(pred) for pred in all_preds])
all_labels_concat = np.concatenate([np.atleast_1d(label) for label in all_labels])
y_proba_concat = np.concatenate([np.atleast_1d(proba) for proba in y_proba])


In [20]:
# Calculate Accuracy
accuracy = accuracy_score(all_labels_concat, all_preds_concat)
print(f"Accuracy: {accuracy:.4f}")

# Generate Classification Report (Precision, Recall, F1-Score)
report = classification_report(all_labels_concat, all_preds_concat)
print(report)

# Confusion Matrix
conf_matrix = confusion_matrix(all_labels_concat, all_preds_concat)
print("Confusion Matrix:")
print(conf_matrix)

# ROC-AUC Score
roc_auc = roc_auc_score(all_labels_concat, y_proba_concat)
print(f"ROC-AUC Score: {roc_auc:.4f}")

Accuracy: 0.3421
              precision    recall  f1-score   support

           0       0.35      0.36      0.35    235207
           1       0.34      0.33      0.33    234516

    accuracy                           0.34    469723
   macro avg       0.34      0.34      0.34    469723
weighted avg       0.34      0.34      0.34    469723

Confusion Matrix:
[[ 84324 150883]
 [158159  76357]]
ROC-AUC Score: 0.2810
