<h1>Assignment 5</h1>
<h3><b>Name: </b>Gopi Trinadh Maddikunta</h3>
<h3><b>PSID: </b>2409404</h3>

In [1]:
!pip install -q transformers datasets evaluate accelerate

In [2]:
# --- Import Libraries ---
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW  # Correct: from torch.optim
from transformers import RobertaTokenizer, RobertaForSequenceClassification, get_scheduler
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
from tqdm import tqdm

In [3]:
# --- Configurations ---
# Shared hyper-parameters used by every training cell in this notebook.
model_checkpoint = "roberta-base"
batch_size = 32
learning_rate = 2e-5
num_epochs_frozen = 15   # 10-20 epochs
num_epochs_unfrozen = 4  # 3-5 epochs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the random number generators so that classifier-head initialisation,
# batch shuffling and dataset splitting start from the same state on every
# Restart & Run All. (Some GPU kernels can still be non-deterministic.)
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

<h3>Task 1: Paraphrase Detection using RoBERTa</h3>

In [4]:
# --- Load Dataset ---
# GLUE / MRPC sentence pairs, tokenized with the tokenizer of `model_checkpoint`.
dataset = load_dataset("glue", "mrpc")
tokenizer = RobertaTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    """Tokenize a batch of sentence pairs.

    `examples` is indexed by 'sentence1' and 'sentence2'; the pair is encoded
    with truncation enabled and padding="max_length".
    """
    first_sentences = examples['sentence1']
    second_sentences = examples['sentence2']
    return tokenizer(
        first_sentences,
        second_sentences,
        truncation=True,
        padding="max_length",
    )

# Tokenize in batches, then expose only the model inputs and the label as torch tensors.
encoded_dataset = dataset.map(tokenize_function, batched=True)
encoded_dataset.set_format(
    type='torch',
    columns=['input_ids', 'attention_mask', 'label'],
)

In [5]:
# --- Dataloaders ---
# The training split is shuffled; the validation split keeps its original order.
train_loader = DataLoader(
    encoded_dataset['train'],
    shuffle=True,
    batch_size=batch_size,
)
valid_loader = DataLoader(
    encoded_dataset['validation'],
    batch_size=batch_size,
)

In [6]:
# --- Early Stopping ---
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss.
    A call counts as an improvement unless the loss is greater than
    `best_loss - min_delta`. After `patience` consecutive calls without
    improvement, `early_stop` is set to True.
    """

    def __init__(self, patience=3, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0          # consecutive calls without improvement
        self.best_loss = None     # lowest accepted loss so far (None before the first call)
        self.early_stop = False   # becomes True once patience is exhausted

    def __call__(self, val_loss):
        # The very first loss is simply recorded as the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        improved = not (val_loss > self.best_loss - self.min_delta)
        if improved:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

In [7]:
# --- LoRA Preparation ---
# !pip install peft  # Uncomment if PEFT is not installed

from peft import get_peft_model, LoraConfig, TaskType

def apply_lora(model, r=8, lora_alpha=32, lora_dropout=0.1,
               target_modules=("query", "key", "value")):
    """Wrap `model` with LoRA adapters for sequence classification.

    Args:
        model: The model handed to `get_peft_model`.
        r: Value passed as `r` to `LoraConfig`.
        lora_alpha: Value passed as `lora_alpha` to `LoraConfig`.
        lora_dropout: Value passed as `lora_dropout` to `LoraConfig`.
        target_modules: Names of the modules that receive adapters; the
            default targets the "query", "key" and "value" modules.

    Returns:
        The object returned by `get_peft_model(model, lora_config)`.

    Calling `apply_lora(model)` with no extra arguments uses the same
    configuration this notebook originally hard-coded.
    """
    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        # Copy into a fresh list so the default tuple is never shared or mutated.
        target_modules=list(target_modules),
    )
    return get_peft_model(model, lora_config)

In [8]:

# Part 1: Fine-tune with Frozen RoBERTa Base + Early Stopping

# --- Start of Part 1: Frozen Training (with Early Stopping) ---
# Every parameter under `model.roberta` is frozen, so only the remaining
# (classification head) parameters are updated. After each epoch the model is
# scored on `valid_loader`; training stops once EarlyStopping reports that the
# validation loss has stopped improving.

early_stopping = EarlyStopping(patience=3, min_delta=0.001)

model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.to(device)

# Freeze all layers except classification head
for param in model.roberta.parameters():
    param.requires_grad = False

# Give the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(trainable_params, lr=learning_rate)
num_training_steps = len(train_loader) * num_epochs_frozen
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# NOTE(review): the saved output shows accuracy 0.6838 with recall 1.0000 on
# every epoch, i.e. all examples predicted as class 1. Head-only training may
# need a larger learning rate than the shared `learning_rate` -- TODO confirm.
for epoch in range(num_epochs_frozen):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_frozen} (Frozen)")
    for step, batch in enumerate(progress_bar, start=1):
        batch = {k: v.to(device) for k, v in batch.items()}

        # Explicitly pass fields
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        # Running mean over the batches seen so far. Dividing by
        # len(train_loader) under-reports the loss until the final batch.
        progress_bar.set_postfix(loss=total_loss / step)

    # Validation after each epoch
    model.eval()
    val_loss = 0
    preds, labels = [], []
    with torch.no_grad():
        for batch in valid_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                labels=batch["label"]
            )
            logits = outputs.logits
            val_loss += outputs.loss.item()
            preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
            labels.extend(batch["label"].cpu().numpy())

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    avg_val_loss = val_loss / len(valid_loader)
    print(f"Validation --> Loss: {avg_val_loss:.4f}, Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    early_stopping(avg_val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break

# Save Frozen Model
# The saved weights are those of the last completed epoch; no best-epoch
# checkpoint is restored before saving.
torch.save(model.state_dict(), "roberta_mrpc_frozen.pt")
# --- End of Part 1: Frozen Training (with Early Stopping) ---

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.65it/s, loss=0.646]


Validation --> Loss: 0.6254, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 2/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.70it/s, loss=0.635]


Validation --> Loss: 0.6230, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 3/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.69it/s, loss=0.633]


Validation --> Loss: 0.6221, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 4/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.69it/s, loss=0.628]


Validation --> Loss: 0.6196, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 5/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.68it/s, loss=0.629]


Validation --> Loss: 0.6179, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 6/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.68it/s, loss=0.625]


Validation --> Loss: 0.6180, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 7/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.67it/s, loss=0.625]


Validation --> Loss: 0.6164, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 8/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.67it/s, loss=0.625]


Validation --> Loss: 0.6161, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 9/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.67it/s, loss=0.623]


Validation --> Loss: 0.6150, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 10/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.66it/s, loss=0.622]


Validation --> Loss: 0.6148, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 11/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.66it/s, loss=0.622]


Validation --> Loss: 0.6146, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 12/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.70it/s, loss=0.621]


Validation --> Loss: 0.6140, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 13/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.70it/s, loss=0.619]


Validation --> Loss: 0.6138, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 14/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.70it/s, loss=0.619]


Validation --> Loss: 0.6136, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122


Epoch 15/15 (Frozen): 100%|██████████| 115/115 [00:24<00:00,  4.70it/s, loss=0.62] 


Validation --> Loss: 0.6135, Accuracy: 0.6838, Precision: 0.6838, Recall: 1.0000, F1: 0.8122
Early stopping triggered!


In [9]:
# ======================================================
# Part 2: Fine-tune with Unfrozen RoBERTa Base (Optional LoRA)
# ======================================================
# --- Start of Part 2: Unfrozen Training ---
# A fresh checkpoint is loaded and all of its parameters are passed to the
# optimizer. The model is trained for `num_epochs_unfrozen` epochs and then
# evaluated once on `valid_loader`.

model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.to(device)

# OPTIONAL: Apply LoRA if training is slow
# model = apply_lora(model)

optimizer = AdamW(model.parameters(), lr=learning_rate)
num_training_steps = len(train_loader) * num_epochs_unfrozen
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

for epoch in range(num_epochs_unfrozen):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_unfrozen} (Unfrozen)")
    for step, batch in enumerate(progress_bar, start=1):
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        # Running mean over the batches seen so far. Dividing by
        # len(train_loader) under-reports the loss until the final batch.
        progress_bar.set_postfix(loss=total_loss / step)

# Final evaluation on validation set
model.eval()
preds, labels = [], []
with torch.no_grad():
    for batch in valid_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        logits = outputs.logits
        preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
        labels.extend(batch["label"].cpu().numpy())

acc = accuracy_score(labels, preds)
precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
print(f"Final Validation --> Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

# Save Unfrozen Model
torch.save(model.state_dict(), "roberta_mrpc_unfrozen.pt")
# --- End of Part 2: Unfrozen Training ---

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4 (Unfrozen): 100%|██████████| 115/115 [01:11<00:00,  1.62it/s, loss=0.535]
Epoch 2/4 (Unfrozen): 100%|██████████| 115/115 [01:11<00:00,  1.62it/s, loss=0.349]
Epoch 3/4 (Unfrozen): 100%|██████████| 115/115 [01:11<00:00,  1.62it/s, loss=0.235]
Epoch 4/4 (Unfrozen): 100%|██████████| 115/115 [01:11<00:00,  1.62it/s, loss=0.155]


Final Validation --> Accuracy: 0.8775, Precision: 0.8908, Recall: 0.9355, F1: 0.9126


<h3>Task 2: Entailment Detection using RoBERTa</h3>

In [10]:
# Load Dataset
# GLUE / RTE replaces the MRPC data held in `dataset` and `encoded_dataset`.
dataset = load_dataset("glue", "rte")
tokenizer = RobertaTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    """Encode the 'sentence1' / 'sentence2' pairs of a batch.

    Truncation is enabled and padding="max_length" is requested from the tokenizer.
    """
    sentence_pair = (examples['sentence1'], examples['sentence2'])
    return tokenizer(*sentence_pair, truncation=True, padding="max_length")

# Batched tokenization, then keep only the tensors the model consumes.
encoded_dataset = dataset.map(tokenize_function, batched=True)
tensor_columns = ['input_ids', 'attention_mask', 'label']
encoded_dataset.set_format(type='torch', columns=tensor_columns)

In [14]:
# Create DataLoaders
# Split train 80-20
from torch.utils.data import DataLoader, random_split

rte_train_split = encoded_dataset['train']
train_size = int(0.8 * len(rte_train_split))
val_size = len(rte_train_split) - train_size

# A dedicated, seeded generator makes the 80/20 split identical on every run,
# regardless of how much randomness earlier cells have already consumed.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    rte_train_split,
    [train_size, val_size],
    generator=split_generator,
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
# The GLUE 'validation' split is held out and used as the test set.
test_loader = DataLoader(encoded_dataset['validation'], batch_size=batch_size)


In [15]:
# Early Stopping Utility
class EarlyStopping:
    """Track the validation loss and flag when training should stop.

    This repeats the EarlyStopping class defined in the Task 1 section of this
    notebook. The first loss passed in becomes the baseline. A later loss is
    accepted as the new best only if it is not greater than
    `best_loss - min_delta`; otherwise a counter is incremented, and
    `early_stop` turns True once the counter reaches `patience`.
    """

    def __init__(self, patience=3, min_delta=0):
        # Settings
        self.patience = patience
        self.min_delta = min_delta
        # Mutable tracking state
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        else:
            # Losses above this threshold do not count as an improvement.
            threshold = self.best_loss - self.min_delta
            if val_loss > threshold:
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
            else:
                self.best_loss = val_loss
                self.counter = 0

In [16]:
# ======================================================
# Part 1: Fine-tune with Frozen RoBERTa Base + Early Stopping
# ======================================================
# --- Start of Part 1: Frozen Training (with Early Stopping) ---
# Every parameter under `model.roberta` is frozen, so only the remaining
# (classification head) parameters are updated. The 20% split in `val_loader`
# drives early stopping.

early_stopping = EarlyStopping(patience=3, min_delta=0.001)

model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.to(device)

# Freeze all layers except classification head
for param in model.roberta.parameters():
    param.requires_grad = False

# Give the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(trainable_params, lr=learning_rate)
num_training_steps = len(train_loader) * num_epochs_frozen
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

for epoch in range(num_epochs_frozen):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_frozen} (Frozen)")
    for step, batch in enumerate(progress_bar, start=1):
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        # Running mean over the batches seen so far. Dividing by
        # len(train_loader) under-reports the loss until the final batch.
        progress_bar.set_postfix(loss=total_loss / step)

    # Validation (on 20% split)
    model.eval()
    val_loss = 0
    preds, labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                labels=batch["label"]
            )
            logits = outputs.logits
            val_loss += outputs.loss.item()
            preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
            labels.extend(batch["label"].cpu().numpy())

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    avg_val_loss = val_loss / len(val_loader)
    print(f"Validation --> Loss: {avg_val_loss:.4f}, Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    early_stopping(avg_val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered!")
        break

# Save Frozen Model
# The saved weights are those of the last completed epoch; no best-epoch
# checkpoint is restored before saving.
torch.save(model.state_dict(), "roberta_rte_frozen.pt")
# --- End of Part 1: Frozen Training (with Early Stopping) ---

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15 (Frozen): 100%|██████████| 63/63 [00:13<00:00,  4.75it/s, loss=0.695]


Validation --> Loss: 0.6926, Accuracy: 0.4980, Precision: 0.5011, Recall: 0.9084, F1: 0.6459


Epoch 2/15 (Frozen): 100%|██████████| 63/63 [00:13<00:00,  4.73it/s, loss=0.696]


Validation --> Loss: 0.6927, Accuracy: 0.5201, Precision: 0.5469, Recall: 0.2789, F1: 0.3694


Epoch 3/15 (Frozen): 100%|██████████| 63/63 [00:13<00:00,  4.72it/s, loss=0.696]


Validation --> Loss: 0.6924, Accuracy: 0.5040, Precision: 0.5040, Recall: 1.0000, F1: 0.6702


Epoch 4/15 (Frozen): 100%|██████████| 63/63 [00:13<00:00,  4.69it/s, loss=0.694]


Validation --> Loss: 0.6923, Accuracy: 0.5060, Precision: 0.5052, Recall: 0.9602, F1: 0.6621
Early stopping triggered!


In [21]:
# ======================================================
#  Part 2: Fine-tune with Unfrozen RoBERTa Base (Optional LoRA)
# ======================================================
# --- Start of Part 2: Unfrozen Training ---
# A fresh checkpoint is loaded and all of its parameters are passed to the
# optimizer. The model is trained for `num_epochs_unfrozen` epochs on
# `train_loader` and then evaluated once on `test_loader`.

model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.to(device)

# OPTIONAL: Apply LoRA if training is slow
# model = apply_lora(model)

optimizer = AdamW(model.parameters(), lr=learning_rate)
num_training_steps = len(train_loader) * num_epochs_unfrozen
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

for epoch in range(num_epochs_unfrozen):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_unfrozen} (Unfrozen)")
    for step, batch in enumerate(progress_bar, start=1):
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        # Running mean over the batches seen so far. Dividing by
        # len(train_loader) under-reports the loss until the final batch.
        progress_bar.set_postfix(loss=total_loss / step)

# Final evaluation on validation set (277 samples)
model.eval()
preds, labels = [], []

# Evaluate on full RTE validation split (277 examples)
with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"]
        )
        logits = outputs.logits
        preds.extend(torch.argmax(logits, dim=-1).cpu().numpy())
        labels.extend(batch["label"].cpu().numpy())

# Calculate evaluation metrics
acc = accuracy_score(labels, preds)
precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')

print(f" Unfrozen Model - Final Test Results on RTE Validation Set (277 samples)")
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
# Save Unfrozen Model
torch.save(model.state_dict(), "roberta_rte_unfrozen.pt")
# --- End of Part 2: Unfrozen Training ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4 (Unfrozen): 100%|██████████| 63/63 [00:38<00:00,  1.64it/s, loss=0.694]
Epoch 2/4 (Unfrozen): 100%|██████████| 63/63 [00:38<00:00,  1.63it/s, loss=0.637]
Epoch 3/4 (Unfrozen): 100%|██████████| 63/63 [00:38<00:00,  1.63it/s, loss=0.492] 
Epoch 4/4 (Unfrozen): 100%|██████████| 63/63 [00:38<00:00,  1.62it/s, loss=0.366] 


 Unfrozen Model - Final Test Results on RTE Validation Set (277 samples)
Accuracy: 0.7148
Precision: 0.7955
Recall: 0.5344
F1 Score: 0.6393


In [23]:
# ======================================================
#Final Evaluation for Unfrozen RoBERTa on 277 RTE Validation Samples
# ======================================================
# Scores whatever `model` currently holds on `test_loader` and prints
# accuracy, precision, recall and F1 (binary averaging).

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

# Set model to evaluation mode
model.eval()
preds, labels = [], []

# Loop through the full RTE validation set (277 examples); no gradients are
# needed for the forward passes.
with torch.no_grad():
    for batch in test_loader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"],
        )
        logits = outputs.logits
        preds.extend(logits.argmax(dim=-1).cpu().numpy())
        labels.extend(batch["label"].cpu().numpy())

# Calculate evaluation metrics
acc = accuracy_score(labels, preds)
binary_scores = precision_recall_fscore_support(labels, preds, average='binary')
precision, recall, f1, _ = binary_scores

# Print final evaluation metrics
print("Final Evaluation on RTE Validation Set (277 Samples)")
for line in (
    f"Accuracy  : {acc:.4f}",
    f"Precision : {precision:.4f}",
    f"Recall    : {recall:.4f}",
    f"F1 Score  : {f1:.4f}",
):
    print(line)


Final Evaluation on RTE Validation Set (277 Samples)
Accuracy  : 0.7148
Precision : 0.7955
Recall    : 0.5344
F1 Score  : 0.6393


<h3>Number of trainable parameters in each of the two fine-tuned models</h3>

In [18]:
# --- Start of Code to Count Trainable Parameters ---
def count_trainable_parameters(model):
    """Return the number of elements in the parameters of `model` whose `requires_grad` is True."""
    trainable = 0
    for tensor in model.parameters():
        if tensor.requires_grad:
            trainable += tensor.numel()
    return trainable

def count_total_parameters(model):
    """Return the number of elements across all parameters of `model`, trainable or not."""
    sizes = [tensor.numel() for tensor in model.parameters()]
    return sum(sizes)

# Example Usage:
# A dedicated variable is used so this cell does not overwrite the fine-tuned
# `model` that the evaluation cells rely on. One freshly loaded checkpoint is
# enough for both counts: it is counted as loaded, then counted again after
# the encoder is frozen.
param_probe = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

# Unfrozen: counts taken on the checkpoint exactly as loaded
unfrozen_trainable = count_trainable_parameters(param_probe)
total_parameters = count_total_parameters(param_probe)

# Frozen: Freeze all RoBERTa layers
for param in param_probe.roberta.parameters():
    param.requires_grad = False
frozen_trainable = count_trainable_parameters(param_probe)

print(f"Frozen Model - Trainable Parameters: {frozen_trainable:,}")
print(f"Frozen Model - Total Parameters: {total_parameters:,}")

print(f"Unfrozen Model - Trainable Parameters: {unfrozen_trainable:,}")
print(f"Unfrozen Model - Total Parameters: {total_parameters:,}")

# Release the throw-away model so it does not linger in kernel memory.
del param_probe
# --- End of Code to Count Trainable Parameters ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Frozen Model - Trainable Parameters: 592,130
Frozen Model - Total Parameters: 124,647,170


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unfrozen Model - Trainable Parameters: 124,647,170
Unfrozen Model - Total Parameters: 124,647,170


<h3>Per-class classification report</h3>

In [19]:
# --- Start of Per-Class Metrics Calculation ---
from sklearn.metrics import classification_report

# `labels` and `preds` are whatever the most recently executed evaluation cell
# left in the kernel.
# NOTE(review): the saved output below reports accuracy 0.7004, while the
# evaluation cell above reports 0.7148 -- presumably this report came from a
# different run; re-run the notebook top to bottom to confirm.

print("Classification Report:")
per_class_report = classification_report(labels, preds, digits=4)
print(per_class_report)
# --- End of Per-Class Metrics Calculation ---


Classification Report:
              precision    recall  f1-score   support

           0     0.6615    0.8836    0.7566       146
           1     0.7927    0.4962    0.6103       131

    accuracy                         0.7004       277
   macro avg     0.7271    0.6899    0.6835       277
weighted avg     0.7236    0.7004    0.6874       277

