## Imports


In [None]:
import os
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

import torch
import numpy as np
import pandas as pd
import time
from collections import Counter

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding
from peft import get_peft_model, LoraConfig, TaskType, PeftModel

from datasets import load_dataset

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

import matplotlib.pyplot as plt

## Load dataset

In [None]:
# Load the political/social sentiment dataset from the Hugging Face Hub.
dataset = load_dataset("arrmlet/political-social-x-us-sentiment-v1")

print(f"Dataset loaded: {len(dataset['train'])} examples")

# Sample a smaller subset for training. The requested size is capped at the
# number of available rows so that `select` is never handed indices that lie
# past the end of the split.
sample_size = min(50000, len(dataset['train']))
# Fixed shuffle seed keeps the sampled subset reproducible between runs.
sampled_dataset = dataset['train'].shuffle(seed=42).select(range(sample_size))
print(f"Sampled dataset size: {len(sampled_dataset)}")

# Convert sentiment scores to binary labels
def process_batch(examples):
    """Drop neutral-dominant rows from a batch and attach a binary label.

    Args:
        examples: Column-oriented batch (mapping of column name to list)
            containing at least 'sentiment_negative', 'sentiment_positive'
            and 'sentiment_neutral'.

    Returns:
        A dict with the same columns restricted to the kept rows, plus a
        'labels' column holding 1 when the positive score is strictly
        greater than the negative score and 0 otherwise.
    """
    column_names = list(examples.keys())
    kept = {name: [] for name in column_names}
    kept['labels'] = []

    for row, negative in enumerate(examples['sentiment_negative']):
        positive = examples['sentiment_positive'][row]
        neutral = examples['sentiment_neutral'][row]

        # A row is discarded only when neutral strictly beats both polar scores.
        if neutral > max(negative, positive):
            continue

        # Copy every original column value for the surviving row.
        for name in column_names:
            kept[name].append(examples[name][row])

        # Ties between positive and negative fall on the negative side.
        kept['labels'].append(1 if positive > negative else 0)

    return kept

# Apply processing. The original columns are listed in `remove_columns`
# because process_batch returns fewer rows than it receives, and its output
# re-emits every input column alongside the new 'labels' column.
processed_dataset = sampled_dataset.map(process_batch, batched=True, remove_columns=sampled_dataset.column_names)


# Check label distribution. The counts are printed so the class balance left
# after filtering is actually visible before the data is split.
label_counts = Counter(processed_dataset['labels'])
print(f"Label distribution after filtering: {label_counts}")

# train/validation/test splits (70/15/15): hold out 30%, then halve the
# held-out part into validation and test. Seeds are fixed for reproducibility.
train_test = processed_dataset.train_test_split(test_size=0.3, seed=23)
val_test = train_test['test'].train_test_split(test_size=0.5, seed=23)

train_dataset_raw = train_test['train']
eval_dataset_raw = val_test['train']
test_dataset_raw = val_test['test']


# Load model and tokenizer
# Builds an initial LoRA-wrapped DistilBERT classifier with two output labels
# (0 = negative, 1 = positive, matching process_batch).
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Safety net for tokenizers that ship without a padding token.
# NOTE(review): presumably never triggers for this checkpoint — confirm.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# LoRA configuration
# r is the adapter rank and lora_alpha its scaling factor; only the modules
# named in target_modules receive adapters.
# NOTE(review): "q_lin"/"v_lin" look like DistilBERT's attention query/value
# projections — confirm against the model's module names.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

# Apply LoRA to the model
# `model` is rebound to the PEFT-wrapped model; the summary printed below
# reports how many parameters remain trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## Tokenization of data

In [None]:
# Tokenization function
def tokenize_function(examples):
    """Tokenize the 'text' column of a batch with the module-level tokenizer.

    Sequences are truncated to 512 tokens and left unpadded; padding is
    deferred to the data collator at batch time.
    """
    encode_options = {"truncation": True, "max_length": 512, "padding": False}
    return tokenizer(examples["text"], **encode_options)

# Tokenize the three splits, in train / validation / test order.
train_dataset, eval_dataset, test_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset_raw, eval_dataset_raw, test_dataset_raw)
)

# Only the model inputs and the target column are needed for training;
# everything else found on the training split is dropped from all splits.
columns_to_keep = ['input_ids', 'attention_mask', 'labels']
columns_to_remove = [
    name for name in train_dataset.column_names
    if name not in columns_to_keep
]

print(f"Removing columns: {columns_to_remove}")
train_dataset, eval_dataset, test_dataset = (
    split.remove_columns(columns_to_remove)
    for split in (train_dataset, eval_dataset, test_dataset)
)

print("Final columns:", train_dataset.column_names)
print("Sample labels:", train_dataset['labels'][:10])

# Class balance of each split after tokenization.
train_label_counts = Counter(train_dataset['labels'])
eval_label_counts = Counter(eval_dataset['labels'])
test_label_counts = Counter(test_dataset['labels'])

print("\nFinal label distributions:")
print(f"Train: {train_label_counts}")
print(f"Validation: {eval_label_counts}")
print(f"Test: {test_label_counts}")

# Define compute metrics function
def compute_metrics(eval_pred):
    """Turn raw evaluation output into accuracy and weighted P/R/F1.

    Args:
        eval_pred: Pair of (model scores, reference labels); the predicted
            class is the argmax of the scores along axis 1.

    Returns:
        Dict with 'accuracy', 'f1', 'precision' and 'recall', where the last
        three are support-weighted averages over the classes.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=1)

    weighted = precision_recall_fscore_support(references, predicted, average='weighted')
    weighted_precision, weighted_recall, weighted_f1 = weighted[0], weighted[1], weighted[2]

    metrics = {'accuracy': accuracy_score(references, predicted)}
    metrics['f1'] = weighted_f1
    metrics['precision'] = weighted_precision
    metrics['recall'] = weighted_recall
    return metrics

## Hyperparameter tuning for LoRA

In [None]:
def train_lora_simple(train_dataset, eval_dataset, hyperparams, trial_name):
    """Train and evaluate one LoRA configuration for the hyperparameter search.

    A fresh DistilBERT classifier is loaded for every trial so that trials do
    not share weights.

    Args:
        train_dataset: Tokenized training split.
        eval_dataset: Tokenized validation split used for periodic and final
            evaluation.
        hyperparams: Dict with keys 'r', 'alpha', 'dropout', 'lr' and
            'batch_size'.
        trial_name: Label used in log output and in the trial's output
            directory name.

    Returns:
        Dict with 'trial', 'hyperparams', 'f1', 'accuracy', 'time' (seconds,
        including model loading), 'model' and 'tokenizer'; or None if training
        or evaluation raised.
    """

    print(f"TRIAL: {trial_name}")
    print(f"Parameters: {hyperparams}")

    start_time = time.time()

    model_name = "distilbert-base-uncased"
    tokenizer_trial = AutoTokenizer.from_pretrained(model_name)
    model_trial = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    # Safety net for tokenizers that ship without a padding token.
    if tokenizer_trial.pad_token is None:
        tokenizer_trial.pad_token = tokenizer_trial.eos_token

    # LoRA config: rank, scaling and dropout come from the trial's hyperparameters.
    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=hyperparams['r'],
        lora_alpha=hyperparams['alpha'],
        lora_dropout=hyperparams['dropout'],
        target_modules=["q_lin", "v_lin"]
    )

    model_trial = get_peft_model(model_trial, lora_config)

    # Training args: short two-epoch runs with no checkpointing, since trials
    # are only compared on their final validation metrics.
    training_args = TrainingArguments(
        output_dir=f"./temp_trial_{trial_name}",
        num_train_epochs=2,
        per_device_train_batch_size=hyperparams['batch_size'],
        per_device_eval_batch_size=32,
        learning_rate=hyperparams['lr'],
        warmup_steps=200,
        weight_decay=0.01,
        logging_steps=100,
        eval_strategy="steps",
        eval_steps=300,
        save_strategy="no",
        load_best_model_at_end=False,
        dataloader_pin_memory=False,
        # The string "none" disables experiment-tracking integrations; passing
        # the Python value None falls back to the library default instead.
        report_to="none",
    )

    trainer = Trainer(
        model=model_trial,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer_trial),
    )

    # Best-effort: a failing trial is reported and skipped so the remaining
    # configurations in the search still run.
    try:
        trainer.train()
        eval_results = trainer.evaluate()
        training_time = time.time() - start_time

        print(f"F1: {eval_results['eval_f1']:.4f}, Time: {training_time:.1f}s")

        return {
            'trial': trial_name,
            'hyperparams': hyperparams,
            'f1': eval_results['eval_f1'],
            'accuracy': eval_results['eval_accuracy'],
            'time': training_time,
            'model': model_trial,
            'tokenizer': tokenizer_trial
        }
    except Exception as e:
        print(f"❌ Failed: {e}")
        return None

# Define common hyperparameter combinations to test. The first entry is the
# baseline; most other entries vary one setting (rank/alpha, learning rate,
# dropout or batch size) against it.
hyperparameter_trials = [
    {'r': 16, 'alpha': 32, 'dropout': 0.1, 'lr': 2e-5, 'batch_size': 16},
    {'r': 8, 'alpha': 16, 'dropout': 0.1, 'lr': 2e-5, 'batch_size': 16},
    {'r': 32, 'alpha': 64, 'dropout': 0.1, 'lr': 2e-5, 'batch_size': 16},
    {'r': 16, 'alpha': 32, 'dropout': 0.1, 'lr': 1e-5, 'batch_size': 16},
    {'r': 16, 'alpha': 32, 'dropout': 0.1, 'lr': 5e-5, 'batch_size': 16},
    {'r': 16, 'alpha': 32, 'dropout': 0.2, 'lr': 2e-5, 'batch_size': 16},
    {'r': 16, 'alpha': 32, 'dropout': 0.1, 'lr': 2e-5, 'batch_size': 8},
    {'r': 8, 'alpha': 16, 'dropout': 0.1, 'lr': 5e-5, 'batch_size': 16},
]

trial_results = []
best_f1 = 0
best_trial = None

# Run every configuration; failed trials return None and are left out.
for i, params in enumerate(hyperparameter_trials, 1):
    trial_name = f"trial_{i}"

    result = train_lora_simple(train_dataset, eval_dataset, params, trial_name)

    if result:
        trial_results.append(result)


if trial_results:

    # Rank trials by validation F1, best first.
    trial_results.sort(key=lambda x: x['f1'], reverse=True)

    # Record the winner so the best configuration is available after the
    # search instead of leaving the placeholders at their initial values.
    best_trial = trial_results[0]
    best_f1 = best_trial['f1']

    for i, result in enumerate(trial_results, 1):
        params = result['hyperparams']
        print(f"{i}. F1: {result['f1']:.4f} | r={params['r']}, α={params['alpha']}, lr={params['lr']:.0e}, batch={params['batch_size']}")
        

## Fine-tune with LoRA using the optimized hyperparameters

In [None]:
# Final LoRA run: a fresh DistilBERT classifier trained for three epochs with
# the hyperparameters fixed below.
model_name = "distilbert-base-uncased"
final_tokenizer = AutoTokenizer.from_pretrained(model_name)
final_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Safety net for tokenizers that ship without a padding token.
if final_tokenizer.pad_token is None:
    final_tokenizer.pad_token = final_tokenizer.eos_token

final_lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

final_model = get_peft_model(final_model, final_lora_config)

# Training arguments. Evaluation and checkpointing share the same 500-step
# cadence so the best checkpoint by validation F1 can be restored at the end.
final_training_args = TrainingArguments(
    output_dir="./final_lora_model",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=300,
    weight_decay=0.01,
    logging_steps=500,
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    dataloader_pin_memory=False,
    # The string "none" disables experiment-tracking integrations; passing
    # the Python value None falls back to the library default instead.
    report_to="none",
    save_total_limit=1,
)

# Train final model
final_trainer = Trainer(
    model=final_model,
    args=final_training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer=final_tokenizer),
)

final_trainer.train()

# Persist the adapter and tokenizer side by side so the prediction section
# can reload both from the same directory.
final_model.save_pretrained("./final_lora_model")
final_tokenizer.save_pretrained("./final_lora_model")

# Rebind the notebook-wide names to the trained LoRA model and its tokenizer.
model = final_model
tokenizer = final_tokenizer
    

## Full fine-tuning 


In [None]:
# Baseline for comparison with LoRA: the same DistilBERT checkpoint, loaded
# without adapters so that every parameter is trained.
model_name = "distilbert-base-uncased"
tokenizer_full = AutoTokenizer.from_pretrained(model_name)
model_full = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# If the tokenizer has no padding token, reuse its end-of-sequence token so
# that batches can be padded to a common length.
if tokenizer_full.pad_token is None:
    tokenizer_full.pad_token = tokenizer_full.eos_token

# Total parameter count of the un-adapted model.
print(f"Full model parameters: {sum(p.numel() for p in model_full.parameters()):,}")

# Pads each batch dynamically; the datasets were tokenized with padding=False.
data_collator_full = DataCollatorWithPadding(tokenizer=tokenizer_full)

# Define compute metrics function for full fine-tuning
def compute_metrics_full(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for full fine-tuning.

    Performs the same calculation as compute_metrics.

    Args:
        eval_pred: Pair of (model scores, reference labels); the predicted
            class is the argmax of the scores along axis 1.

    Returns:
        Dict with 'accuracy', 'f1', 'precision' and 'recall'.
    """
    raw_scores, gold = eval_pred
    guessed = np.argmax(raw_scores, axis=1)
    prec, rec, f_score, _support = precision_recall_fscore_support(gold, guessed, average='weighted')
    return dict(
        accuracy=accuracy_score(gold, guessed),
        f1=f_score,
        precision=prec,
        recall=rec,
    )


# Training arguments for full fine-tuning. A per-device batch of 8 with 2
# gradient-accumulation steps gives 16 examples per optimizer step per
# device, matching the batch size used for the final LoRA run.
training_args_full = TrainingArguments(
    output_dir="./output_full_finetune_political",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    warmup_steps=300,
    weight_decay=0.01,
    logging_dir="./logs_full_political",
    logging_steps=100,
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    dataloader_pin_memory=False,
    gradient_accumulation_steps=2,
    save_total_limit=2,
    # The string "none" disables experiment-tracking integrations; passing
    # the Python value None falls back to the library default instead.
    report_to="none",
)


trainer_full = Trainer(
    model=model_full,
    args=training_args_full,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics_full,
    data_collator=data_collator_full,
)


trainer_full.train()


# Save model and tokenizer together so the prediction section can reload
# both from the same directory.
model_full.save_pretrained("./full_finetune_political_model")
tokenizer_full.save_pretrained("./full_finetune_political_model")
print("Full fine-tuning completed and model saved!")


## Comparison plots

In [None]:
# Per-step metrics for the LoRA run, entered by hand.
# NOTE(review): these values look copied from the trainer's logged output at
# 500-step intervals — regenerate them if the training above is re-run.
lora_data = {
    'step': [500, 1000, 1500, 2000, 2500, 3000],
    'training_loss': [0.349900, 0.169400, 0.154800, 0.142300, 0.133400, 0.135500],
    'validation_loss': [0.161679, 0.143583, 0.143701, 0.145310, 0.138031, 0.138588],
    'accuracy': [0.932829, 0.945624, 0.943879, 0.942425, 0.950567, 0.948822],
    'f1': [0.933017, 0.945274, 0.944018, 0.942884, 0.950432, 0.948718]
}

# Same metrics for the full fine-tuning run, at the same steps.
full_data = {
    'step': [500, 1000, 1500, 2000, 2500, 3000],
    'training_loss': [0.191400, 0.166200, 0.105700, 0.100600, 0.026700, 0.041700],
    'validation_loss': [0.161104, 0.143089, 0.178702, 0.180091, 0.211020, 0.203565],
    'accuracy': [0.944461, 0.947659, 0.946787, 0.949113, 0.955801, 0.955220],
    'f1': [0.944144, 0.948093, 0.947299, 0.949618, 0.955525, 0.955144]
}

# One row per logged step; the plotting code below reads these frames.
lora_df = pd.DataFrame(lora_data)
full_df = pd.DataFrame(full_data)

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('LoRA vs Full Fine-tuning: Complete Training Analysis', fontsize=16, fontweight='bold')


def _plot_both_runs(ax, lora_values, full_values, lora_fmt, full_fmt, lora_label, full_label, **line_kwargs):
    """Draw the LoRA series first, then the full fine-tuning series, on `ax`."""
    ax.plot(lora_df['step'], lora_values, lora_fmt, label=lora_label, **line_kwargs)
    ax.plot(full_df['step'], full_values, full_fmt, label=full_label, **line_kwargs)


def _label_panel(ax, ylabel, title, **title_kwargs):
    """Apply the shared x-label, y-label, title, legend and grid to one panel."""
    ax.set_xlabel('Training Steps')
    ax.set_ylabel(ylabel)
    ax.set_title(title, **title_kwargs)
    ax.legend()
    ax.grid(True, alpha=0.3)


# Line style shared by the marker-based panels (1, 4, 5 and 6).
marker_line = {'linewidth': 2.5, 'markersize': 6}

# Plot 1: Training Loss Comparison
_plot_both_runs(
    axes[0, 0], lora_df['training_loss'], full_df['training_loss'],
    'b-o', 'r-s', 'LoRA Training', 'Full FT Training', **marker_line,
)
_label_panel(axes[0, 0], 'Training Loss', 'Training Loss Comparison')

# Plot 2: Validation Loss Comparison (main focus, hence the heavier lines)
_plot_both_runs(
    axes[0, 1], lora_df['validation_loss'], full_df['validation_loss'],
    'b--o', 'r--s', 'LoRA Validation', 'Full FT Validation',
    linewidth=3, markersize=8,
)
_label_panel(axes[0, 1], 'Validation Loss', 'Validation Loss Comparison', fontweight='bold')

# Plot 3: Combined Training & Validation (solid = training, dashed = validation)
combined_ax = axes[0, 2]
combined_ax.plot(lora_df['step'], lora_df['training_loss'], 'b-', label='LoRA Training', linewidth=2)
combined_ax.plot(lora_df['step'], lora_df['validation_loss'], 'b--', label='LoRA Validation', linewidth=2, alpha=0.8)
combined_ax.plot(full_df['step'], full_df['training_loss'], 'r-', label='Full FT Training', linewidth=2)
combined_ax.plot(full_df['step'], full_df['validation_loss'], 'r--', label='Full FT Validation', linewidth=2, alpha=0.8)
_label_panel(combined_ax, 'Loss', 'Training vs Validation Loss')

# Plot 4: Accuracy Comparison
_plot_both_runs(
    axes[1, 0], lora_df['accuracy'], full_df['accuracy'],
    'b-o', 'r-s', 'LoRA Accuracy', 'Full FT Accuracy', **marker_line,
)
_label_panel(axes[1, 0], 'Accuracy', 'Accuracy Progression')

# Plot 5: F1 Score Comparison
_plot_both_runs(
    axes[1, 1], lora_df['f1'], full_df['f1'],
    'b-o', 'r-s', 'LoRA F1', 'Full FT F1', **marker_line,
)
_label_panel(axes[1, 1], 'F1 Score', 'F1 Score Progression')

# Plot 6: Overfitting Analysis (train-val gap). The gap is training loss
# minus validation loss, so values further below the zero line mean the
# validation loss exceeds the training loss by more.
lora_gap = lora_df['training_loss'] - lora_df['validation_loss']
full_gap = full_df['training_loss'] - full_df['validation_loss']

_plot_both_runs(
    axes[1, 2], lora_gap, full_gap,
    'b-o', 'r-s', 'LoRA Gap', 'Full FT Gap', **marker_line,
)
axes[1, 2].axhline(y=0, color='gray', linestyle=':', alpha=0.7)
_label_panel(axes[1, 2], 'Training - Validation Loss', 'Overfitting Analysis (Gap)')

plt.tight_layout()
plt.show()


## Prediction models

In [None]:
# Untuned DistilBERT classifier.
# NOTE(review): `base_model` is not referenced again in the visible notebook;
# predict_with_base_model loads its own copy — confirm whether this is needed.
base_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

# LoRA model: a fresh base classifier with the adapter saved under
# ./final_lora_model attached on top, plus the tokenizer saved alongside it.
lora_base_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
lora_model = PeftModel.from_pretrained(lora_base_model, "./final_lora_model")
lora_tokenizer = AutoTokenizer.from_pretrained("./final_lora_model")

# Fully fine-tuned model and tokenizer, reloaded from their save directory.
full_model = AutoModelForSequenceClassification.from_pretrained("./full_finetune_political_model")
full_tokenizer = AutoTokenizer.from_pretrained("./full_finetune_political_model")

In [None]:
# Safety net: give either reloaded tokenizer a padding token if it has none,
# reusing its end-of-sequence token.
if lora_tokenizer.pad_token is None:
    lora_tokenizer.pad_token = lora_tokenizer.eos_token
if full_tokenizer.pad_token is None:
    full_tokenizer.pad_token = full_tokenizer.eos_token


def predict_with_lora(text):
    """Classify a single text with the LoRA model.

    Args:
        text: The sentence to classify; truncated to 512 tokens.

    Returns:
        Tuple of (label, confidence), where label is "negative" or "positive"
        and confidence is the softmax probability of the predicted class.
    """
    encoded = lora_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = lora_model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

    class_index = torch.argmax(probabilities, dim=-1).item()
    class_probability = probabilities[0][class_index].item()

    class_names = {0: "negative", 1: "positive"}
    return class_names[class_index], class_probability

def predict_with_full(text):
    """Classify a single text with the fully fine-tuned model.

    Args:
        text: The sentence to classify; truncated to 512 tokens.

    Returns:
        Tuple of (label, confidence), where label is "negative" or "positive"
        and confidence is the softmax probability of the predicted class.
    """
    model_inputs = full_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        model_outputs = full_model(**model_inputs)
        class_probs = torch.nn.functional.softmax(model_outputs.logits, dim=-1)

    winner = torch.argmax(class_probs, dim=-1).item()
    winner_prob = class_probs[0][winner].item()

    id_to_label = {0: "negative", 1: "positive"}
    return id_to_label[winner], winner_prob

# Holds the untuned baseline tokenizer and model once they have been loaded,
# so repeated predictions do not reload them.
_BASE_MODEL_CACHE = {}


def _load_base_model():
    """Return the (tokenizer, model) pair for the untuned baseline, loading it once."""
    if not _BASE_MODEL_CACHE:
        base_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
        # Safety net for tokenizers that ship without a padding token.
        if base_tokenizer.pad_token is None:
            base_tokenizer.pad_token = base_tokenizer.eos_token

        base_model_clean = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
        base_model_clean.eval()

        _BASE_MODEL_CACHE['tokenizer'] = base_tokenizer
        _BASE_MODEL_CACHE['model'] = base_model_clean
    return _BASE_MODEL_CACHE['tokenizer'], _BASE_MODEL_CACHE['model']


def predict_with_base_model(text):
    """Classify a single text with the untuned base model.

    The tokenizer and model are loaded on the first call and reused
    afterwards instead of being reloaded for every prediction.
    NOTE(review): the base checkpoint presumably has no trained two-class
    head, so reusing one instance should also keep the baseline's outputs
    consistent across calls — confirm.

    Args:
        text: The sentence to classify; truncated to 512 tokens.

    Returns:
        Tuple of (label, confidence), where label is "negative" or "positive"
        and confidence is the softmax probability of the predicted class.
    """
    base_tokenizer, base_model_clean = _load_base_model()

    inputs = base_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = base_model_clean(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    predicted_class = torch.argmax(predictions, dim=-1).item()
    confidence = predictions[0][predicted_class].item()

    label_map = {0: "negative", 1: "positive"}
    predicted_label = label_map[predicted_class]

    return predicted_label, confidence

## Evaluation against test set

In [None]:
def evaluate_model_on_test_set(model, tokenizer, test_dataset, model_name):
    """Score a classifier on the tokenized test split and print a summary.

    Each example's stored 'input_ids' (and 'attention_mask', when present)
    are fed to the model directly. Decoding them back to text and tokenizing
    again would repeat work and may not reproduce the stored ids exactly
    (for instance, tokens treated as special are skipped when decoding).

    Args:
        model: Sequence-classification model whose output exposes `.logits`.
            It is switched to evaluation mode.
        tokenizer: Kept for interface compatibility with existing callers;
            the pre-tokenized inputs in `test_dataset` are used as-is.
        test_dataset: Iterable of examples with 'input_ids' and 'labels',
            and optionally 'attention_mask'.
        model_name: Display name used in the printed report and the result.

    Returns:
        Dict with 'model_name', 'accuracy', 'f1_weighted', 'precision',
        'recall' (the last three support-weighted), plus the per-example
        'predictions', 'true_labels' and 'confidences' lists.
    """

    predictions = []
    true_labels = []
    confidences = []

    # Disable dropout and run on whichever device the model already lives on.
    model.eval()
    device = next(model.parameters()).device

    # Predict on test set, one example at a time.
    for example in test_dataset:
        true_label = example['labels']

        # Add a leading batch dimension of size one.
        input_ids = torch.as_tensor(example['input_ids'], device=device).unsqueeze(0)
        if 'attention_mask' in example:
            attention_mask = torch.as_tensor(example['attention_mask'], device=device).unsqueeze(0)
        else:
            # A single unpadded sequence: every position is a real token.
            attention_mask = torch.ones_like(input_ids)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        predicted_class = torch.argmax(probs, dim=-1).item()
        confidence = probs[0][predicted_class].item()

        predictions.append(predicted_class)
        true_labels.append(true_label)
        confidences.append(confidence)

    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')

    # Detailed results
    print(f"{model_name} TEST RESULTS:")
    print(f"Accuracy:           {accuracy:.4f}")
    print(f"F1 Score (weighted): {f1:.4f}")
    print(f"Precision (weighted): {precision:.4f}")
    print(f"Recall (weighted):    {recall:.4f}")
    print(f"Average Confidence:   {np.mean(confidences):.4f}")

    return {
        'model_name': model_name,
        'accuracy': accuracy,
        'f1_weighted': f1,
        'precision': precision,
        'recall': recall,
        'predictions': predictions,
        'true_labels': true_labels,
        'confidences': confidences,
    }

# Untuned baseline for the test-set comparison.
base_model_eval = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
base_tokenizer_eval = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Safety net for tokenizers that ship without a padding token.
if base_tokenizer_eval.pad_token is None:
    base_tokenizer_eval.pad_token = base_tokenizer_eval.eos_token


# Evaluate the three models on the same held-out test split.
base_results = evaluate_model_on_test_set(base_model_eval, base_tokenizer_eval, test_dataset, "Base Model")

lora_results = evaluate_model_on_test_set(lora_model, lora_tokenizer, test_dataset, "LoRA")

full_results = evaluate_model_on_test_set(full_model, full_tokenizer, test_dataset, "Full Fine-tuned")

# Keys of the result dicts that hold scalar metrics.
# NOTE(review): not used anywhere in the visible notebook — confirm intent.
comparison_metrics = ['accuracy', 'f1_weighted', 'precision', 'recall']

## Test against sample sentences

In [None]:
# Hand-written political sentences; the trailing comment on each line is the
# sentiment the author expects (neg = negative, pos = positive).
test_texts = [
    "I am really struggling to support the current president after his scandal", #neg
    "I love our Mayor, he has really cleaned up the streets", #pos
    "Damn, why on earth did income tax rise again", #neg
    "This year our economy has done a 180 and employment rates are up again", #pos
    "Trust the fool at the top to mess up the diplomatic relationships", #neg
    "I have high hopes for President John", #pos
    "If I were Prime Minister our economy would not be down", #neg
    "The tax relief really came in handy, i am strapped for cash"  # presumably pos; expected label was not given, confirm
]

In [None]:
def testpred(test_texts):

    lora_avg = 0
    full_avg = 0
    base_avg = 0
    ntext = len(test_texts)

    for i, text in enumerate(test_texts, 1):
        lora_pred, lora_conf = predict_with_lora(text)
        full_pred, full_conf = predict_with_full(text)
        base_pred, base_conf = predict_with_base_model(text)
        
        lora_avg += lora_conf
        full_avg += full_conf
        base_avg += base_conf
        
        print(f"Text: {text}")
        print(f"LoRA Model:      {lora_pred} (conf: {lora_conf})")
        print(f"Full Fine-tuned: {full_pred} (conf: {full_conf})")
        print(f"Base Model:      {base_pred} (conf: {base_conf})")

    print(f"LoRA average confidence: {lora_avg/ntext}")
    print(f"Full average confidence: {full_avg/ntext}")
    print(f"Base average confidence: {base_avg/ntext}")



In [None]:
# Compare the three models on the hand-written political sentences.
testpred(test_texts)

## Sarcasm evaluation

In [None]:
# Sarcastic sentences: the surface wording is positive ("brilliant plan",
# "Great idea", "Genius move") while the intended meaning is critical.
s_texts = [
    "Oh sure, raising taxes will definitely make everyone rich overnight,brilliant plan!", 
    "Because nothing says progress like arguing about the same policy for the tenth year in a row.", 
    "Great idea to cut education funding, who needs smart voters anyway?",
    "Of course, banning debates will totally improve democracy. Genius move!",
]

In [None]:
# Compare the three models on the sarcastic sentences.
testpred(s_texts)