# XLNet Finetuning with LoRA for Political Stance Classification

This notebook demonstrates how to finetune XLNet using LoRA (Low-Rank Adaptation) with hyperparameter tuning for classifying political stances (left, centre, right).

## 1. Install Required Packages

In [None]:
# !pip install transformers datasets peft accelerate bitsandbytes scikit-learn pandas numpy torch optuna matplotlib seaborn -q

## 2. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import optuna
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from bitsandbytes import BitsAndBytesConfig
warnings.filterwarnings('ignore')

# Pin the NumPy and PyTorch RNGs to the same seed so repeated runs match
np.random.seed(42)
torch.manual_seed(42)

# Report the runtime environment
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    # Seed every visible GPU as well, then describe the device in use
    torch.cuda.manual_seed_all(42)
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Device: {torch.cuda.get_device_name(0)}")

## 3. Load and Explore the Dataset

In [None]:
# Load the dataset. Later cells read its 'text' and 'label' columns.
df = pd.read_csv('/content/sentence_dataset.csv')

print(f"Dataset shape: {df.shape}")
print("\nFirst few rows:")
print(df.head())
print("\nLabel distribution:")
print(df['label'].value_counts())
print("\nDataset info:")
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
df.info()

In [None]:
# Bar chart of how many rows carry each stance label
label_counts = df['label'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
label_counts.plot(kind='bar', color=['#1f77b4', '#ff7f0e', '#2ca02c'], ax=ax)
ax.set_title('Distribution of Political Stances', fontsize=14, fontweight='bold')
ax.set_xlabel('Stance', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.tick_params(axis='x', labelrotation=0)  # keep the class names horizontal
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 4. Prepare Data for Training

In [None]:
# Create label mappings; id2label is derived from label2id so the two cannot drift apart
label2id = {'left': 0, 'centre': 1, 'right': 2}
id2label = {idx: name for name, idx in label2id.items()}

# Map labels to IDs and fail fast on anything outside the three known stances.
# Series.map() silently yields NaN for values missing from the mapping, which would
# otherwise only surface later as NaN/float class ids in the split or during training.
label_ids = df['label'].map(label2id)
unknown_mask = label_ids.isna()
if unknown_mask.any():
    unknown_values = sorted(df.loc[unknown_mask, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected labels in dataset (expected {list(label2id)}): {unknown_values}")
df['label_id'] = label_ids.astype(int)

# Split the data into train (70%), validation (15%), and test (15%) sets, stratified by class
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['label_id'])
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['label_id'])

print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")


def to_hf_dataset(frame):
    """Convert a split DataFrame to a HuggingFace Dataset with 'text' and 'label' columns.

    preserve_index=False keeps the shuffled pandas index from being stored as an
    extra '__index_level_0__' column; row order is unchanged.
    """
    columns = frame[['text', 'label_id']].rename(columns={'label_id': 'label'})
    return Dataset.from_pandas(columns, preserve_index=False)


# Convert to HuggingFace datasets
train_dataset = to_hf_dataset(train_df)
val_dataset = to_hf_dataset(val_df)
test_dataset = to_hf_dataset(test_df)

dataset_dict = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})

print("\nDataset structure:")
print(dataset_dict)

## 5. Load XLNet and Tokenizer

In [None]:
model_name = "xlnet-base-cased"

# Load tokenizer (use_fast=False selects the slow, Python implementation)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

print(f"Loaded tokenizer: {model_name}")
print(f"Tokenizer vocab size: {len(tokenizer)}")

# Tokenize datasets
def tokenize_function(examples):
    """Tokenize a batch of examples, truncating each text to at most 512 tokens.

    No padding is applied here on purpose: the Trainer instances in this notebook
    are given the tokenizer, so their default collator pads each batch to its own
    longest sequence. Padding every sentence to 512 tokens up front would make
    every training and evaluation step pay for full-length sequences.
    """
    return tokenizer(examples['text'], truncation=True, max_length=512)

tokenized_datasets = dataset_dict.map(tokenize_function, batched=True)
print("\nTokenized datasets:")
print(tokenized_datasets)

## 6. Define Evaluation Metrics

In [None]:
def compute_metrics(eval_pred):
    """Turn raw evaluation output into accuracy and weighted precision/recall/F1.

    Args:
        eval_pred: Pair of (per-class scores, reference label ids); the predicted
            class is the argmax over axis 1 of the scores.

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)

    # Weighted averaging: per-class scores are combined in proportion to class support
    weighted_precision, weighted_recall, weighted_f1, _support = precision_recall_fscore_support(
        references, predicted_ids, average='weighted'
    )

    return dict(
        accuracy=accuracy_score(references, predicted_ids),
        f1=weighted_f1,
        precision=weighted_precision,
        recall=weighted_recall,
    )

## 7. Hyperparameter Tuning with Optuna

In [None]:
def model_init(trial=None):
    """Build a fresh XLNet sequence classifier wrapped with LoRA adapters.

    Args:
        trial: Optional Optuna trial. When given, the LoRA rank, alpha and dropout
            are sampled from it; otherwise fixed defaults (r=16, alpha=32,
            dropout=0.1) are used.

    Returns:
        The PEFT-wrapped model returned by ``get_peft_model``.
    """
    # Load base model in float32. The training arguments in this notebook use
    # fp16=False (no loss scaling), so half-precision trainable weights would be
    # optimised directly in fp16; bf16 autocast is still applied by the Trainer
    # when the GPU supports it.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3,
        id2label=id2label,
        label2id=label2id,
        torch_dtype=torch.float32
    )

    # LoRA configuration
    if trial is not None:
        lora_r = trial.suggest_int('lora_r', 8, 64, step=8)
        lora_alpha = trial.suggest_int('lora_alpha', 16, 128, step=16)
        lora_dropout = trial.suggest_float('lora_dropout', 0.05, 0.3)
    else:
        lora_r = 16
        lora_alpha = 32
        lora_dropout = 0.1

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        # XLNet's feed-forward linears are named layer_1 / layer_2
        target_modules=["layer_1", "layer_2"],
        # For SEQ_CLS, PEFT only unfreezes heads named "classifier" or "score" by
        # default. XLNet's head is `sequence_summary` + `logits_proj`, so it must be
        # listed explicitly or the randomly initialised head stays frozen.
        modules_to_save=["sequence_summary", "logits_proj"],
        bias="none"
    )

    # Apply LoRA
    model = get_peft_model(model, peft_config)

    # Print trainable parameters
    model.print_trainable_parameters()

    return model

In [None]:
def objective(trial):
    """Optuna objective: train one LoRA configuration and return its validation F1.

    Args:
        trial: The Optuna trial used to sample the optimiser settings here and,
            through ``model_init``, the LoRA settings.

    Returns:
        The 'eval_f1' value reported by ``trainer.evaluate()`` on the validation split.
    """

    # Suggest hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-4, log=True)
    num_train_epochs = trial.suggest_int('num_train_epochs', 3, 8)
    per_device_train_batch_size = trial.suggest_categorical('per_device_train_batch_size', [8, 16, 32])
    weight_decay = trial.suggest_float('weight_decay', 0.0, 0.3)
    warmup_ratio = trial.suggest_float('warmup_ratio', 0.0, 0.2)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/trial_{trial.number}',
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_train_batch_size,
        num_train_epochs=num_train_epochs,
        weight_decay=weight_decay,
        warmup_ratio=warmup_ratio,
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        logging_dir=f'./logs/trial_{trial.number}',
        logging_steps=10,
        save_total_limit=1,
        fp16=False,
        bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
        report_to="none",
        seed=42,
        max_grad_norm=1.0
    )

    # Initialize trainer.
    # model_init must be a ZERO-argument callable here. The Trainer inspects the
    # callable's arity and, for a one-argument callable, passes its own `trial`
    # (None outside Trainer.hyperparameter_search). That would override a
    # default-bound trial, so the LoRA parameters would never be sampled.
    # The Trainer calls model_init more than once; Optuna returns the same value
    # when a parameter name is suggested again within a trial.
    trainer = Trainer(
        model_init=lambda: model_init(trial),
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['validation'],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # Train the model
    trainer.train()

    # Evaluate and return the metric to optimize
    eval_results = trainer.evaluate()

    return eval_results['eval_f1']

In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
# TPE is Optuna's default sampler, so only the seeding changes, not the algorithm.
study = optuna.create_study(
    direction='maximize',
    study_name='xlnet_lora_tuning',
    sampler=optuna.samplers.TPESampler(seed=42)
)

n_trials = 10
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

print("Hyperparameter Tuning Complete!")

print(f"\nBest trial: {study.best_trial.number}")
print(f"Best F1 score: {study.best_value:.4f}")
print("\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

In [None]:
# Optuna's built-in plots for the finished study
import optuna.visualization as vis

# Objective value per trial
fig1 = vis.plot_optimization_history(study).update_layout(
    title_text='XLNet Optimization History', title_font_size=16
)
fig1.show()

# Relative importance of each hyperparameter
fig2 = vis.plot_param_importances(study).update_layout(
    title_text='Parameter Importances', title_font_size=16
)
fig2.show()

# One line per trial across all hyperparameter axes
fig3 = vis.plot_parallel_coordinate(study).update_layout(
    title_text='Parallel Coordinate Plot', title_font_size=16
)
fig3.show()

# Objective value against each hyperparameter individually
fig4 = vis.plot_slice(study).update_layout(
    title_text='Hyperparameter Slice Plot', title_font_size=16
)
fig4.show()

## 8. Train Final Model with Best Hyperparameters

In [None]:
# Load the base model for the final run in float32. The training arguments below
# use fp16=False (no loss scaling), so half-precision trainable weights would be
# optimised directly in fp16; bf16 autocast is still applied when supported.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
    torch_dtype=torch.float32
)

# Use best hyperparameters from Optuna (falling back to defaults for any LoRA
# parameter that is not present in the study)
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=study.best_params.get('lora_r', 16),
    lora_alpha=study.best_params.get('lora_alpha', 32),
    lora_dropout=study.best_params.get('lora_dropout', 0.1),
    # XLNet's feed-forward linears are named layer_1 / layer_2
    target_modules=["layer_1", "layer_2"],
    # For SEQ_CLS, PEFT only unfreezes heads named "classifier" or "score" by
    # default. XLNet's head is `sequence_summary` + `logits_proj`, so it must be
    # listed explicitly or the randomly initialised head stays frozen.
    modules_to_save=["sequence_summary", "logits_proj"],
    bias="none"
)

final_model = get_peft_model(model, peft_config)
final_model.print_trainable_parameters()

# Training arguments with optimal hyperparameters from Optuna
final_training_args = TrainingArguments(
    output_dir='./results/final_model',
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=study.best_params['learning_rate'],
    per_device_train_batch_size=study.best_params['per_device_train_batch_size'],
    per_device_eval_batch_size=study.best_params['per_device_train_batch_size'],
    num_train_epochs=study.best_params['num_train_epochs'],
    weight_decay=study.best_params['weight_decay'],
    warmup_ratio=study.best_params['warmup_ratio'],
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    logging_dir='./logs/final_model',
    logging_steps=10,
    save_total_limit=2,
    fp16=False,
    bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
    report_to="none",
    seed=42,
    max_grad_norm=1.0
)

# Initialize final trainer
final_trainer = Trainer(
    model=final_model,
    args=final_training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("Training final XLNet model with best hyperparameters...")
final_trainer.train()

## 9. Evaluate on Test Set

In [None]:
# Score the held-out test split with the final trainer
test_results = final_trainer.evaluate(tokenized_datasets['test'])

print("\nTest Set Results:")
# Only the 'eval_'-prefixed entries are reported, with the prefix stripped
reported = {name: score for name, score in test_results.items() if name.startswith('eval_')}
for name, score in reported.items():
    print(f"{name.replace('eval_', '').upper()}: {score:.4f}")

# Keep the raw per-class scores and reference ids for the analyses that follow
predictions_output = final_trainer.predict(tokenized_datasets['test'])
true_labels = predictions_output.label_ids
pred_labels = np.argmax(predictions_output.predictions, axis=1)

In [None]:
# Per-class precision / recall / F1 on the test split
print("\nDetailed Classification Report:")
stance_names = ['left', 'centre', 'right']
report_text = classification_report(true_labels, pred_labels, target_names=stance_names, digits=4)
print(report_text)

In [None]:
# Confusion matrix of true vs. predicted stance on the test split
from sklearn.metrics import confusion_matrix

stance_names = ['left', 'centre', 'right']
cm = confusion_matrix(true_labels, pred_labels)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=stance_names,
    yticklabels=stance_names,
    cbar_kws={'label': 'Count'},
    ax=ax,
)
ax.set_title('XLNet - Confusion Matrix (Test Set)', fontsize=16, fontweight='bold', pad=20)
ax.set_ylabel('True Label', fontsize=12)
ax.set_xlabel('Predicted Label', fontsize=12)
fig.tight_layout()
plt.show()

## 9.5 Detailed Error Analysis

In [None]:
def analyze_directional_errors(true_labels, predictions, label2id):
    """Summarise how far misclassifications land from the true stance.

    Classes are placed on the ordered axis left -> centre -> right. The distance
    of an error is the number of steps between the true and predicted position,
    so it is 1 for an adjacent class and 2 for the opposite end.

    Args:
        true_labels: Iterable of ground-truth class ids.
        predictions: Iterable of predicted class ids, aligned with ``true_labels``.
        label2id: Mapping from stance name ('left', 'centre', 'right') to class id.

    Returns:
        None. The summary is printed to stdout.
    """
    spectrum_order = ['left', 'centre', 'right']
    id_to_position = {label2id[name]: pos for pos, name in enumerate(spectrum_order)}
    # Largest possible distance on the spectrum (2 for three classes)
    max_distance = len(spectrum_order) - 1

    errors = [
        abs(id_to_position[true] - id_to_position[pred])
        for true, pred in zip(true_labels, predictions)
        if true != pred
    ]

    if not errors:
        print("\nPerfect predictions - no errors!")
        return

    n_errors = len(errors)
    avg_error_distance = np.mean(errors)
    opposite_errors = sum(1 for d in errors if d >= max_distance)
    adjacent_errors = sum(1 for d in errors if d == 1)

    print("\nDIRECTIONAL ERROR ANALYSIS:")
    print(f"  Average error distance: {avg_error_distance:.2f}")
    # The legend is derived from the spectrum length instead of a hard-coded value
    print(f"  (1.0 = adjacent class, {max_distance:.1f} = opposite end)")
    print(f"  Opposite-end errors: {opposite_errors}/{n_errors} ({opposite_errors/n_errors*100:.1f}%)")
    print(f"  Adjacent errors: {adjacent_errors}/{n_errors} ({adjacent_errors/n_errors*100:.1f}%)")

# Directional error summary for the test-set predictions
print("DIRECTIONAL ERROR ANALYSIS")
analyze_directional_errors(true_labels, pred_labels, label2id)

# Confidence of each prediction = highest softmax probability over the classes
print("HIGH-CONFIDENCE MISTAKES ANALYSIS")
logits_tensor = torch.tensor(predictions_output.predictions)
probs = torch.nn.functional.softmax(logits_tensor, dim=-1)
confidence = probs.max(dim=1).values.numpy()

# Wrong predictions, and the subset made with more than 80% confidence
mistakes_mask = pred_labels != true_labels
high_conf_mistakes_mask = mistakes_mask & (confidence > 0.8)

print(f"\nTotal mistakes: {mistakes_mask.sum()}")
print(f"High-confidence mistakes (>80%): {high_conf_mistakes_mask.sum()}")

if high_conf_mistakes_mask.sum() > 0:
    print("(These are worth manually inspecting)\n")

    # Rank the confident mistakes from most to least confident and keep five
    high_conf_indices = np.where(high_conf_mistakes_mask)[0]
    ranking = np.argsort(-confidence[high_conf_indices])
    sorted_indices = high_conf_indices[ranking][:5]

    print("Top 5 highest-confidence mistakes:")
    for rank, idx in enumerate(sorted_indices, start=1):
        true_label = id2label[true_labels[idx]]
        pred_label = id2label[pred_labels[idx]]
        conf = confidence[idx]

        # Positional lookup into the test DataFrame; previews are capped at 100 characters
        text = test_df.iloc[idx]['text']
        if len(text) > 100:
            text_preview = text[:100] + "..."
        else:
            text_preview = text

        print(f"\n  {rank}. Confidence: {conf:.2%}")
        print(f"     True: {true_label.upper()} | Predicted: {pred_label.upper()}")
        print(f"     Text: {text_preview}")



## 10. Save the Model

In [None]:
# Write the trained model and the tokenizer into one directory
output_dir = "./xlnet_model"

for save_fn in (final_trainer.save_model, tokenizer.save_pretrained):
    save_fn(output_dir)

print(f"Model and tokenizer saved to: {output_dir}")


## 11. Test the Model with Sample Predictions

In [None]:
def predict_stance(text, model, tokenizer):
    """Classify a single text and report the model's confidence.

    Args:
        text: The input text to classify.
        model: Model whose output exposes ``.logits``; it is switched to eval mode.
        tokenizer: Callable that encodes ``text`` into a dict of PyTorch tensors.

    Returns:
        Tuple of (stance name looked up in ``id2label``, probability of that class,
        NumPy array of the softmax probabilities for the first batch row).
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Inputs must live on the same device as the model weights
    target_device = next(model.parameters()).device
    batch = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    model.eval()
    with torch.no_grad():
        logits = model(**batch).logits
        class_probs = torch.softmax(logits, dim=-1)
        best_idx = torch.argmax(class_probs, dim=-1).item()
        best_prob = class_probs[0][best_idx].item()

    return id2label[best_idx], best_prob, class_probs[0].cpu().numpy()

# Test with sample sentences
sample_texts = [
    "We need stronger social safety nets to protect vulnerable citizens and ensure healthcare for all.",
    "Lower taxes and reduced government intervention will boost economic growth and individual freedom.",
    "A balanced approach considering both economic growth and social welfare is essential.",
    "Climate change requires immediate action with strict environmental regulations.",
    "National security and defense spending should be our top priority.",
]

print("Sample Predictions:")

for i, text in enumerate(sample_texts, 1):
    predicted_label, confidence, probabilities = predict_stance(text, final_model, tokenizer)

    # Only append an ellipsis when the text was actually cut at 100 characters
    text_preview = text[:100] + "..." if len(text) > 100 else text

    print(f"\n{i}. Text: {text_preview}")
    print(f"   Predicted Stance: {predicted_label.upper()} (Confidence: {confidence:.2%})")
    # Probabilities are indexed by class id: 0 = left, 1 = centre, 2 = right
    print(f"   Probabilities: Left={probabilities[0]:.2%}, Centre={probabilities[1]:.2%}, Right={probabilities[2]:.2%}")