In [2]:
import time
import torch
from transformers import AutoModelForSequenceClassification, BertTokenizer, AdamW
from peft import get_peft_model, LoraConfig
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report
import pandas as pd

# Paths to saved models and tokenizers
bert_tokenizer_path = '../Models/bert_tokenizer'

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Load datasets function
def load_test_datasets():
    neutral_test_df = pd.read_csv('../Data/processed/neutral_test.csv')
    racist_test_df = pd.read_csv('../Data/processed/racist_test.csv')
    anti_test_df = pd.read_csv('../Data/processed/anti_test.csv')

    # Assign labels
    anti_test_df['label'] = 0  # Label for antiracist
    neutral_test_df['label'] = 1  # Label for neutral
    racist_test_df['label'] = 2  # Label for racist

    # Combine test datasets and shuffle
    test_df = pd.concat([anti_test_df, neutral_test_df, racist_test_df], ignore_index = True)
    test_df = test_df.sample(frac = 1).reset_index(drop = True)

    return test_df


# Tokenization function
def tokenize_data(sentences, tokenizer, max_length = 128):
    encoded_inputs = tokenizer(
        sentences,
        return_tensors = 'pt',
        truncation = True,
        padding = True,
        max_length = max_length
    )
    return encoded_inputs['input_ids'], encoded_inputs['attention_mask']


# Model evaluation function
def evaluate_model(model, dataloader):
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids = input_ids, attention_mask = attention_mask)
            logits = outputs.logits

            preds = torch.argmax(logits, dim = 1).cpu().numpy()
            predictions.extend(preds)
            true_labels.extend(labels.cpu().numpy())

    report = classification_report(true_labels, predictions, output_dict = True)
    return report


# Training function with time tracking
def train_model(model, dataloader, learning_rate = 2e-5, epochs = 3):
    optimizer = AdamW(model.parameters(), lr = learning_rate)
    start_time = time.time()

    model.train()
    for epoch in range(epochs):
        for batch in dataloader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids = input_ids, attention_mask = attention_mask, labels = labels)
            loss = outputs.loss

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    training_time = time.time() - start_time
    return training_time


# LoRA fine-tuning function with time tracking
def train_with_lora(model, dataloader, learning_rate = 2e-5, epochs = 3):
    # Set up LoRA configuration
    lora_config = LoraConfig(r = 4, lora_alpha = 16, target_modules = ["classifier"], lora_dropout = 0.1)
    model = get_peft_model(model, lora_config)

    optimizer = AdamW(model.parameters(), lr = learning_rate)
    start_time = time.time()

    model.train()
    for epoch in range(epochs):
        for batch in dataloader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids = input_ids, attention_mask = attention_mask, labels = labels)
            loss = outputs.loss

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    training_time = time.time() - start_time
    return training_time


# Load test data and tokenizers
test_df = load_test_datasets()
test_labels = torch.tensor(test_df['label'].values)

# Load BERT model and tokenizer
bert_tokenizer = BertTokenizer.from_pretrained(bert_tokenizer_path)
bert_inputs, bert_masks = tokenize_data(test_df['text'].tolist(), bert_tokenizer)
bert_data = TensorDataset(bert_inputs, bert_masks, test_labels)
bert_dataloader = DataLoader(bert_data, batch_size = 16)

# Full fine-tuning
full_model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 3).to(device)
full_training_time = train_model(full_model, bert_dataloader)
full_report = evaluate_model(full_model, bert_dataloader)

print("Full Fine-Tuning Training Time:", full_training_time)
print("Full Fine-Tuning Performance Report:\n", full_report)

# LoRA fine-tuning
lora_model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 3).to(device)
lora_training_time = train_with_lora(lora_model, bert_dataloader)
lora_report = evaluate_model(lora_model, bert_dataloader)

print("LoRA-Based Fine-Tuning Training Time:", lora_training_time)
print("LoRA-Based Fine-Tuning Performance Report:\n", lora_report)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LoRA-Based Fine-Tuning Training Time: 537.8623960018158
LoRA-Based Fine-Tuning Performance Report:
 {'0': {'precision': 0.13333333333333333, 'recall': 0.00023738872403560832, 'f1-score': 0.00047393364928909954, 'support': 8425.0}, '1': {'precision': 0.5047472307820438, 'recall': 0.7787843741968645, 'f1-score': 0.61251200161706, 'support': 15564.0}, '2': {'precision': 0.4886428165814878, 'recall': 0.4867999245709975, 'f1-score': 0.48771962969960325, 'support': 10606.0}, 'accuracy': 0.4996675820205232, 'macro avg': {'precision': 0.3755744602322883, 'recall': 0.42194056249729917, 'f1-score': 0.36690185498865074, 'support': 34595.0}, 'weighted avg': {'precision': 0.409358720794575, 'recall': 0.4996675820205232, 'f1-score': 0.42520260375074936, 'support': 34595.0}}


In [5]:
import itertools
import torch
from transformers import AutoModelForSequenceClassification, AdamW
from peft import get_peft_model, LoraConfig
from sklearn.metrics import classification_report
import pandas as pd
import time

# Define grid search parameters
rank_options = [8]  # Possible values for LoRA rank
alpha_options = [32]  # Possible values for LoRA alpha
dropout_options = [0.3]  # Possible values for LoRA dropout

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define function to train with a specific LoRA configuration
def train_with_lora_config(model, dataloader, r, alpha, dropout, learning_rate=2e-5, epochs=3):
    # Set up LoRA configuration
    lora_config = LoraConfig(r=r, lora_alpha=alpha, target_modules=["classifier"], lora_dropout=dropout)
    model = get_peft_model(model, lora_config)
    
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    start_time = time.time()
    
    model.train()
    for epoch in range(epochs):
        for batch in dataloader:
            input_ids, attention_mask, labels = batch[0].to(device), batch[1].to(device), batch[2].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
    
    training_time = time.time() - start_time
    return model, training_time

# Function to evaluate the model
def evaluate_model(model, dataloader):
    model.eval()
    predictions, true_labels = [], []
    
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, labels = batch[0].to(device), batch[1].to(device), batch[2].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            predictions.extend(preds)
            true_labels.extend(labels.cpu().numpy())
    
    report = classification_report(true_labels, predictions, output_dict=True)
    return report

# Perform grid search
results = []
for r, alpha, dropout in itertools.product(rank_options, alpha_options, dropout_options):
    # Load fresh model for each configuration
    model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3).to(device)
    
    # Train and evaluate with the current LoRA configuration
    trained_model, training_time = train_with_lora_config(model, bert_dataloader, r, alpha, dropout)
    report = evaluate_model(trained_model, bert_dataloader)
    
    # Store the results
    results.append({
        'rank': r,
        'alpha': alpha,
        'dropout': dropout,
        'training_time': training_time,
        'accuracy': report['accuracy'],
        'f1_score': report['weighted avg']['f1-score'],
        'classification_report': report
    })

# Print summary of results
for result in results:
    print(f"Rank: {result['rank']}, Alpha: {result['alpha']}, Dropout: {result['dropout']}")
    print(f"Training Time: {result['training_time']}")
    print(f"Accuracy: {result['accuracy']}")
    print(f"F1 Score: {result['f1_score']}")
    print("Classification Report:", result['classification_report'])
    print("="*50)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Rank: 8, Alpha: 32, Dropout: 0.3
Training Time: 537.218136548996
Accuracy: 0.5166642578407284
F1 Score: 0.4411665067107398
Classification Report: {'0': {'precision': 0.5, 'recall': 0.00023738872403560832, 'f1-score': 0.00047455214141653814, 'support': 8425.0}, '1': {'precision': 0.5134343349794928, 'recall': 0.7882292469802108, 'f1-score': 0.6218257387602008, 'support': 15564.0}, '2': {'precision': 0.5238852014583528, 'recall': 0.5283801621723553, 'f1-score': 0.5261230812561611, 'support': 10606.0}, 'accuracy': 0.5166642578407284, 'macro avg': {'precision': 0.5124398454792819, 'recall': 0.43894893262553386, 'f1-score': 0.3828077907192595, 'support': 34595.0}, 'weighted avg': {'precision': 0.5133666262838015, 'recall': 0.5166642578407284, 'f1-score': 0.4411665067107398, 'support': 34595.0}}
