In [1]:
!pip install transformers datasets accelerate peft trl einops
!pip install -U bitsandbytes

Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.12.0-py3-none-any.whl.metadata (10 kB)
Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting transformers
  Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Downloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading trl-0.12.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.2/310.2 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.46.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m


In [7]:
import os
import torch
import time
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from sklearn.metrics import accuracy_score
import numpy as np
from accelerate import Accelerator
from trl import SFTTrainer

In [3]:
# import wandb
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# secret_value_0 = user_secrets.get_secret("wandb_api_key")
# wandb.login(key=secret_value_0)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [8]:
import os
# Switch off Weights & Biases reporting for this session via its environment flag.
os.environ["WANDB_DISABLED"] = "true"

In [9]:
# Load SNLI dataset
dataset = load_dataset("snli")

# Evenly spaced subsample of each split: every 550th training row (capped at
# 1000) and every 100th validation/test row (capped at 100).
train_indices = list(range(0, len(dataset["train"]), 550))[:1000]
validation_indices = list(range(0, len(dataset["validation"]), 100))[:100]
test_indices = list(range(0, len(dataset["test"]), 100))[:100]

# Only these labels can be rendered by format_prompt; rows labelled -1 would
# raise a KeyError there, so they are dropped from *every* split (previously
# only the validation split was filtered).
valid_labels = [0, 1, 2]


def _has_valid_label(example):
    """Return True when the example carries one of the three usable labels."""
    return example['label'] in valid_labels


# Subset each split with the selected indices, then discard unusable rows.
# Filtering happens after selection so the sampled rows stay the same as
# before; a split may end up slightly smaller than its cap.
train_data = dataset["train"].select(train_indices).filter(_has_valid_label)
validation_data = dataset["validation"].select(validation_indices).filter(_has_valid_label)
test_data = dataset["test"].select(test_indices).filter(_has_valid_label)

# Base checkpoint and its tokenizer
base_model = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
# Pad on the right, reusing the EOS token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization, fp16 compute, no nested (double) quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model onto device 0 and prepare it for k-bit
# training with gradient checkpointing enabled.
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map={"": 0},
    ),
    use_gradient_checkpointing=True,
)

# LoRA adapter settings: rank 8, alpha 8, 10% dropout, no bias terms trained
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=8,
    lora_dropout=0.1,
    bias="none",
)

# Attach the LoRA adapter to the prepared model
model = get_peft_model(model, lora_config)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Prompt formatting function
def format_prompt(premise, hypothesis, label=None):
    """Render one NLI example as a text prompt.

    Args:
        premise: Premise sentence.
        hypothesis: Hypothesis sentence.
        label: Integer class id (0, 1 or 2), or None to build an
            inference-style prompt that stops right after "Label:".

    Returns:
        The prompt string; when ``label`` is given, the class name is
        appended after "Label: ".

    Raises:
        KeyError: If ``label`` is not None and not one of 0, 1, 2.
    """
    # Inference prompt: nothing follows the "Label:" cue.
    if label is None:
        return f"Premise: {premise}\nHypothesis: {hypothesis}\nLabel:"

    # Training prompt: spell out the class name for the given id.
    class_names = {0: "entailment", 1: "neutral", 2: "contradiction"}
    return f"Premise: {premise}\nHypothesis: {hypothesis}\nLabel: {class_names[label]}"

# Preprocess the dataset with tokenization and create input-target pairs
def preprocess_function(examples, tokenizer, is_train=True, max_length=150):
    """Tokenize NLI examples into fixed-length causal-LM inputs with labels.

    Args:
        examples: Mapping with parallel 'premise', 'hypothesis' and 'label'
            sequences.
        tokenizer: Callable tokenizer; it is invoked with ``truncation=True``
            and ``padding='max_length'`` and its result must expose
            'input_ids' and 'attention_mask'.
        is_train: When True the gold label text is included in the prompt;
            when False the prompt ends right after "Label:".
        max_length: Length every sequence is padded/truncated to.

    Returns:
        The tokenizer output with an added 'labels' entry. Labels equal the
        input ids on real tokens and -100 on padding positions, so padding
        is excluded from the language-modeling loss.
    """
    prompts = [
        format_prompt(premise, hypothesis, label if is_train else None)
        for premise, hypothesis, label in zip(
            examples['premise'], examples['hypothesis'], examples['label']
        )
    ]
    model_inputs = tokenizer(
        prompts, truncation=True, padding='max_length', max_length=max_length
    )

    # Copying input_ids verbatim would also train/evaluate on the padding
    # tokens. Use the attention mask (not the token id) to find padding,
    # because the pad token may be the same id as a real EOS token.
    model_inputs['labels'] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(model_inputs['input_ids'], model_inputs['attention_mask'])
    ]
    return model_inputs

In [None]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Tokenize the three splits in order; only the training split gets the gold
# label written into its prompts.
train_encodings, validation_encodings, test_encodings = (
    preprocess_function(split, tokenizer, is_train=with_label)
    for split, with_label in (
        (train_data, True),
        (validation_data, False),
        (test_data, False),
    )
)

# Custom Dataset Class
class NLIDataset(torch.utils.data.Dataset):
    """Map-style dataset over a mapping of per-example encoded fields.

    ``encodings`` maps each field name (e.g. 'input_ids') to an indexable
    collection holding one entry per example.
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        # The 'input_ids' field defines how many examples there are.
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        # Build one sample by picking entry `idx` of every field and
        # converting it to a tensor.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        return sample

# Create train, validation, and test datasets from the tokenized splits
train_dataset = NLIDataset(train_encodings)
validation_dataset = NLIDataset(validation_encodings)
test_dataset = NLIDataset(test_encodings)

# Training arguments: evaluate and checkpoint once per epoch for 5 epochs and
# reload the best checkpoint when training finishes.
train_args = TrainingArguments(
    output_dir='./results',
    # `eval_strategy` is the current name of this option; the older
    # `evaluation_strategy` spelling is deprecated in recent transformers.
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=5,
    load_best_model_at_end=True,
)

# Custom metric calculation for accuracy
def compute_metrics(predictions, labels, shift_labels=True):
    """Compute accuracy from raw model scores.

    Two input layouts are supported:

    * classification: ``predictions`` of shape (N, C) with ``labels`` of
      shape (N,) -- plain argmax accuracy, as before;
    * sequence / causal LM: ``predictions`` of shape (N, T, V) with
      ``labels`` of shape (N, T) -- token-level accuracy.

    Args:
        predictions: Array-like of scores whose last axis enumerates the
            candidate classes/tokens. A tuple is accepted, in which case its
            first element is used as the scores.
        labels: Array-like of integer targets; positions equal to -100 are
            ignored.
        shift_labels: For sequence inputs, compare the score at position t
            with the label at position t + 1 (next-token prediction). Has no
            effect on 1-D labels.

    Returns:
        ``{"accuracy": float}``; 0.0 when no position is scored.

    Note:
        For sequence inputs this is next-token accuracy over whatever labels
        are supplied. It equals task (NLI) accuracy only if the labels are
        restricted to the answer tokens.
    """
    # The scores may arrive wrapped in a tuple together with other outputs.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    scores = np.asarray(predictions)
    targets = np.asarray(labels)

    # The candidate axis is always last; axis=1 would be the sequence axis
    # for (N, T, V) inputs.
    predicted = scores.argmax(axis=-1)

    if shift_labels and targets.ndim >= 2 and predicted.shape == targets.shape:
        # Position t predicts token t + 1: drop the last prediction and the
        # first label so the two arrays line up.
        predicted = predicted[..., :-1]
        targets = targets[..., 1:]

    scored = targets != -100
    n_scored = int(scored.sum())
    if n_scored == 0:
        return {"accuracy": 0.0}

    n_correct = int((predicted[scored] == targets[scored]).sum())
    return {"accuracy": n_correct / n_scored}

# Build the Trainer used for fine-tuning; the adapter lambda unpacks the
# evaluation output into the (predictions, labels) pair compute_metrics expects.
trainer = Trainer(
    args=train_args,
    model=model,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=lambda eval_pred: compute_metrics(
        eval_pred.predictions, eval_pred.label_ids
    ),
)

# Run fine-tuning and record wall-clock timestamps around it
start_time = time.time()
trainer.train()
end_time = time.time()

# Persist the final model, then report how long training took
model.save_pretrained('./fine_tuned_model')
print("Time taken to fine-tune the model:", end_time - start_time)


In [None]:
# Fine-tuned model for evaluation.
# `model` was returned by get_peft_model earlier in this notebook, so it is
# already a PeftModel holding the trained LoRA weights -- the same weights that
# model.save_pretrained('./fine_tuned_model') wrote to disk. Passing it to
# PeftModel.from_pretrained again would put a second PEFT wrapper around an
# already-wrapped model, so the in-memory model is evaluated directly instead.
# To load the saved adapter in a fresh session, call
# PeftModel.from_pretrained(<freshly loaded base model>, './fine_tuned_model').
fine_tuned_model = model
fine_tuned_model.eval()

# Total parameters in the model
total_params = sum(p.numel() for p in fine_tuned_model.parameters())
# Count trainable parameters (the parameters left with requires_grad=True)
trainable_params = sum(p.numel() for p in fine_tuned_model.parameters() if p.requires_grad)

print(f"Total Parameters: {total_params}, Trainable Parameters: {trainable_params}")


# Evaluation-only TrainingArguments shared by the pretrained and fine-tuned runs
eval_args = TrainingArguments(
    output_dir='./eval_results',
    per_device_eval_batch_size=4,
    do_train=False,
    do_eval=True,
    logging_dir='./eval_logs',
    # The string "none" disables all reporting integrations; passing None
    # falls back to the library default rather than turning reporting off.
    report_to="none"
)

# Evaluation trainer for the fine-tuned model
eval_trainer = Trainer(
    model=fine_tuned_model,
    args=eval_args,
    eval_dataset=test_dataset,
    compute_metrics=lambda p: compute_metrics(p.predictions, p.label_ids)
)
eval_results = eval_trainer.evaluate()
print("Fine-tuned Model Accuracy on Test Set:", eval_results["eval_accuracy"])

# Evaluation trainer for the pretrained model.
# NOTE(review): this loads the base checkpoint without the quantization config
# used for the fine-tuned model, so it needs considerably more memory and the
# two scores are not measured under identical numerics -- confirm this is intended.
pretrained_model = AutoModelForCausalLM.from_pretrained(base_model)
pretrained_trainer = Trainer(
    model=pretrained_model,
    args=eval_args,
    eval_dataset=test_dataset,
    compute_metrics=lambda p: compute_metrics(p.predictions, p.label_ids)
)

pretrained_eval_results = pretrained_trainer.evaluate()
print("Pretrained Model Accuracy on Test Set:", pretrained_eval_results["eval_accuracy"])

print(f"Accuracy Comparison:\n- Pretrained Model: {pretrained_eval_results['eval_accuracy']}\n- Fine-tuned Model: {eval_results['eval_accuracy']}")
