In [3]:
!pip install transformers datasets accelerate peft trl einops

  pid, fd = os.forkpty()




In [4]:
!pip install -U bitsandbytes



In [2]:
import os
import torch
import time
from datasets import load_dataset, Dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, get_peft_model
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
    logging,
    Trainer
)
from accelerate import Accelerator
from trl import SFTTrainer

In [14]:
# Fetch every split of the SNLI corpus
dataset = load_dataset("snli")

# Down-sampling plan: split name -> (stride between kept rows, maximum rows kept)
sampling_plan = {
    "train": (550, 1000),
    "validation": (100, 100),
    "test": (100, 100),
}

# Evenly spaced row indices for each split, truncated to the requested cap
split_indices = {
    split_name: list(range(0, len(dataset[split_name]), stride))[:cap]
    for split_name, (stride, cap) in sampling_plan.items()
}
train_indices = split_indices["train"]
validation_indices = split_indices["validation"]
test_indices = split_indices["test"]

# Materialise the sampled subsets.
# NOTE(review): only the validation subset is filtered for out-of-range labels
# later in the notebook; the train and test subsets are not, so the assertion in
# preprocess_function will fail if either of them contains such a label.
train_data = dataset["train"].select(train_indices)
validation_data = dataset["validation"].select(validation_indices)
test_data = dataset["test"].select(test_indices)


In [6]:
# Checkpoint that both the tokenizer and the classifier are loaded from
base_model = "microsoft/phi-2"

# Fast tokenizer for that checkpoint: pad on the right-hand side and reuse the
# end-of-sequence token as the padding token
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [12]:
# 4-bit quantization settings handed to from_pretrained when the model is loaded:
# NF4 quantization type, float16 compute dtype, double quantization switched off
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)


In [None]:

# # Load base model
# model = AutoModelForCausalLM.from_pretrained(
#     base_model,
#     quantization_config=bnb_config,
#     trust_remote_code=True,
#     low_cpu_mem_usage=True,
#     device_map={"": 0},
#     revision="refs/pr/23" #the main version of Phi-2 doesn’t support gradient checkpointing (while training this model)
# )

# model.config.use_cache = False
# model.config.pretraining_tp = 1
# model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)



In [9]:
# Load the checkpoint with a 3-label sequence-classification head, quantized
# according to bnb_config, with the whole model mapped to device index 0
cls_model = AutoModelForSequenceClassification.from_pretrained(
    base_model,
    num_labels=3,
    quantization_config=bnb_config,
    device_map={"": 0},
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of PhiForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# (left disabled by the author) cls_model.config.use_cache = False
cls_model.config.pretraining_tp = 1

# Ready the quantized model for training, with gradient checkpointing enabled
cls_model = prepare_model_for_kbit_training(
    cls_model,
    use_gradient_checkpointing=True,
)

# LoRA adapter settings for a sequence-classification task:
# rank 8, alpha 16, dropout 0.1
lora_config = LoraConfig(
    task_type="SEQ_CLS",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

# Wrap the prepared model so that the LoRA adapter is attached
cls_model = get_peft_model(cls_model, lora_config)

In [11]:
# Fall back to the end-of-sequence token for padding when the tokenizer has none
if tokenizer.pad_token is None:
    fallback_token, fallback_id = tokenizer.eos_token, tokenizer.eos_token_id
    tokenizer.pad_token = fallback_token
    tokenizer.pad_token_id = fallback_id

In [13]:
# Inspect the label values of the training subset (the message refers to the
# training data, so the training split is the one that must be inspected here).
print("Unique labels in training data:", set(train_data['label']))


Unique labels in training data: {0, 1, 2}


In [15]:
# Labels accepted by the 3-class classifier; validation rows labelled -1 are dropped
valid_labels = [0, 1, 2]


def _has_valid_label(example):
    """Return True when the example's label is one of valid_labels."""
    return example['label'] in valid_labels


# Keep only the validation rows that carry a usable label
filtered_val_data = validation_data.filter(_has_valid_label)

print("Unique labels in filtered validation data:", set(filtered_val_data['label']))


Unique labels in filtered validation data: {0, 1, 2}


In [16]:
def preprocess_function(examples):
    """Tokenize premise/hypothesis pairs after checking their labels.

    Args:
        examples: Batch exposing 'label', 'premise' and 'hypothesis' columns.

    Returns:
        The output of the module-level tokenizer for the sentence pairs, with
        truncation enabled and padding to max_length=256.

    Raises:
        AssertionError: If any label is not one of 0, 1 or 2.
    """
    allowed_labels = [0, 1, 2]
    labels_ok = all(label in allowed_labels for label in examples['label'])
    assert labels_ok, "Labels must be in the range of 0 to 2 for 3-class classification."

    encode_options = dict(truncation=True, padding='max_length', max_length=256)
    return tokenizer(examples['premise'], examples['hypothesis'], **encode_options)

# Tokenize the three subsets in train / validation / test order
train_encodings, validation_encodings, test_encodings = (
    preprocess_function(split)
    for split in (train_data, filtered_val_data, test_data)
)

class NliDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Each item is a dict holding one tensor per encoding field plus a 'labels'
    tensor for the same row.
    """

    def __init__(self, encodings, labels):
        """Store the encodings (field -> per-row values) and the per-row labels."""
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of rows, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return row `idx` as a dict of tensors, including its 'labels' entry."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Pair each set of encodings with the labels of the subset it was built from
train_dataset, validation_dataset, test_dataset = (
    NliDataset(encodings, split['label'])
    for encodings, split in (
        (train_encodings, train_data),
        (validation_encodings, filtered_val_data),
        (test_encodings, test_data),
    )
)


In [17]:
# Hyper-parameters and bookkeeping for the fine-tuning run
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,
    # Training length and batch sizes
    num_train_epochs=5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch, keep at most five checkpoints,
    # and restore the best checkpoint when training finishes
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
)



In [18]:
# Copy the tokenizer's padding token id into the model config.
# NOTE(review): presumably required so the classification head can find the last
# non-padding token in padded batches — confirm against the model implementation.
cls_model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [19]:
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score

In [20]:
def compute_accuracy(eval_pred):
    """Compute classification accuracy for a Trainer evaluation pass.

    Args:
        eval_pred: Object exposing `predictions` (per-class scores, one row per
            example) and `label_ids` (reference labels).

    Returns:
        A dict with a single 'accuracy' entry.
    """
    predicted_labels = np.argmax(eval_pred.predictions, axis=1)
    # accuracy_score is documented as (y_true, y_pred): references go first
    return {'accuracy': accuracy_score(eval_pred.label_ids, predicted_labels)}


# Fine-tuning Trainer: trains on the sampled training set and reports accuracy
# on the filtered validation set
trainer = Trainer(
    model=cls_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=compute_accuracy,
)

In [None]:
# Time the fine-tuning run (wall-clock seconds around trainer.train() only)
start_time = time.time()
trainer.train()
end_time = time.time()
elapsed_seconds = end_time - start_time

# Persist the fine-tuned model, then report how long training took
cls_model.save_pretrained('/kaggle/working/fine_tuned_model')
print("Time taken to fine-tune the model:", elapsed_seconds)

**EVALUATE**

In [58]:
# Tokenizer for the evaluation phase, configured the same way as the training
# tokenizer: right-side padding, end-of-sequence token reused as the pad token
eval_tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
eval_tokenizer.padding_side = "right"
eval_tokenizer.pad_token = eval_tokenizer.eos_token

In [29]:
# Attach the saved fine-tuned adapter for inference only (is_trainable=False).
# NOTE(review): cls_model was already wrapped by get_peft_model earlier in the
# notebook, so this wraps an existing PEFT model again — confirm whether a freshly
# loaded base model should be passed here instead.
# NOTE(review): the adapter is read from a Kaggle input path, not from the
# '/kaggle/working/fine_tuned_model' directory written by the training cell.
final_model = PeftModel.from_pretrained(
    cls_model,
    "/kaggle/input/results/pytorch/default/1/fine_tuned_model",
    is_trainable=False,
)

In [41]:
def _build_eval_trainer(model, output_dir):
    """Create an evaluation-only Trainer that scores `model` on test_dataset.

    Args:
        model: Model to evaluate.
        output_dir: Directory passed to TrainingArguments for this evaluation.

    Returns:
        A Trainer configured for evaluation (eval batch size 16, no training)
        that reports an 'accuracy' metric.
    """
    def _accuracy(eval_pred):
        predicted_labels = np.argmax(eval_pred.predictions, axis=1)
        # accuracy_score is documented as (y_true, y_pred): references go first
        return {'accuracy': accuracy_score(eval_pred.label_ids, predicted_labels)}

    return Trainer(
        model=model,
        args=TrainingArguments(
            output_dir=output_dir,
            per_device_eval_batch_size=16,
            do_train=False,
            do_eval=True,
        ),
        eval_dataset=test_dataset,
        compute_metrics=_accuracy,
    )


# Baseline: evaluate cls_model on the test set.
# NOTE(review): cls_model is the same object final_model was built from, and its
# classification head was reported as newly initialized when the checkpoint was
# loaded — confirm this is the intended "pretrained" baseline.
pretrained_trainer = _build_eval_trainer(cls_model, './pretrained_results')
pretrained_eval_results = pretrained_trainer.evaluate()
pretrained_accuracy = pretrained_eval_results["eval_accuracy"]
print("Pretrained Model Accuracy on Test Set:", pretrained_accuracy)

# Evaluate the fine-tuned model on the same test set
fine_tuned_trainer = _build_eval_trainer(final_model, './fine_tuned_results')
fine_tuned_eval_results = fine_tuned_trainer.evaluate()
fine_tuned_accuracy = fine_tuned_eval_results["eval_accuracy"]

print(f"Accuracy Comparison:\n- Pretrained Model: {pretrained_accuracy:.2f}\n- Fine-tuned Model: {fine_tuned_accuracy:.2f}")


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Pretrained Model Accuracy on Test Set: 0.32


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Accuracy Comparison:
- Pretrained Model: 0.32
- Fine-tuned Model: 0.59


In [43]:
# Tally all parameters and the trainable subset in a single pass.
# NOTE(review): with 4-bit quantized weights, numel() may count packed storage
# elements rather than logical weights — confirm before quoting the total.
total_params = 0
trainable_params = 0
for param in final_model.parameters():
    element_count = param.numel()
    total_params += element_count
    if param.requires_grad:
        trainable_params += element_count

print(f"Total Parameters: {total_params}, Trainable Parameters: {trainable_params}")


Total Parameters: 1399459840, Trainable Parameters: 7680
