<a href="https://colab.research.google.com/github/Aastha031295/FineTuning/blob/main/ft_LLAMA2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

Import all the required libraries

In [3]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [4]:
# 1. Load the IMDB dataset from Hugging Face Hub
dataset = load_dataset("stanfordnlp/imdb")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [5]:
from transformers import BertModel, BertConfig

# Load pre-trained BERT model
config = BertConfig.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased', config=config)

# Define LoRA parameters
rank = 1  # Low-rank
adaptation_modules = ['query', 'key', 'value']  # Typical layers in a transformer to adapt

# Modify the model to add LoRA adaptation
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        if any(n in name for n in adaptation_modules):
            d_in, d_out = module.weight.shape
            # Add LoRA matrices
            module.lora_A = torch.nn.Parameter(torch.randn(d_out, rank) * 0.01)
            module.lora_B = torch.nn.Parameter(torch.randn(rank, d_in) * 0.01)
            module.lora_scaling = torch.sqrt(torch.tensor(d_in * rank))
            # Override the forward pass to include the LoRA adaptation
            original_forward = module.forward
            def lora_forward(x):
                return original_forward(x) + module.lora_A @ module.lora_B @ x / module.lora_scaling
            module.forward = lora_forward




config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [6]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [7]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
)

In [9]:
from transformers import BertForSequenceClassification, BertConfig

# Load pre-trained BERT model for sequence classification
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)  # Assuming binary classification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

# ... (rest of your code for LoRA adaptation remains the same)

# Update the Trainer initialization
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test']
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
trainer.train()



Step,Training Loss
10,0.7147
20,0.6861
30,0.6875
40,0.709
50,0.6838
60,0.6564
70,0.6529
80,0.6067
90,0.5623
100,0.4741


TrainOutput(global_step=4689, training_loss=0.16783448955726207, metrics={'train_runtime': 7309.067, 'train_samples_per_second': 10.261, 'train_steps_per_second': 0.642, 'total_flos': 1.9733329152e+16, 'train_loss': 0.16783448955726207, 'epoch': 3.0})