<a href="https://colab.research.google.com/github/Gyanendu01/Udacity-Generative-AI/blob/main/FineTuningProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install datasets
!pip install transformers
!pip install peft
!pip install evaluate
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=f46c232849be35f90f00070142bafb07ebd33507fd649b409e1d97780c48c925
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


# Step 1: Setup and Import Libraries

In [5]:


import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from peft import LoraConfig, TaskType, get_peft_model
import evaluate

In [6]:
# Set seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Step 2: Load Dataset

In [7]:
# Using the IMDB dataset for sentiment analysis (binary classification)
dataset = load_dataset("imdb", split=["train[:5000]", "test[:1000]"])
train_dataset, test_dataset = dataset[0], dataset[1]

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Train dataset size: 5000
Test dataset size: 1000


# Step 3: Load Tokenizer and Preprocess Data

In [8]:
# We'll use GPT2 for this example
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# GPT2 doesn't have a padding token by default, so we set it
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=256
    )

# Apply preprocessing
tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_test = test_dataset.map(preprocess_function, batched=True)

# Make sure the dataset has the right format for training
tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_test = tokenized_test.rename_column("label", "labels")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

# Step 4: Load Foundation Model

In [9]:
# Load the base model for sequence classification
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2  # Binary classification for sentiment analysis
)

# Set pad_token_id in the model config
model.config.pad_token_id = tokenizer.pad_token_id


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Step 5: Evaluate the Foundation Model

In [10]:
# Define evaluation metrics
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    accuracy = (predictions == labels).mean()
    return {"accuracy": accuracy}

# Set training arguments for evaluation
eval_args = TrainingArguments(
    output_dir="./results/base_model_eval",
    per_device_eval_batch_size=16,
    do_train=False,
    do_eval=True,
    no_cuda=torch.cuda.is_available() == False,  # Use GPU if available
)

# Create trainer for evaluation
base_trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)



  base_trainer = Trainer(


In [11]:
# Evaluate base model
print("Evaluating base model...")
base_model_metrics = base_trainer.evaluate()
print(f"Base model metrics: {base_model_metrics}")


Evaluating base model...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgyanenduboitai2004[0m ([33mgyanenduboitai2004-soa-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Base model metrics: {'eval_loss': 0.022136662155389786, 'eval_model_preparation_time': 0.0024, 'eval_accuracy': 0.996, 'eval_runtime': 7.5459, 'eval_samples_per_second': 132.522, 'eval_steps_per_second': 8.349}


# Step 6: Create and Apply LoRA Configuration

In [12]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank dimension
    lora_alpha=32,  # Alpha parameter for LoRA
    target_modules=["c_attn", "c_proj"],  # Layers to apply LoRA to for GPT-2
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS  # Sequence classification task
)

# Create PEFT model
peft_model = get_peft_model(model, lora_config)
print(f"Trainable parameters in LoRA model: {peft_model.print_trainable_parameters()}")


trainable params: 812,544 || all params: 125,253,888 || trainable%: 0.6487
Trainable parameters in LoRA model: None




# Step 7: Fine-tune with LoRA

In [13]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Current device: {torch.cuda.current_device()}")
print(f"Device name: {torch.cuda.get_device_name(0)}")

CUDA available: True
Current device: 0
Device name: Tesla T4


In [14]:
!pip install datasets
!pip install datasets transformers peft torch evaluate



In [15]:
# Training arguments
# Minimal training arguments
from transformers import TrainingArguments, Trainer # Import Trainer here
from transformers.integrations import TensorBoardCallback
from transformers import TrainerCallback
from datasets import load_dataset
training_args = TrainingArguments(
    output_dir="./results/lora_model",
    learning_rate=5e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    num_train_epochs=1,  # Reduced to 1 epoch
    weight_decay=0.01,
    logging_steps=100,
    # Remove no_cuda parameter, let it use GPU by default
    fp16=True,  # Enable mixed precision training on GPU
)
dataset = load_dataset("imdb", split=["train[:1000]", "test[:200]"])
# Create trainer for fine-tuning


class ProgressCallback(TrainerCallback):
    def on_train_begin(self, args, state, control, **kwargs):
        print("Training started")

    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 100 == 0:
            print(f"Step {state.global_step}: Loss = {state.log_history[-1]['loss']:.4f}")

# Add to your trainer
lora_trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)
# Fine-tune the model
print("Training LoRA model...")
lora_trainer.train()

# Save the fine-tuned model
peft_model_path = "./results/lora_model_final"
peft_model.save_pretrained(peft_model_path)
print(f"LoRA model saved to {peft_model_path}")

  lora_trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training LoRA model...


Step,Training Loss
100,0.0001
200,0.0
300,0.0
400,0.0
500,0.0
600,0.0


LoRA model saved to ./results/lora_model_final


# Step 8: Evaluate Fine-tuned Model

In [16]:
# We can use the already trained model in lora_trainer, as it already has the best model loaded
print("Evaluating LoRA fine-tuned model...")
lora_model_metrics = lora_trainer.evaluate()
print(f"LoRA model metrics: {lora_model_metrics}")

Evaluating LoRA fine-tuned model...


LoRA model metrics: {'eval_loss': 3.576278717609682e-10, 'eval_accuracy': 1.0, 'eval_runtime': 8.7287, 'eval_samples_per_second': 114.564, 'eval_steps_per_second': 7.218, 'epoch': 1.0}


# Step 9: Compare Results

In [17]:
print("\n----- Results Comparison -----")
print(f"Base model accuracy: {base_model_metrics['eval_accuracy']:.4f}")
print(f"LoRA model accuracy: {lora_model_metrics['eval_accuracy']:.4f}")
print(f"Improvement: {(lora_model_metrics['eval_accuracy'] - base_model_metrics['eval_accuracy']) * 100:.2f}%")


----- Results Comparison -----
Base model accuracy: 0.9960
LoRA model accuracy: 1.0000
Improvement: 0.40%


# Step 10: Inference Example

In [20]:
# Example function for inference with both models
def predict_sentiment(text, model, tokenizer, device): # Add device parameter
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device) # Move inputs to device
    model = model.to(device) # Move model to device if not already there
    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_class = torch.argmax(probs, dim=-1).item()
    return "Positive" if pred_class == 1 else "Negative", probs[0].tolist()

# Get the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Test with a few examples
test_texts = [
    "This was the worst film I've ever seen. Complete waste of time."
]

print("\n----- Sample Predictions -----")
for text in test_texts:
    print(f"\nText: {text}")

    # Base model prediction
    base_pred, base_probs = predict_sentiment(text, model, tokenizer, device) # Pass device to predict_sentiment
    print(f"Base model prediction: {base_pred} (Confidence: {max(base_probs):.4f})")

    # LoRA model prediction
    lora_pred, lora_probs = predict_sentiment(text, peft_model, tokenizer, device) # Pass device to predict_sentiment
    print(f"LoRA model prediction: {lora_pred} (Confidence: {max(lora_probs):.4f})")


----- Sample Predictions -----

Text: This was the worst film I've ever seen. Complete waste of time.
Base model prediction: Negative (Confidence: 1.0000)
LoRA model prediction: Negative (Confidence: 1.0000)
