In [9]:
pip install transformers datasets peft accelerate

Note: you may need to restart the kernel to use updated packages.


In [4]:
%pip install sentencepiece


Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Downloading sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
from transformers import LlamaTokenizer, LlamaForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
import sentencepiece
print("SentencePiece is installed and working!")


SentencePiece is installed and working!


In [7]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

model_name = "llama3-8b-instruct"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

# Set the tokenizer padding token (necessary for fine-tuning)
tokenizer.pad_token = tokenizer.eos_token

ImportError: 
LlamaTokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [None]:
from datasets import load_dataset, DatasetDict

# Load SQuAD dataset
dataset = load_dataset("squad")

# Split the original training dataset into 90% train and 10% validation
train_valid_split = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Rename the split keys for clarity
new_dataset = DatasetDict({
    "train": train_valid_split["train"],  # 90% of original train set
    "validation": train_valid_split["test"],  # 10% of original train set
    "test": dataset["validation"]  # Original validation set becomes test set
})

# Function to preprocess the dataset for LLaMA
def preprocess(data):
    instruction = "Answer the question based on the given context."
    context = data["context"]
    question = data["question"]
    answer = data["answers"]["text"][0]  # First answer
    inputs = tokenizer(
        f"{instruction}\nContext: {context}\nQuestion: {question}",
        truncation=True,
        padding="max_length",
        max_length=512
    )
    outputs = tokenizer(answer, truncation=True, padding="max_length", max_length=128)
    inputs["labels"] = outputs["input_ids"]
    return inputs

# Apply preprocessing to each split
tokenized_dataset = new_dataset.map(preprocess, batched=True)


In [None]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Fine-tune query and value projections
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Check trainable parameters

In [None]:
training_args = TrainingArguments(
    output_dir="./llama_finetuned",
    evaluation_strategy="steps",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    learning_rate=2e-5,
    warmup_steps=500,
    fp16=True,  # Enable mixed precision for faster training
    report_to="none"  # Disable logging to external tools like WandB
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"]
)

# Start training
trainer.train()


In [None]:
model.save_pretrained("./llama_finetuned")
tokenizer.save_pretrained("./llama_finetuned")

In [None]:
# Evaluate on validation set
results = trainer.evaluate()
print(results)

In [None]:
# Load fine-tuned model
from transformers import pipeline

qa_pipeline = pipeline("text-generation", model="./llama_finetuned", tokenizer=tokenizer)

# Inference
context = "The capital of India is New Delhi."
question = "What is the capital of India?"
input_text = f"Answer the question based on the given context.\nContext: {context}\nQuestion: {question}"

response = qa_pipeline(input_text, max_new_tokens=50)
print("Answer:", response[0]["generated_text"])


In [None]:
def validate_answer(context, answer):
    return answer if answer in context else "I don't know."

# Test the validation function
predicted_answer = response[0]["generated_text"]
validated_answer = validate_answer(context, predicted_answer)
print("Validated Answer:", validated_answer)
