### Install Required Packages:

In [1]:
# Uncomment on a fresh environment (e.g. Colab) to install TRL into the kernel's environment:
# %pip install -q trl

### Import Libraries:

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, pipeline, logging
from trl import SFTTrainer
from peft import LoraConfig, PeftModel, TaskType
import torch
import json
from datasets import Dataset

import warnings
# Silence all Python warnings and restrict the transformers logger to CRITICAL
# so that the cell outputs stay short.
# NOTE(review): this also hides deprecation and numerical warnings that can
# matter while debugging — consider relaxing it when investigating a problem.
warnings.filterwarnings('ignore')
logging.set_verbosity(logging.CRITICAL)

### Load the Dataset:

In [4]:
# Load dataset: one JSON object per non-blank line (JSON Lines).
# The encoding is passed explicitly so that reading the file does not depend
# on the platform's default locale encoding.
with open("/content/custom_quiz_dataset.jsonl", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Convert to decoder-only format: input + output combined as one sequence.
# Every record must provide "input" and an "output" mapping with "Question"
# and "Answer"; a missing key raises KeyError here.
for d in data:
    output = d["output"]
    d["text"] = f"{d['input']}\nQuestion: {output['Question']}\nAnswer: {output['Answer']}"

# The resulting dataset carries the original columns plus the new "text" column.
dataset = Dataset.from_list(data)

In [5]:
# Display the dataset summary (feature names and row count).
dataset

Dataset({
    features: ['input', 'output', 'text'],
    num_rows: 2090
})

### Load Model and Tokenizer:

In [18]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# tokenizer
# trust_remote_code is intentionally not passed: the checkpoint loads as the
# built-in LlamaForCausalLM class, so no repository-supplied Python code needs
# to be executed, and the later tokenizer load in this notebook omits it too.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # reuse the EOS token for padding
tokenizer.padding_side = "right"

# base_model: weights requested in float16, with device placement left to
# the library via device_map="auto"
base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

In [19]:
# Display the module tree; the projection layer names listed in it are the
# names that LoRA's target_modules must match.
base_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb): 

### Baseline Generation:

In [22]:
# Text-generation pipeline around the not-yet-fine-tuned model; its output is
# the baseline to compare the fine-tuned generations against.
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=80,
)

# Prompt ingredients
topic = "Physics"
ques_type = "multiple choice question"
prompt = f"Generate a {ques_type} on {topic}"

# Run generation and print the text of the first returned candidate
result = pipe(prompt)
print(result[0]["generated_text"])

Generate a multiple choice question on Physics.
Choose the correct answer based on the given material: "The density of a gas is directly proportional to its pressure and has an exponential increase as the pressure increases up to a certain point."
(1) True: The density of a gas is directly proportional to its pressure and has an exponential increase as the pressure increases up to a certain point


### Configure LoRA:

In [7]:
# LoRA adapter settings for causal-LM fine-tuning.
# NOTE(review): assuming PEFT's standard lora_alpha / r scaling, alpha=16 with
# r=64 gives a factor of 0.25 — confirm that this is the intended strength.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    # Adapt only the attention query/value projections; adjust these names
    # if a model with a different architecture is used.
    target_modules=["q_proj", "v_proj"],
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

### Data Preprocessing:

In [8]:
def preprocess(sample):
    """Return the training text stored under the ``"text"`` key of ``sample``.

    Raises KeyError when ``sample`` has no ``"text"`` entry.
    """
    training_text = sample["text"]
    return training_text

### Training Configuration:

In [9]:
import os
# WANDB_DISABLED is deliberately not set here: the variable is deprecated (the
# library logs a warning about it on every use), and experiment-tracker
# reporting is already switched off by report_to="none" in TrainingArguments.

In [10]:
# Training Arguments
# 2 sequences per device x 4 accumulation steps = 8 sequences per optimizer step.
training_args = TrainingArguments(
    # checkpoints: destination directory and save cadence
    output_dir="./tinyllama-quiz",
    save_strategy="epoch",
    # optimisation schedule
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    fp16=True,
    # logging: report the loss every 50 steps, no external experiment tracker
    logging_steps=50,
    report_to="none",
)

### Initialize Trainer:

In [11]:
# Trainer
# The LoRA config is handed to SFTTrainer together with the base model, and
# `preprocess` supplies the text of each training example.
# NOTE(review): the tokenizer configured earlier is not passed in, so the
# trainer presumably loads its own — confirm the padding settings still apply.
trainer = SFTTrainer(
    model=base_model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=preprocess,
    peft_config=lora_config,
)

Applying formatting function to train dataset:   0%|          | 0/2090 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2090 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2090 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2090 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


### Start Training:

In [12]:
# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
trainer.train()

Step,Training Loss
50,2.1974
100,1.3948
150,1.008
200,0.8673
250,0.8094
300,0.7664
350,0.7691
400,0.7355
450,0.7198
500,0.739


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TrainOutput(global_step=786, training_loss=0.8936950766099925, metrics={'train_runtime': 403.9413, 'train_samples_per_second': 15.522, 'train_steps_per_second': 1.946, 'total_flos': 1649297372700672.0, 'train_loss': 0.8936950766099925})

### Save the Fine-Tuned Model

In [13]:
# Directory that receives both the fine-tuned model files and the tokenizer files.
new_model_name = "tinyllama_finetuned_lora"

trainer.model.save_pretrained(new_model_name)
# Kept as the final expression so that the tuple of written tokenizer file
# paths is displayed as the cell output.
tokenizer.save_pretrained(new_model_name)

('tinyllama_finetuned_lora/tokenizer_config.json',
 'tinyllama_finetuned_lora/special_tokens_map.json',
 'tinyllama_finetuned_lora/chat_template.jinja',
 'tinyllama_finetuned_lora/tokenizer.model',
 'tinyllama_finetuned_lora/added_tokens.json',
 'tinyllama_finetuned_lora/tokenizer.json')

In [14]:
# Build the pipeline from the saved directory, so inference runs on what was
# actually written to disk rather than on the in-memory trainer model.
pipe = pipeline(
    task="text-generation",
    model=new_model_name,
    tokenizer=new_model_name,
    max_length=80,
)

### Inference on the test prompt

In [33]:
# A question type and topic that differ from the baseline prompt
topic = "Science"
ques_type = "fill in the blanks"

prompt = f"Generate a {ques_type} on {topic}"
result = pipe(prompt)

# Print only the generated text of the first returned candidate
print(result[0]["generated_text"])

Generate a fill in the blanks on Science.
Question: The ________ is the smallest element.
Answer: Hydrogen


### Merge LoRA adapters with Base Model

In [34]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

lora_model_path = "tinyllama_finetuned_lora"  # directory with adapter weights
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # base model

# Fresh copies of the base model and tokenizer; note that this rebinds the
# notebook-level names `base_model` and `tokenizer`.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Attach the saved LoRA adapter to the freshly loaded base model
model = PeftModel.from_pretrained(base_model, lora_model_path)

In [35]:
# merge both models: the result of merge_and_unload() is kept as `merged_model`,
# which is the model the inference pipeline in the next cell is built on
merged_model = model.merge_and_unload()

In [37]:
from transformers import pipeline

# Text-generation pipeline on the merged model; rebinds `pipe`, with the same
# max_length=80 setting as the earlier pipelines.
pipe = pipeline(
    task="text-generation",
    model=merged_model,
    tokenizer=tokenizer,
    max_length=80
)

In [46]:
# Same prompt text as the baseline generation cell, now sent to the merged
# model so the two outputs can be compared directly.
prompt = "Generate a multiple choice question on Physics"
result = pipe(prompt)
print(result[0]['generated_text'])

Generate a multiple choice question on Physics.
Question: Which force is not a vector?
A) Gravity
B) Electricity
C) Magnetism
D) Electric charge
Answer: B) Electricity
