## Fine-tuning TinyLlama using Ultrachat dataset

In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl sentencepiece

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.3/318.3 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset

#### Lets load & explore the Ultrachat dataset

In [3]:
# Load a small subset of Ultrachat dataset
dataset = (
    load_dataset("HuggingFaceH4/ultrachat_200k",  split="test_sft")
      .shuffle(seed=42)
      .select(range(3_000))
)

README.md:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

(…)-00000-of-00003-a3ecf92756993583.parquet:   0%|          | 0.00/244M [00:00<?, ?B/s]

(…)-00001-of-00003-0a1804bcb6ae68c6.parquet:   0%|          | 0.00/244M [00:00<?, ?B/s]

(…)-00002-of-00003-ee46ed25cfae92c6.parquet:   0%|          | 0.00/244M [00:00<?, ?B/s]

(…)-00000-of-00001-f7dfac4afe5b93f4.parquet:   0%|          | 0.00/81.2M [00:00<?, ?B/s]

(…)-00000-of-00003-a6c9fb894be3e50b.parquet:   0%|          | 0.00/244M [00:00<?, ?B/s]

(…)-00001-of-00003-d6a0402e417f35ca.parquet:   0%|          | 0.00/243M [00:00<?, ?B/s]

(…)-00002-of-00003-c0db75b92a2f48fd.parquet:   0%|          | 0.00/243M [00:00<?, ?B/s]

(…)-00000-of-00001-3d4cd8309148a71f.parquet:   0%|          | 0.00/80.4M [00:00<?, ?B/s]

Generating train_sft split:   0%|          | 0/207865 [00:00<?, ? examples/s]

Generating test_sft split:   0%|          | 0/23110 [00:00<?, ? examples/s]

Generating train_gen split:   0%|          | 0/256032 [00:00<?, ? examples/s]

Generating test_gen split:   0%|          | 0/28304 [00:00<?, ? examples/s]

In [4]:
dataset

Dataset({
    features: ['prompt', 'prompt_id', 'messages'],
    num_rows: 3000
})

In [5]:
dataset['prompt'][2]

'Does the University of Pennsylvania offer any programs for non-traditional students?'

In [6]:
dataset['prompt_id'][2]

'bea86e56e110ef3dc4992d794488a72adb943b54f35b0ba67a51a8e0ff93107b'

In [7]:
dataset['messages'][2]

[{'content': 'Does the University of Pennsylvania offer any programs for non-traditional students?',
  'role': 'user'},
 {'content': 'Yes, the University of Pennsylvania offers several programs for non-traditional students, including:\n\n1. Penn LPS Online: This program offers online courses and degree programs for non-traditional students who wish to earn a degree from Penn. It offers several undergraduate and graduate degree programs.\n\n2. College of Liberal and Professional Studies: This program offers a variety of degree programs, including full-time, part-time, online, and on-campus options. It is designed for working professionals and those who wish to complete their degree later in life.\n\n3. Executive Education: This program offers short-term, intensive courses for working professionals who wish to enhance their skills and knowledge in their field.\n\n4. Summer Sessions: This program offers summer courses for undergraduate and graduate students, including non-traditional stud

#### Convert this instruction dataset into a chat template dataset

In [8]:
# Instruction dataset is typically prepared as a chat template.
# We can use the same template as the chat version of TinyLlama

template_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [9]:
# A simple function to format the dataset into a chat template used by TinyLlama
def format_prompt(example):
    """Format the prompt to using the <|user|> template TinyLLama is using"""

    # Format answers
    chat = example["messages"]
    prompt = template_tokenizer.apply_chat_template(chat, tokenize=False)

    return {"text": prompt}

In [10]:
dataset = dataset.map(format_prompt)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [11]:
dataset

Dataset({
    features: ['prompt', 'prompt_id', 'messages', 'text'],
    num_rows: 3000
})

In [12]:
print (dataset['text'][2])

<|user|>
Does the University of Pennsylvania offer any programs for non-traditional students?</s>
<|assistant|>
Yes, the University of Pennsylvania offers several programs for non-traditional students, including:

1. Penn LPS Online: This program offers online courses and degree programs for non-traditional students who wish to earn a degree from Penn. It offers several undergraduate and graduate degree programs.

2. College of Liberal and Professional Studies: This program offers a variety of degree programs, including full-time, part-time, online, and on-campus options. It is designed for working professionals and those who wish to complete their degree later in life.

3. Executive Education: This program offers short-term, intensive courses for working professionals who wish to enhance their skills and knowledge in their field.

4. Summer Sessions: This program offers summer courses for undergraduate and graduate students, including non-traditional students who wish to complete thei

#### Load the actual tiny llama base model so that we can fine tune it

In [13]:
# 4-bit quantization configuration - Q in QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",  # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute dtype
    bnb_4bit_use_double_quant=True,  # Apply nested quantization
)

In [14]:
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Load the model to train on the GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",

    # Leave this out for regular SFT
    quantization_config=bnb_config,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [15]:
# Load LLaMA tokenizer
#tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

In [16]:
# import torch
# print("CUDA Available:", torch.cuda.is_available())
# print("CUDA Device Count:", torch.cuda.device_count())
# print("CUDA Version:", torch.version.cuda)
# print("PyTorch Version:", torch.__version__)


### LoRA Configuration

In [17]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Prepare LoRA Configuration
peft_config = LoraConfig(
    lora_alpha=32,  # LoRA Scaling
    lora_dropout=0.1,  # Dropout for LoRA Layers
    r=64,  # Rank
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=  # Layers to target
     ['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# prepare model for training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

### Training configurationm

In [23]:
# import os
# os.environ["WANDB_DISABLED"] = "true"

In [24]:
from trl import SFTConfig

# Convert TrainingArguments to SFTConfig
training_arguments = SFTConfig(
    output_dir="./results",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    num_train_epochs=1,
    logging_steps=10,
    fp16=True,
    gradient_checkpointing=True,

    # New additions from SFTConfig version
    max_seq_length=512,  # Ensures sequences are truncated properly
    dataset_text_field="text",  # Specifies which dataset column contains text
    report_to="none",  # Equivalent to disabling W&B
)


In [25]:
# from transformers import TrainingArguments

# output_dir = "./results"

# # Training arguments
# training_arguments = TrainingArguments(
#     output_dir=output_dir,
#     per_device_train_batch_size=2,
#     gradient_accumulation_steps=4,
#     optim="paged_adamw_32bit",
#     learning_rate=2e-4,
#     lr_scheduler_type="cosine",
#     num_train_epochs=1,
#     logging_steps=10,
#     fp16=True,
#     gradient_checkpointing=True
# )

### Actual Fine-tuning

In [26]:
# SFTTrainer?

In [27]:
from trl import SFTTrainer

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
  model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_arguments,

    # Leave this out for regular SFT
    peft_config=peft_config,
)

# Train model
trainer.train()

# Save QLoRA weights
trainer.model.save_pretrained("TinyLlama-1.1B-qlora")

  trainer = SFTTrainer(


Applying chat template to train dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss
10,1.378
20,1.34
30,1.3358
40,1.3759
50,1.387
60,1.3489
70,1.478
80,1.4292
90,1.4144
100,1.3863


### Merging qlora model with orginal model

In [28]:
from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
    "TinyLlama-1.1B-qlora",
    low_cpu_mem_usage=True,
    device_map="auto",
)

# Merge LoRA and base model
merged_model = model.merge_and_unload()

### Inference

In [29]:
from transformers import pipeline

# Use our predefined prompt template
prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""

# Run our instruction-tuned model
pipe = pipeline(task="text-generation", model=merged_model, tokenizer=tokenizer)
print(pipe(prompt)[0]["generated_text"])

Device set to use cuda:0


<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
Large Language Models (LLMs) are a type of artificial intelligence (AI) that can generate human-like language. They are trained on large amounts of data, including text, audio, and video, and are capable of generating complex and nuanced language.

LLMs are used in a variety of applications, including natural language processing (NLP), machine translation, and chatbots. They can be used to generate text, speech, or images, and can be trained to understand different languages and dialects.

One of the most significant applications of LLMs is in the field of natural language generation (NLG). LLMs can be used to generate text in a variety of languages, including English, French, and German. They can also be used to generate speech, such as in a chatbot or voice assistant.

LLMs have the potential to revolutionize the way we communicate and interact with each other. They can help us to communicate more effectively, 

In [41]:
from transformers import pipeline

# Use our predefined prompt template
prompt = """<|user|>
Do you know who Ramana Maharshi is?.</s>
<|assistant|>
"""

# Run our instruction-tuned model
pipe = pipeline(task="text-generation", model=merged_model, tokenizer=tokenizer)
print(pipe(prompt)[0]["generated_text"])

Device set to use cuda:0


<|user|>
Do you know who Ramana Maharshi is?.</s>
<|assistant|>
Ramana Maharshi was a renowned Indian philosopher and yogi who lived in the 19th century. He is known for his teachings on meditation and self-realization. He is considered a pioneer in the field of yoga and is credited with founding the Vedanta school of thought.
