# Fine-Tuning TinyLLAMA on "burkelibbey/colors" Dataset

This project fine-tunes the `TinyLLAMA` model using the `"burkelibbey/colors"` dataset to improve its ability to generate natural language descriptions for colors. Through this fine-tuning, TinyLLAMA becomes adept at interpreting and describing colors in response to prompts, making it useful for applications requiring accurate color-related text generation.


In [1]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset_id="burkelibbey/colors"
model_id = "C:/Users/BS567/Documents/LLM/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6"
output_model = "tinyllama-colorist"

In [3]:
def formatted_train(input, response)->str:
    return f"<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"

In [4]:
def prepare_train_data(data_id):
    data = load_dataset(data_id, split="train")
    print(data.column_names)
    data_df = data.to_pandas()
    data_df["text"] = data_df[["description", "color"]].apply(lambda x: "<|im_start|>user\n" + x["description"]+ "<|im_end|>\n<|im_start|>assistant\n" + x["color"]+ "<|im_end|>\n", axis=1)
    data = Dataset.from_pandas(data_df)
    return data

In [5]:
data = prepare_train_data(dataset_id)

['color', 'description']


In [6]:
data

Dataset({
    features: ['color', 'description', 'text'],
    num_rows: 33887
})

In [7]:
data[0]

{'color': '#000000',
 'description': 'Pure Black: A shade that completely absorbs light and does not reflect any colors. It is the darkest possible shade.',
 'text': '<|im_start|>user\nPure Black: A shade that completely absorbs light and does not reflect any colors. It is the darkest possible shade.<|im_end|>\n<|im_start|>assistant\n#000000<|im_end|>\n'}

In [8]:
def get_bnb_configuration():
   bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
    )
   return bnb_config

In [9]:
def get_model_and_tokenizer(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id);
    tokenizer.pad_token = tokenizer.eos_token
    bnb_config = get_bnb_configuration()
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config = bnb_config,
        device_map = "auto"
    )
    model.config.use_cache=False
    model.config_pretraining_tp=1
    return model, tokenizer

In [10]:
model, tokenizer = get_model_and_tokenizer(model_id)

In [11]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CASUAL_LM"
)

In [12]:
training_arguments = TrainingArguments(
    output_dir=output_model,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    num_train_epochs=2,
    max_steps=100,
    fp16=True
)

In [13]:
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_arguments,
    tokenizer=tokenizer,
    packing=False,
    max_seq_length=1024
    )


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Map: 100%|██████████| 33887/33887 [00:05<00:00, 6437.25 examples/s]
max_steps is given, it will override any value given in num_train_epochs


In [14]:
trainer.train()

  attn_output = torch.nn.functional.scaled_dot_product_attention(
 10%|█         | 10/100 [17:17<2:42:42, 108.47s/it]

{'loss': 2.4202, 'grad_norm': 2.4379231929779053, 'learning_rate': 0.00019510565162951537, 'epoch': 0.01}


 20%|██        | 20/100 [34:38<2:19:47, 104.85s/it]

{'loss': 1.7545, 'grad_norm': 2.2352042198181152, 'learning_rate': 0.00018090169943749476, 'epoch': 0.02}


 30%|███       | 30/100 [52:17<2:04:42, 106.89s/it]

{'loss': 1.3828, 'grad_norm': 0.9807547926902771, 'learning_rate': 0.00015877852522924732, 'epoch': 0.03}


 40%|████      | 40/100 [1:10:08<1:50:23, 110.39s/it]

{'loss': 1.1993, 'grad_norm': 0.7539818286895752, 'learning_rate': 0.00013090169943749476, 'epoch': 0.04}


 50%|█████     | 50/100 [1:28:24<1:30:32, 108.64s/it]

{'loss': 1.0982, 'grad_norm': 0.7402814030647278, 'learning_rate': 0.0001, 'epoch': 0.05}


 60%|██████    | 60/100 [1:46:02<1:10:01, 105.05s/it]

{'loss': 1.0609, 'grad_norm': 0.6680492162704468, 'learning_rate': 6.909830056250527e-05, 'epoch': 0.06}


 70%|███████   | 70/100 [2:04:43<56:13, 112.43s/it]  

{'loss': 1.0109, 'grad_norm': 0.7770265340805054, 'learning_rate': 4.12214747707527e-05, 'epoch': 0.07}


 80%|████████  | 80/100 [2:23:16<35:50, 107.53s/it]

{'loss': 0.9964, 'grad_norm': 0.6150736808776855, 'learning_rate': 1.9098300562505266e-05, 'epoch': 0.08}


 90%|█████████ | 90/100 [2:40:42<17:15, 103.57s/it]

{'loss': 0.9963, 'grad_norm': 0.5920224189758301, 'learning_rate': 4.8943483704846475e-06, 'epoch': 0.08}


100%|██████████| 100/100 [2:58:33<00:00, 110.53s/it]

{'loss': 0.9899, 'grad_norm': 0.6912044882774353, 'learning_rate': 0.0, 'epoch': 0.09}


100%|██████████| 100/100 [2:58:34<00:00, 107.15s/it]

{'train_runtime': 10714.9624, 'train_samples_per_second': 0.299, 'train_steps_per_second': 0.009, 'train_loss': 1.2909243869781495, 'epoch': 0.09}





TrainOutput(global_step=100, training_loss=1.2909243869781495, metrics={'train_runtime': 10714.9624, 'train_samples_per_second': 0.299, 'train_steps_per_second': 0.009, 'total_flos': 1941797864669184.0, 'train_loss': 1.2909243869781495, 'epoch': 0.09442870632672333})

## Load From Fine Tune Model

In [31]:
def formatted_prompt(question)-> str: 
    return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"

In [29]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM
import torch
import os
model_path = "tinyllama-colorist/checkpoint-100"

from transformers import TextStreamer

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

prompt = formatted_prompt('give me a sky blue color')
inputs = tokenizer([prompt], return_tensors="pt")
streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, eos_token_id=[tokenizer.eos_token_id],streamer=streamer, max_new_tokens=10)

<s> <|im_start|>user
give me a sky blue 

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


color<|im_end|>
<|im_start|>assistant: #119999<|im


Let's, try measure the generation performance with regular inference

In [35]:
from transformers import GenerationConfig
from time import perf_counter

prompt = formatted_prompt('give me a red color')

inputs = tokenizer([prompt], return_tensors="pt")
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.5,repetition_penalty=1.2,
    max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
)
start_time = perf_counter()
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
output_time = perf_counter() - start_time
print(f"Time taken for inference: {round(output_time,2)} seconds")

<|im_start|>user
give me a red color<|im_end|>
<|im_start|>assistant: #e01836<|im_end
Time taken for inference: 51.28 seconds
