In [None]:
# Show the GPU attached to this runtime before doing any heavy work.
# (The recorded run below reports a single Tesla T4 with 15360MiB of memory.)
!nvidia-smi

Tue Dec  2 03:43:14 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# installing all the libraries needed for fine-tuning
!pip install transformers datasets peft bitsandbytes accelerate sentencepiece

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [None]:
# Mount Google Drive at /content/drive; the dataset, the training checkpoints and
# the merged model used by later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# saving dataset to Drive 
!cp /content/recipe_dataset_200.jsonl /content/drive/MyDrive/AI-Recipe-Datasets

In [None]:
from datasets import load_dataset

# Read the JSONL recipe file from Drive as a single "train" split.
DATASET_PATH = "/content/drive/MyDrive/AI-Recipe-Datasets/recipe_dataset_200.jsonl"
dataset = load_dataset("json", data_files=DATASET_PATH, split="train")

# Peek at the first record to see the raw schema
# (ingredients, recipe_title, ingredients_list, steps).
dataset[0]

{'ingredients': ['bell pepper', 'butter', 'salt', 'beans'],
 'recipe_title': 'Creative Bell Pepper, Butter, Salt, Beans Recipe',
 'ingredients_list': ['bell pepper', 'butter', 'salt', 'beans'],
 'steps': ['Prepare the ingredients: wash, chop, and set aside bell pepper, butter, salt, beans.',
  'Heat a pan and add beans to develop aroma.',
  'Mix in remaining ingredients and cook on medium flame.',
  'Season with salt, pepper, and any herbs available.',
  'Simmer until the dish reaches desired consistency.',
  'Serve hot and garnish lightly for presentation.']}

In [None]:
from datasets import load_dataset

# Reload the raw dataset at the top of this cell on purpose: the last line of the
# cell rebinds `dataset` to the formatted version (raw columns removed), so without
# this reload a second run of the cell would no longer find the raw columns.
dataset = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/AI-Recipe-Datasets/recipe_dataset_200.jsonl",
    split="train"
)

# Render each recipe as one "<s>[INST] prompt [/INST] answer</s>" training string.
# NOTE(review): the [INST] markup is written by hand (Llama-2 style) rather than
# produced by tokenizer.apply_chat_template - confirm it is the format you want
# for the TinyLlama chat model. Inference cells must use the same markup.
def format_example(example):
    """Turn one raw recipe record into a single supervised training string.

    Args:
        example: Mapping with the keys ``ingredients``, ``ingredients_list`` and
            ``steps`` (sequences of strings) and ``recipe_title`` (string).

    Returns:
        ``{"text": ...}`` where the text is the instruction prompt wrapped in
        ``<s>[INST] ... [/INST]``, a newline, the target recipe and ``</s>``.
    """
    ingredient_csv = ", ".join(example["ingredients"])
    bullet_lines = [f"- {item}" for item in example["ingredients_list"]]
    numbered_steps = [
        f"{position}. {step}"
        for position, step in enumerate(example["steps"], start=1)
    ]

    # Expected model answer: title, bulleted ingredients, numbered steps.
    # Surrounding whitespace is stripped so "</s>" follows the last step directly.
    target = "\n".join([
        f"Title: {example['recipe_title']}",
        "",
        "Ingredients:",
        "\n".join(bullet_lines),
        "",
        "Steps:",
        "\n".join(numbered_steps),
        "",
    ]).strip()

    # Instruction shown to the model; it ends with a newline, so the closing
    # " [/INST]" tag starts on its own line.
    prompt = (
        "You are a recipe generator.\n"
        "\n"
        f"Ingredients: {ingredient_csv}\n"
        "\n"
        "Generate a structured recipe with:\n"
        "- Creative title\n"
        "- Ingredient list\n"
        "- 5–6 cooking steps.\n"
    )

    return {"text": "<s>[INST] " + prompt + " [/INST]\n" + target + "</s>"}

# Apply the formatter to every record and drop the raw columns, leaving only the
# single `text` column that format_example returns.
dataset = dataset.map(format_example, remove_columns=dataset.column_names)


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [None]:
# Debugging probe: confirm what a slice / a batched map hands to a function.
batch = dataset[:5]  # slicing returns a dict of column name -> list of values
print(batch)

print("Type of batch['text']:", type(batch['text']))

def check_batch(example):
    """Print the type of the `text` column seen inside a batched map, then pass the batch through unchanged."""
    print("Inside map function, type(example['text']):", type(example['text']))
    return example

# Result is discarded; the map is run only for the printed type
# (the recorded output shows <class 'list'>).
_ = dataset.map(check_batch, batched=True)


{'text': ['<s>[INST] You are a recipe generator.\n\nIngredients: bell pepper, butter, salt, beans\n\nGenerate a structured recipe with:\n- Creative title\n- Ingredient list\n- 5–6 cooking steps.\n [/INST]\nTitle: Creative Bell Pepper, Butter, Salt, Beans Recipe\n\nIngredients:\n- bell pepper\n- butter\n- salt\n- beans\n\nSteps:\n1. Prepare the ingredients: wash, chop, and set aside bell pepper, butter, salt, beans.\n2. Heat a pan and add beans to develop aroma.\n3. Mix in remaining ingredients and cook on medium flame.\n4. Season with salt, pepper, and any herbs available.\n5. Simmer until the dish reaches desired consistency.\n6. Serve hot and garnish lightly for presentation.</s>', '<s>[INST] You are a recipe generator.\n\nIngredients: bell pepper, garlic, beans, pasta, spinach\n\nGenerate a structured recipe with:\n- Creative title\n- Ingredient list\n- 5–6 cooking steps.\n [/INST]\nTitle: Creative Bell Pepper, Garlic, Beans, Pasta, Spinach Recipe\n\nIngredients:\n- bell pepper\n- g

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Inside map function, type(example['text']): <class 'list'>


In [None]:
# Each element of the sliced `text` column is a plain string (recorded output: str).
type(batch['text'][0])

str

In [None]:
from transformers import AutoTokenizer

BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# A pad token is required for padding="max_length" below.
# Do NOT reuse EOS for padding: DataCollatorForLanguageModeling (used by the
# Trainer cell) sets every label equal to pad_token_id to -100, so with
# pad == eos the closing </s> of each recipe is excluded from the loss and the
# model never learns where to stop. Pad with <unk> instead when it is available.
if tokenizer.unk_token is not None and tokenizer.unk_token_id != tokenizer.eos_token_id:
    tokenizer.pad_token = tokenizer.unk_token
else:
    # Fallback to the previous behaviour if the tokenizer has no usable <unk>.
    tokenizer.pad_token = tokenizer.eos_token

# 512 is enough for recipes, tried 1024 but too slow
MAX_LENGTH = 512


def tokenize_function(examples):
    """Tokenize a batch of formatted training strings.

    Args:
        examples: Batch dict from ``dataset.map(..., batched=True)``; its
            ``"text"`` entry is a list of strings (not a single string).

    Returns:
        The tokenizer output for the batch, with every sequence truncated or
        padded to exactly ``MAX_LENGTH`` tokens.
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )


tokenized_dataset = dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [None]:
# For causal-LM training the targets are the input tokens themselves
# (the shift for next-token prediction is handled downstream).
# NOTE(review): the training cell uses DataCollatorForLanguageModeling(mlm=False),
# which also derives labels from input_ids, so this column looks redundant -
# confirm before removing.
def _labels_from_input_ids(batch):
    """Return a ``labels`` column that mirrors ``input_ids`` for the batch."""
    return {"labels": batch["input_ids"]}


tokenized_dataset = tokenized_dataset.map(_labels_from_input_ids, batched=True)


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [None]:
# Split into train/test - 10% held out for validation.
# A fixed seed keeps the split identical across runs, so results stay comparable
# and the same examples remain held out.
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)


In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 4-bit (NF4) quantization of the frozen base weights so LoRA training fits
# comfortably on the T4 GPU (16GB VRAM); the author ran out of memory when
# training at full precision.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",   # matmuls run in fp16
    bnb_4bit_use_double_quant=True,     # also quantize the quantization constants
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto"
)

# Documented PEFT preparation step for training on a quantized model: freezes the
# base weights and upcasts the remaining non-quantized parameters for stability.
# Gradient checkpointing is left off to keep the memory/speed profile of the
# original run (batch size 1 already fits on the T4).
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# LoRA config - r=32 and alpha=64 worked best after trying different values
# targeting the attention projection layers
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM"
)

# Wrap the quantized base model so that only the LoRA adapter weights are trained.
model = get_peft_model(model, lora_config)


In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/recipe-qlora-model",  # checkpoints go straight to Drive
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,   # effective batch size of 8
    warmup_steps=20,
    # NOTE(review): the recorded run logs epoch ~21.8 at step 500, i.e. about 22
    # passes over the training split - watch the validation loss for overfitting.
    max_steps=500,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=200,
    # Actually use the held-out split: without an eval strategy the eval_dataset
    # passed to the Trainer is never evaluated.
    # (`eval_strategy` is the current argument name; older transformers releases
    # call it `evaluation_strategy`.)
    eval_strategy="steps",
    eval_steps=50,
    per_device_eval_batch_size=4,
    remove_unused_columns=True,      # drops the raw `text` column before batching
    report_to="none"
)

# mlm=False -> causal-LM collation: labels are built from input_ids with
# padding positions set to -100 so they are ignored by the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=data_collator
)

trainer.train()

Step,Training Loss
10,1.6262
20,0.7637
30,0.1847
40,0.1187
50,0.0976
60,0.0931
70,0.0889
80,0.0837
90,0.0806
100,0.0793


TrainOutput(global_step=500, training_loss=0.10767788779735565, metrics={'train_runtime': 966.3312, 'train_samples_per_second': 4.139, 'train_steps_per_second': 0.517, 'total_flos': 1.2553538626387968e+16, 'train_loss': 0.10767788779735565, 'epoch': 21.755555555555556})

In [None]:
# testing the model after training
def generate_recipe(ingredients_list, max_new_tokens=300, return_full_text=True):
    """Generate a recipe for the given ingredients with the fine-tuned model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects and the same
    ``<s>[INST] ... [/INST]`` prompt layout that the training data uses.

    Args:
        ingredients_list: Iterable of ingredient name strings.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        return_full_text: When True (default, the original behaviour) the
            decoded prompt is part of the returned text. When False only the
            newly generated text is returned, stripped of surrounding whitespace.

    Returns:
        The decoded text as a string, with special tokens removed.
    """
    ingredients_str = ", ".join(ingredients_list)
    prompt = f"""You are a recipe generator.

Ingredients: {ingredients_str}

Generate a structured recipe with:
- Creative title
- Ingredient list
- 5–6 cooking steps.
"""

    # The model may still be in training mode after trainer.train(); switch to
    # eval mode so LoRA dropout is disabled while sampling.
    model.eval()

    inputs = tokenizer(f"<s>[INST] {prompt} [/INST]", return_tensors="pt").to(model.device)
    # these params work okay but outputs still feel repetitive
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.2
    )
    generated = outputs[0]
    if return_full_text:
        return tokenizer.decode(generated, skip_special_tokens=True)

    # The returned sequence starts with the prompt tokens; slice them off by
    # length instead of searching the decoded text for "[/INST]".
    prompt_len = len(inputs["input_ids"][0])
    return tokenizer.decode(generated[prompt_len:], skip_special_tokens=True).strip()

# Smoke test on a hand-picked ingredient combination.
# The printed text starts with the echoed prompt (see the recorded output below).
ingredients = ["chicken", "garlic", "lemon"]
print(generate_recipe(ingredients))

[INST] You are a recipe generator.

Ingredients: chicken, garlic, lemon

Generate a structured recipe with:
- Creative title
- Ingredient list
- 5–6 cooking steps.
 [/INST]
Title: Creative Chicken, Garlic, Lemon Recipe

Ingredients:
- chicken
- garlic
- lemon

Steps:
1. Prepare the ingredients: wash, chop, and set aside chicken, garlic, lemon.
2. Heat a pan and add garlic to develop aroma.
3. Mix in remaining ingredients and cook on medium flame.
4. Season with salt, pepper, and any herbs available.
5. Simmer until the dish reaches desired consistency.
6. Serve hot and garnish lightly for presentation.

Tips:
- Use fresh ingredients for best results.
- Don't overcook dish as it can lose its nutrients.
- Season with herbs available for optimal taste.

Creative Title: Creative Chicken, Garlic, Lemon Recipe


In [None]:
# NOTE: this redefines the generate_recipe helper from the previous cell; the
# later definition is the one in effect, so it carries the same behaviour.
def generate_recipe(ingredients_list, max_new_tokens=300, return_full_text=True):
    """Generate a recipe for the given ingredients with the fine-tuned model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects and the same
    ``<s>[INST] ... [/INST]`` prompt layout that the training data uses.

    Args:
        ingredients_list: Iterable of ingredient name strings.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        return_full_text: When True (default, the original behaviour) the
            decoded prompt is part of the returned text. When False only the
            newly generated text is returned, stripped of surrounding whitespace.

    Returns:
        The decoded text as a string, with special tokens removed.
    """
    ingredients_str = ", ".join(ingredients_list)
    prompt = f"""You are a recipe generator.

Ingredients: {ingredients_str}

Generate a structured recipe with:
- Creative title
- Ingredient list
- 5–6 cooking steps.
"""

    # The model may still be in training mode after trainer.train(); switch to
    # eval mode so LoRA dropout is disabled while sampling.
    model.eval()

    inputs = tokenizer(f"<s>[INST] {prompt} [/INST]", return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.2
    )
    generated = outputs[0]
    if return_full_text:
        return tokenizer.decode(generated, skip_special_tokens=True)

    # The returned sequence starts with the prompt tokens; slice them off by
    # length instead of searching the decoded text for "[/INST]".
    prompt_len = len(inputs["input_ids"][0])
    return tokenizer.decode(generated[prompt_len:], skip_special_tokens=True).strip()

# Sanity check using the ingredients of the first dataset record (dataset[0]
# above), i.e. an example whose expected recipe is known from the data.
ingredients = ["bell pepper", "butter", "salt", "beans"]
print(generate_recipe(ingredients))

[INST] You are a recipe generator.

Ingredients: bell pepper, butter, salt, beans

Generate a structured recipe with:
- Creative title
- Ingredient list
- 5–6 cooking steps.
 [/INST]
Title: Creative Bell Pepper, Butter, Salt, Beans Recipe

Ingredients:
- bell pepper
- butter
- salt
- beans

Steps:
1. Prepare the ingredients: wash, chop, and set aside bell pepper, butter, salt, beans.
2. Heat a pan and add beans to develop aroma.
3. Mix in remaining ingredients and cook on medium flame.
4. Season with salt, pepper, and any herbs available.
5. Simmer until the dish reaches desired consistency.
6. Serve hot and garnish lightly for presentation.

Cooking step by step:
1. Heat a pan and add beans to develop aroma.
2. Mix in remaining ingredients and cook on medium flame.
3. Season with salt, pepper, and any herbs available.
4. Simmer until the dish reaches desired consistency.
5. Serve hot and garnish lightly for presentation.

Enjoy your creative bell pepper, butter, salt, beans recipe!


In [None]:
# List the step-500 checkpoint directory; the merge cells below load the LoRA
# adapter from this path.
!ls /content/drive/MyDrive/recipe-qlora-model/checkpoint-500

adapter_config.json	   rng_state.pth	    tokenizer.json
adapter_model.safetensors  scaler.pt		    tokenizer.model
chat_template.jinja	   scheduler.pt		    trainer_state.json
optimizer.pt		   special_tokens_map.json  training_args.bin
README.md		   tokenizer_config.json


In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# merging LoRA adapter with base model for inference
# this creates a single model file instead of base + adapter
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_path = "/content/drive/MyDrive/recipe-qlora-model/checkpoint-500"

# NOTE: this rebinds the notebook-level `tokenizer` and `model` names.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the base weights unquantized in fp16 so the adapter can be folded into them.
# `trust_remote_code=True` was dropped: it permits executing code shipped with the
# checkpoint and is not needed here (the later cell loads the same checkpoint
# without it).
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Attach the trained adapter, then merge its weights into the base model and
# drop the PEFT wrappers.
model = PeftModel.from_pretrained(base_model, adapter_path)
merged_model = model.merge_and_unload()

# saving merged model to local directory first, then will copy to Drive
merged_model.save_pretrained("./recipe-merged-model")
tokenizer.save_pretrained("./recipe-merged-model")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

('./recipe-merged-model/tokenizer_config.json',
 './recipe-merged-model/special_tokens_map.json',
 './recipe-merged-model/chat_template.jinja',
 './recipe-merged-model/tokenizer.model',
 './recipe-merged-model/added_tokens.json',
 './recipe-merged-model/tokenizer.json')

In [None]:
# copying merged model to Drive so I can use it in the backend notebook
!cp -r /content/recipe-merged-model "/content/drive/MyDrive/recipe-merged-final"

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# End-to-end check: rebuild the merged model from base weights + adapter and
# sample one recipe from it.
base_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_dir = "/content/drive/MyDrive/recipe-qlora-model/checkpoint-500"

# Base model in fp16, then attach the adapter and fold it into the base weights.
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.float16, device_map="auto"
)
model_with_lora = PeftModel.from_pretrained(base_model, adapter_dir)
final_model = model_with_lora.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quick test prompt. Note that its wording is shorter than the instruction text
# used by format_example / generate_recipe.
prompt = "<s>[INST] Ingredients: chicken, rice, soy sauce, garlic\nGenerate a recipe with title, ingredients and steps. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(final_model.device)

sampling_options = dict(
    max_new_tokens=400,
    temperature=0.8,
    do_sample=True,
    top_p=0.9,
    repetition_penalty=1.2,
)
output = final_model.generate(**inputs, **sampling_options)

# Keep only what follows the last "[/INST]" marker, i.e. drop the echoed prompt.
decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded_text.split("[/INST]")[-1])

 Title: Chicken, Rice, Soy Sauce, Garlic Recipe
Ingredients: chicken, rice, soy sauce, garlic
Steps:
1. Prepare the ingredients: wash, chop, and set aside chicken, rice, soy sauce, garlic.
2. Heat a pan and add soy sauce to develop aroma.
3. Mix in remaining ingredients and cook on medium flame.
4. Season with salt, pepper, and any herbs available.
5. Simmer until the dish reaches desired consistency.
6. Serve hot and garnish lightly for presentation.
Ingredient recommendations:
- Chicken: butter, bell pepper, pasta, olive oil
- Rice: butter, carrot, potato, onion
- Soy sauce: tomatoes, egg, rice wine
- Garlic: bell pepper, lemon, spinach

Cooking step by step:
1. Prepare the ingredients: wash, chop, and set aside chicken, rice, soy sauce, garlic.
2. Heat a pan and add soy sauce to develop aroma.
3. Mix in remaining ingredients and cook on medium flame.
4. Season with salt, pepper, and any herbs available.
5. Simmer until the dish reaches desired consistency.
6. Serve hot and garnish l