In [None]:
# Install required libraries

!pip install bitsandbytes torch loralib datasets

In [None]:
!pip install -U accelerate
!pip install -U peft
!pip install -U transformers

In [None]:
# Import required libraries

import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Log to Hugging Face, your token should be in https://huggingface.co/settings/tokens
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Loading pretrained model, it's tokenizer form Hugging Face (Load any text generation model [max 7b version if you are not using colab pro])
# Configuration of model to load and run in 4 bit precission(less memory, usable with LLM's)

MODEL_NAME = "bigscience/bloom-7b1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/739 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/28.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.16G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

In [None]:
# The function shows how many parameters we can update (during training)

def print_trainable_parameters(model):
  """
  Prints the number of trainable parameters in the model.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    all_param += param.numel()
    if param.requires_grad:
      trainable_params += param.numel()
  print(
      f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}"
  )

In [None]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# We are using LoRA technique to finetune our LLM

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 7864320 || all params: 4056981504 || trainables%: 0.19384658254532677


In [None]:
prompt = """
Create a recipe with the following ingredients: milk, flour, sugar , eggs.
""".strip()

In [None]:
generation_config = model.generation_config  # Setting the model generation parameters
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
%%time
device = "cuda:0"  # Use the GPU device

encoding = tokenizer(prompt, return_tensors="pt").to(device)  # Move the encoding to the GPU

with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))



Create a recipe with the following ingredients: milk, flour, sugar , eggs. Add the following to the recipe: butter, vanilla extract, and salt. Then, add the following to the recipe: cinnamon, nutmeg, and cloves. Finally, add the following to the recipe: applesauce, and lemon juice.
CPU times: user 9.47 s, sys: 690 ms, total: 10.2 s
Wall time: 15.9 s


In [None]:
# Load the data in csv format
# Link to dataset: https://www.kaggle.com/datasets/wilmerarltstrmberg/recipe-dataset-over-2m
dataset = load_dataset('csv', data_files='/content/drive/MyDrive/Datasets/Food/recipes_data_reduced.csv')

In [None]:
from datasets import DatasetDict

# We are only using couple of columns from dataset
columns_to_keep = ['title', 'ingredients', 'directions', 'NER']

def filter_columns(data_point):
    return {key: data_point[key] for key in columns_to_keep}

# Filter and select for each split in the DatasetDict and make it smaller (only 20 000 rows, because all data weights 2GB and we don't have that much computing power)
filtered_dataset = DatasetDict()
for split in dataset.keys():
    filtered_dataset[split] = dataset[split].map(
        lambda x: {key: x[key] for key in columns_to_keep if key in x},
        batched=True,
        remove_columns=dataset[split].column_names
    ).select(range(20000))


Map:   0%|          | 0/557785 [00:00<?, ? examples/s]

In [None]:
# Generate prompt function
def generate_prompt(data_point):
    ingredients = ', '.join(data_point['NER'])
    return f"Create a recipe with the following ingredients: {ingredients}."

# Generate and tokenize prompt function
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True, return_tensors='pt')
    return {'input_ids': tokenized_full_prompt['input_ids'][0], 'attention_mask': tokenized_full_prompt['attention_mask'][0]} # Return the first element of the tensors to get a 1D tensor for input_ids and attention_mask


In [None]:
# Shuffling the dataset randomly and applying generate_and_tokenize_prompt() to each element of dataset
filtered_dataset = filtered_dataset.shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

In [None]:
filtered_dataset

DatasetDict({
    train: Dataset({
        features: ['title', 'ingredients', 'directions', 'NER', 'input_ids', 'attention_mask'],
        num_rows: 20000
    })
})

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    max_steps = 500,
)

# Set up the trainer
trainer = Trainer(
    model=model,
    train_dataset=filtered_dataset["train"],
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# Disable caching to avoid issues during training
model.config.use_cache = False

# Train the model
trainer.train()

# Connect the drive and save trained model
model.save_pretrained("/content/drive/MyDrive/Models/reciepeGPT/trained_model")

# Push the model to the Hugging Face Hub
MY_MODEL = "[Your_username]/recipe-gpt-bloom-7b1"
model.push_to_hub(MY_MODEL, use_auth_token=True, private=True)

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
10,1.1466
20,0.968
30,0.732
40,0.6384
50,0.576
60,0.5706
70,0.4664
80,0.5239
90,0.4385
100,0.4473




adapter_model.safetensors:   0%|          | 0.00/31.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/KVM1L/recipe-gpt-bloom-7b1/commit/16b09df44f497f47e8e4ce29412a64c15f23b869', commit_message='Upload model', commit_description='', oid='16b09df44f497f47e8e4ce29412a64c15f23b869', pr_url=None, pr_revision=None, pr_num=None)

In [4]:
MY_MODEL = "KVM1L/recipe-gpt-bloom-7b1"  # Load your model from Hugging Face

In [6]:
# Configuration of model to load and run in 4 bit precission(less memory, usable with LLM's)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [8]:

config = PeftConfig.from_pretrained(MY_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [58]:
generation_config = model.generation_config
generation_config.max_new_tokens = 300
generation_config.temperature = 0.7
generation_config.top_p = 0.9
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [61]:
prompt = """
Create a recipe with the following ingredients: tuna, pimentos, onion, macaroni.
""".strip()

In [62]:
%%time
device = "cuda:0"

# Tokenize the prompt and move to the correct device
encoding = tokenizer(prompt, return_tensors="pt")
input_ids = encoding.input_ids.to(device)
attention_mask = encoding.attention_mask.to(device)

# Generate text with the model
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=generation_config.max_new_tokens + len(input_ids[0]),  # Ensure max length
        temperature=generation_config.temperature,
        top_p=generation_config.top_p,
        num_return_sequences=generation_config.num_return_sequences,
        pad_token_id=generation_config.pad_token_id,
        eos_token_id=generation_config.eos_token_id,
        do_sample=True
    )

# Decode and print the output
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Both `max_new_tokens` (=300) and `max_length`(=320) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Create a recipe with the following ingredients: tuna, pimentos, onion, macaroni. Cook until tuna is done and pimentos are soft. Add in the macaroni and cook until it is soft. Drain and set aside.
Put the tuna, pimentos, onion, and macaroni in a blender. Blend until smooth. Set aside and let cool.
Add the tuna, pimentos, onion, and macaroni to a bowl. Stir in the olive oil, garlic, parsley, and salt and pepper. Add the tomatoes and mix well.
To make the dressing, in a small bowl, combine the mayonnaise, mustard, Worcestershire sauce, and lemon juice. Season with salt and pepper.
Place the salad on a plate. Pour the dressing over the salad. Serve immediately. This salad can be served as is or refrigerated and reheated before serving.
This is my favorite salad to make. I make it all the time and we eat it all the time. I have a recipe for it on my site too, but this one is my favorite. Thanks for sharing it!
The best tuna salad is always made with real tuna, not processed. I don’t have a 