In [29]:
import pandas as pd

In [30]:
# Read the raw recipe table from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/food-recepies/dataset/full_dataset.csv")

# Build the model prompt column "X" as "<title> - <ingredients>" and keep only
# the columns that are not dropped below (the prompt plus `directions`).
df = df.assign(X=df["title"] + " - " + df["ingredients"]).drop(
    columns=["Unnamed: 0", "link", "source", "NER", "title", "ingredients"]
)
df.head(5)

Unnamed: 0,directions,X
0,"[""In a heavy 2-quart saucepan, mix brown sugar...","No-Bake Nut Cookies - [""1 c. firmly packed bro..."
1,"[""Place chipped beef on bottom of baking dish....","Jewell Ball'S Chicken - [""1 small jar chipped ..."
2,"[""In a slow cooker, combine all ingredients. C...","Creamy Corn - [""2 (16 oz.) pkg. frozen corn"", ..."
3,"[""Boil and debone chicken."", ""Put bite size pi...","Chicken Funny - [""1 large whole chicken"", ""2 (..."
4,"[""Combine first four ingredients and press in ...","Reeses Cups(Candy) - [""1 c. peanut butter"", ..."


In [31]:
# Keep only the first 100,000 rows.
df = df.head(100000)

In [32]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Hub id of the checkpoint whose tokenizer and weights are loaded in this cell.
model_name = "flax-community/t5-recipe-generation"

# Tokenizer first, then the seq2seq model, both resolved from the same id.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSeq2SeqLM.from_pretrained(model_name),
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [33]:
def generate_recipe(text):
    """Generate text for ``text`` using the notebook-level ``tokenizer`` and ``model``.

    The prompt is tokenized into PyTorch tensors, moved to the device the model
    lives on, and passed to ``model.generate`` with ``max_length=200``. The
    first returned sequence is decoded with special tokens stripped.
    """
    encoded = tokenizer(text, return_tensors="pt")
    encoded = encoded.to(model.device)
    generated = model.generate(**encoded, max_length=200)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [34]:
from torch.utils.data import Dataset, DataLoader
class data_idk(Dataset):
    """Map-style dataset yielding ``(prompt, directions)`` pairs from a DataFrame.

    Args:
        df: DataFrame with an ``"X"`` column (model input text) and a
            ``"directions"`` column (target text).
    """

    def __init__(self, df):
        super().__init__()
        self.X = df["X"]
        self.y = df["directions"]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(X, directions)`` pair stored at position ``index``."""
        # Samplers hand out positions in [0, len), so look rows up by position.
        # Plain ``series[index]`` is a label lookup and breaks (KeyError or the
        # wrong row) as soon as the frame's index is not exactly 0..n-1, e.g.
        # after filtering, shuffling or slicing from the middle.
        return self.X.iloc[index], self.y.iloc[index]
# Wrap the prepared frame so it can be split and batched downstream.
full_dataset = data_idk(df=df)

In [35]:
from torch.utils.data import random_split
import torch

# 80/20 train/validation split. A seeded generator keeps the membership of each
# split identical across kernel restarts, so losses are comparable between runs.
train_len = int(len(full_dataset) * 0.8)
valid_len = len(full_dataset) - train_len
train_dataset, valid_dataset = random_split(
    full_dataset,
    [train_len, valid_len],
    generator=torch.Generator().manual_seed(42),
)

In [36]:
from torch.nn.utils.rnn import pad_sequence
def collate_fn(batch):
    """Turn a list of ``(input_text, target_text)`` pairs into one model batch.

    Both sides are tokenized with the notebook-level ``tokenizer``, padded to
    the longest item in the batch and truncated to 512 tokens. Padding
    positions in the targets are replaced by -100 before being returned as
    ``labels``.

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
    """
    sources, targets = map(list, zip(*batch))

    # Identical tokenization settings for inputs and targets.
    tokenize_options = dict(
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    encoded_sources = tokenizer(sources, **tokenize_options)
    encoded_targets = tokenizer(targets, **tokenize_options)

    target_ids = encoded_targets["input_ids"]
    labels = target_ids.masked_fill(target_ids == tokenizer.pad_token_id, -100)

    return {
        "input_ids": encoded_sources["input_ids"],
        "attention_mask": encoded_sources["attention_mask"],
        "labels": labels,
    }

In [37]:
# Reshuffle the training batches every epoch so multi-epoch runs do not see the
# exact same batch sequence each time; validation order stays fixed.
train_loader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, collate_fn=collate_fn, batch_size=16)

In [38]:
from peft import LoraConfig, get_peft_model, TaskType

# Base checkpoint that is fine-tuned below. The tokenizer is reloaded from the
# same checkpoint so that batching, generation and the Hub upload all use the
# vocabulary this model was trained with, not the one loaded earlier for a
# different checkpoint.
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank
    lora_alpha=32,
    target_modules=["q", "v"],  # Modules to apply LoRA to (query and value in attention)
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM  # For sequence-to-sequence models
)

# Convert model to PEFT model with LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Reports trainable vs. total parameter counts

trainable params: 344,064 || all params: 77,305,216 || trainable%: 0.4451


In [39]:
import torch
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Use CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# Optimisation settings: one pass over the training loader, AdamW, and a
# linear learning-rate schedule with no warm-up spanning every optimizer step.
EPOCHS = 1
optimizer = AdamW(params=model.parameters(), lr=5e-5)
total_steps = EPOCHS * len(train_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=total_steps
)

In [None]:
from tqdm import tqdm  

for epoch in range(EPOCHS):
    # ---- Training pass ----
    model.train()
    total_train_loss = 0
    train_progress = tqdm(
        train_loader, 
        desc=f'Epoch {epoch+1}/{EPOCHS} [Training]',
        bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}'
    )
    
    # Count steps explicitly: tqdm only syncs its public `.n` counter when it
    # redraws the bar, so `.n` can lag behind the number of batches processed
    # and would inflate the running average shown in the postfix.
    for step, batch in enumerate(train_progress, start=1):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        
        total_train_loss += loss.item()
        avg_loss = total_train_loss / step
        train_progress.set_postfix({'loss': f'{avg_loss:.4f}'})
    
    # ---- Validation pass (no gradient tracking, no parameter updates) ----
    model.eval()
    total_val_loss = 0
    val_progress = tqdm(
        valid_loader, 
        desc=f'Epoch {epoch+1}/{EPOCHS} [Validation]',
        bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}',
        leave=False  
    )
    with torch.no_grad():
        for step, batch in enumerate(val_progress, start=1):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            loss = outputs.loss
            total_val_loss += loss.item()
            avg_loss = total_val_loss / step
            val_progress.set_postfix({'val_loss': f'{avg_loss:.4f}'})

    # Per-epoch means over the number of batches in each loader.
    avg_train_loss = total_train_loss / len(train_loader)
    avg_val_loss = total_val_loss / len(valid_loader)
    
    print(f"\nEpoch {epoch+1}/{EPOCHS} Summary:")
    print(f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    print('-' * 50)

Epoch 1/1 [Training]: 100%|████████████████████| 5000/5000 [13:22<00:00,  6.23it/s, loss=2.4716]
Epoch 1/1 [Validation]:  93%|██████████████████▋ | 1168/1250 [01:22<00:05, 14.66it/s, val_loss=2.0638]

In [43]:
# Prints a fixed marker string.
# NOTE(review): looks like a leftover "is the kernel still alive" check — confirm and remove.
print("HI")

HI


In [44]:
def predict_recipe(title_ingredients):
    """
    Generate recipe directions for a single recipe.

    `title_ingredients` is a mapping with "title" and "ingredients" entries.
    The two values are joined into the prompt "<title> - <ingredients>",
    tokenized (truncated to 512 tokens), and decoded from a 5-beam search
    capped at 200 tokens. Returns the decoded text of the first sequence
    with special tokens removed.
    """
    # Build the prompt from the two fields.
    prompt = "{} - {}".format(
        title_ingredients['title'],
        title_ingredients['ingredients'],
    )

    # Tokenize and move the tensors onto the device used by the model.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    encoded = encoded.to(device)

    # Beam-search generation settings.
    generation_options = dict(max_length=200, num_beams=5, early_stopping=True)
    generated = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        **generation_options,
    )

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [49]:
predict_recipe({"title" : "Smoothie",
                "ingredients" :"Watermelon, Kiwi, Apple and Frozen Banana Smoothie"})

'Pour watermelon, Kiwi, Apple and Banana into a blender. Blend until smooth.'

In [53]:
from huggingface_hub import HfApi
from transformers import AutoConfig
api = HfApi()

# Target repository on the Hugging Face Hub ("<user>/<repo name>"). Every
# upload below goes through this one variable so the targets cannot drift apart.
repo_id = "DavidGI23200/recepie_llm_fine_tuned_with_lora"
api.create_repo(repo_id=repo_id, exist_ok=True)  # creates repo if not exists

# Upload the model and the tokenizer.
model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)

# Also publish the config resolved from `model_name` to the same repository.
config = AutoConfig.from_pretrained(model_name)
config.push_to_hub(repo_id, commit_message="Update config with model_type")

Uploading...:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/DavidGI23200/recepie_llm_fine_tuned_with_lora/commit/0cae7787b0e047fa7cab3134f8ae049246ae5032', commit_message='Update config with model_type', commit_description='', oid='0cae7787b0e047fa7cab3134f8ae049246ae5032', pr_url=None, repo_url=RepoUrl('https://huggingface.co/DavidGI23200/recepie_llm_fine_tuned_with_lora', endpoint='https://huggingface.co', repo_type='model', repo_id='DavidGI23200/recepie_llm_fine_tuned_with_lora'), pr_revision=None, pr_num=None)