In [1]:
import sys

print("Python version")
print(sys.version)
print("Version info.")
print(sys.version_info)

Python version
3.8.19 (default, Mar 20 2024, 15:27:52) 
[Clang 14.0.6 ]
Version info.
sys.version_info(major=3, minor=8, micro=19, releaselevel='final', serial=0)


In [20]:
# Notes: 
# #torch
#!pip install transformers

# Plate2Recipe Project


## Data Collection and Preprocessing


- Data loading: *use food-101.tar.gz for CNN and full_dataset.csv for NLP* (https://drive.google.com/drive/folders/1ui_zS11_ENZTCNLUsgg_UwAYr-ZaLbac)
- Data cleaning
- Data augmentation

In [2]:
# CNN - Load and Transform Data

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(), 
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

valid_transforms = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

root_dir = './data/'

train_dataset = datasets.Food101(root=root_dir, split='train', transform=train_transforms, download=True)
valid_dataset = datasets.Food101(root=root_dir, split='test', transform=valid_transforms, download=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

In [3]:
# NLP - Load data

import pandas as pd

recipes = pd.read_csv('./data/full_dataset.csv', encoding='UTF-8')
recipes = recipes.drop(recipes.columns[0], axis=1)
recipes.head(20)

Unnamed: 0,title,ingredients,directions,link,source,NER
0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""bu..."
1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom..."
2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""gar..."
3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroo..."
4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""bu..."
5,Cheeseburger Potato Soup,"[""6 baking potatoes"", ""1 lb. of extra lean gro...","[""Wash potatoes; prick several times with a fo...",www.cookbooks.com/Recipe-Details.aspx?id=20115,Gathered,"[""baking potatoes"", ""extra lean ground beef"", ..."
6,Rhubarb Coffee Cake,"[""1 1/2 c. sugar"", ""1/2 c. butter"", ""1 egg"", ""...","[""Cream sugar and butter."", ""Add egg and beat ...",www.cookbooks.com/Recipe-Details.aspx?id=210288,Gathered,"[""sugar"", ""butter"", ""egg"", ""buttermilk"", ""flou..."
7,Scalloped Corn,"[""1 can cream-style corn"", ""1 can whole kernel...","[""Mix together both cans of corn, crackers, eg...",www.cookbooks.com/Recipe-Details.aspx?id=876969,Gathered,"[""cream-style corn"", ""whole kernel corn"", ""cra..."
8,Nolan'S Pepper Steak,"[""1 1/2 lb. round steak (1-inch thick), cut in...","[""Roll steak strips in flour."", ""Brown in skil...",www.cookbooks.com/Recipe-Details.aspx?id=375254,Gathered,"[""tomatoes"", ""water"", ""onions"", ""Worcestershir..."
9,Millionaire Pie,"[""1 large container Cool Whip"", ""1 large can c...","[""Empty Cool Whip into a bowl."", ""Drain juice ...",www.cookbooks.com/Recipe-Details.aspx?id=794547,Gathered,"[""pineapple"", ""condensed milk"", ""lemons"", ""pec..."


## Model Development
- CNN for Image Processing
- NLP for Recipe Generation


### CNN Model
- Assume an input image, generate the list of ingredients from the image

In [2]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import numpy as np
import os
import pickle
from torchvision import transforms
from PIL import Image
import time
from tensorflow.keras.preprocessing import image
from utils import prepare_output

data_dir= './data/'

use_gpu = True
device = torch.device('cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
map_loc = None if torch.cuda.is_available() and use_gpu else 'cpu'

ingrs_vocab = pickle.load(open(os.path.join(data_dir, 'ingr_vocab.pkl'), 'rb'))
vocab = pickle.load(open(os.path.join(data_dir, 'instr_vocab.pkl'), 'rb'))

ingr_vocab_size = len(ingrs_vocab)
instrs_vocab_size = len(vocab)
output_dim = instrs_vocab_size

t = time.time()
import sys; sys.argv=['']; del sys
args = {
    'save_dir': 'path/to/save/models',
    'project_name':'inversecooking',
    'model_name':'model',
    'transfer_from':'',
    'suff':'',
    'image_model':'resnet50',
    'recipe1m_dir':'path/to/recipe1m',
    'aux_data_dir':'../data',
    'crop_size':224,
    'image_size':256,
    'log_step':10,
    'learning_rate':0.001,
    'scale_learning_rate_cnn':0.01,
    'lr_decay_rate':0.99,
    'lr_decay_every':1, 
    'weight_decay':0.0,
    'embed_size':512,
    'n_att':8,
    'n_att_ingrs':4,
    'transf_layers': 16,
    'transf_layers_ingrs': 4,
    'num_epochs': 400,
    'batch_size': 128,
    'num_workers': 8,
    'dropout_encoder': 0.3,
    'dropout_decoder_r': 0.3,
    'dropout_decoder_i':0.3,
    'finetune_after': -1,
    'loss_weight': [1.0, 0.0, 0.0, 0.0],
    'max_eval': 4096,
    'label_smoothing_ingr': 0.1,
    'patience':50,
    'maxseqlen':15,
    'maxnuminstrs':10,
    'maxnumims':5,
    'maxnumlabels':20,
    'es_metric':'loss',
    'eval_split':'val',
    'numgens': 3,
    'greedy': False,
    'temperature': 1.0,
    'beam': -1,
    'ingrs_only':False,
    'recipe_only': False,
    'log_term': False,
    'tensorboard': True,
    'resume': False,
    'decay_lr': True,
    'use_lmdb': True,
    'get_perplexity': False, 
    'use_true_ingrs': False
}


from model import get_model
model=get_model(args, ingr_vocab_size, instrs_vocab_size)

# Load the pre-trained model parameters
model_path = os.path.join(data_dir, 'modelbest.ckpt')
model.load_state_dict(torch.load(model_path, map_location=map_loc))
model.to(device)
model.eval()
model.ingrs_only = False
model.recipe_only = False

transf_list_batch = []
transf_list_batch.append(transforms.ToTensor())
transf_list_batch.append(transforms.Normalize((0.485, 0.456, 0.406), 
                                            (0.229, 0.224, 0.225)))
to_input_transf = transforms.Compose(transf_list_batch)

greedy = [True, False]
beam = [-1, -1]
temperature = 1.0
numgens = len(greedy)
img=image.load_img(data_dir + 'food-101/images/spaghetti_bolognese/51063.jpg')

show_anyways = False #if True, it will show the recipe even if it's not valid
transf_list = []
transf_list.append(transforms.Resize(256))
transf_list.append(transforms.CenterCrop(224))
transform = transforms.Compose(transf_list)

image_transf = transform(img)
image_tensor = to_input_transf(image_transf).unsqueeze(0).to(device)

num_valid = 1
title=[]
ingredients=[]
recipe=[]
for i in range(numgens):
    with torch.no_grad():
        outputs = model.sample(image_tensor, greedy=greedy[i], 
                            temperature=temperature, beam=beam[i], true_ingrs=None)
            
    ingr_ids = outputs['ingr_ids'].cpu().numpy()
    recipe_ids = outputs['recipe_ids'].cpu().numpy()
            
    outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)
        
    if valid['is_valid'] or show_anyways:  
        title.append(outs['title'])
        ingredients.append(outs['ingrs'])
        recipe.append(outs['recipe'])
    else:
        title.append("Not a valid recipe!")
        recipe.append("Reason: "+valid['reason'])

print('title ', title)
print('ingredients ', ingredients)
print('recipe ', recipe)





title  ['Beef shank simmered in tomato sauce', 'Simple tomato spaghetti sauce']
ingredients  [['onion', 'pepper', 'oil', 'tomato', 'clove', 'salt', 'beef', 'parsley', 'stock', 'wine'], ['onion', 'pepper', 'oil', 'tomato', 'clove', 'salt', 'beef', 'parsley', 'stock', 'wine']]
recipe  [['Cut the onion into thin wedges.', 'Heat olive oil in a pan and saute the onion and garlic.', 'When the onion becomes translucent, add the beef and fry.', 'When the beef is browned, add the tomato paste and cook.', 'Add the wine and simmer.', 'Add the soup stock cube and simmer for about 10 minutes.', 'Add the parsley and season with salt and pepper.'], ['Mince onion and garlic.', 'Heat olive oil in a frying pan and add garlic.', "Stir to make sure garlic does n't burn.", 'Add ground meat and cook over a high heat, breaking up the meat.', 'When the meat is evenly browned, add onions and cook until soft.', 'Add tomato paste and continue to cook over low heat for a while.', 'Add wine and boil, uncovered, fo

UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).


In [27]:
%python train.py

UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).


### NLP Model
- Assume list of ingredients is provided, we can use this to generate the recipe. 
- Match input list of ingredients with the ingredients in the dataset and generate the recipe
- Approaches:
        -- Best Matching using Cosine Similarity: The chosen recipe will be the one with the highest similarity score
        -- GPT2 model
        -- ...

In [60]:
# Assume 'user_ingredients' list is already defined
user_ingredients = ['chicken', 'onion', 'pepper']

- #### Using TF-IDF for vectorization & cosine similarity for best matching

In [61]:
# using TF-IDF for vectorization and cosine similarity for matching

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Preprocess data:
def preprocess(ingredients):
    return [' '.join(eval(ing)).lower().replace('[^a-z\s]', '') for ing in ingredients]

# VECTORIZE
vectorizer = TfidfVectorizer()

# Vectorize ingredients: 
recipe_ingredients = preprocess(recipes['NER'])
recipe_tfidf = vectorizer.fit_transform(recipe_ingredients)

# Vectorize user ingredients:
user_ingredients_string = ' '.join(user_ingredients).lower()
user_tfidf = vectorizer.transform([user_ingredients_string])


# COMPUTE COSINE SIMILARITY
similarity_scores = cosine_similarity(user_tfidf, recipe_tfidf)

# Select the best matching recipe
best_match_index = similarity_scores.argmax()
best_recipe = recipes.iloc[best_match_index]

print("Best Matching Recipe:", best_recipe['title'])
print("Ingredients:", best_recipe['ingredients'])
print("Directions:", best_recipe['directions'])

Best Matching Recipe: My Chopped Chicken Liver
Ingredients: ["1 lb chicken liver", "3 tablespoons chicken fat (SHMALTZ)", "1 onion, diced", "3 hard-boiled eggs, chopped", "salt", "pepper", "onion powder", "garlic powder"]
Directions: ["In microwave, cook chicken livers and fat until done, about 5 minutes (use a pie dish).", "Remove livers with a slotted spoon, add onions and microwave until cooked, about 3 minutes.", "Meanwhile hard boil eggs in saucepan until done.", "Add livers, fat and onions to food processor; pulse until all ingredients are chopped fine.", "Mash eggs and add seasoning.", "Add liver mixture to egg mixture.", "Serve chopped liver on greens, surrounded by tomatoes, and lettuce.", "Serve with toast points or crackers."]


- #### Using GPT-2: 
This approach combines traditional NLP techniques for ingredient matching with advanced language generation capabilities of GPT-2


In [64]:
# Use GPT-2 to generate a Recipe

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained model tokenizer (vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Load pre-trained model
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Input Text to GPT-2: provide context to GPT-2 for generating the recipe. The format & content can be adjusted based on how we want GPT-2 to expand the recipe information.
text = f"Ingredients: {', '.join(user_ingredients)}; Recipe: {best_recipe['title']} - Directions:"

# Encode the text input
indexed_tokens = tokenizer.encode(text, add_special_tokens=True)
tokens_tensor = torch.tensor([indexed_tokens])

# Generate a text using the model
model.eval()
with torch.no_grad():
    outputs = model.generate(tokens_tensor, max_length=300, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Ingredients: chicken, onion, pepper; Recipe: My Chopped Chicken Liver - Directions: Preheat oven to 350 degrees F. Line a baking sheet with parchment paper and set aside. In a large bowl, whisk together the olive oil, salt, and pepper. Add the chicken mixture to the dry ingredients and mix well. Pour the mixture into the prepared baking dish and bake for 20-25 minutes, or until golden brown. Remove from the oven and allow to cool completely before serving.


## Training
- Model compilation
- Model training
- Hyperparameter tuning

## Evaluation
- Model evaluation metrics
- Visualization of results

## Conclusion
- Summary of findings
- Future work