In [3]:
import pandas as pd
import numpy as np
import torch

In [4]:
# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
runs_on_gpu = torch.cuda.is_available()
device = torch.device('cuda' if runs_on_gpu else 'cpu')
print(f"Runs on {device.type}")

Runs on cpu


In [5]:
# Read the recipe table from disk, then print a schema summary
# (columns, dtypes, non-null counts) as a quick sanity check of the load.
data = pd.read_json(path_or_buf='preprocessed_recipes.json')
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20100 entries, 0 to 20099
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   directions   20100 non-null  object        
 1   fat          15901 non-null  float64       
 2   date         20100 non-null  datetime64[ns]
 3   categories   20100 non-null  object        
 4   calories     15969 non-null  float64       
 5   desc         20100 non-null  object        
 6   protein      15922 non-null  float64       
 7   rating       20100 non-null  float64       
 8   title        20100 non-null  object        
 9   ingredients  20100 non-null  object        
 10  sodium       15967 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(5)
memory usage: 1.8+ MB


In [6]:
# Preview the first five rows of the recipe table.
data.head(n=5)

Unnamed: 0,directions,fat,date,categories,calories,desc,protein,rating,title,ingredients,sodium
0,"1. Place the stock, lentils, celery, carrot, t...",7.0,2006-09-01 04:00:00,"[Sandwich, Bean, Fruit, Tomato, turkey, Vegeta...",426.0,,30.0,2.5,"Lentil, Apple, and Turkey Wrap","[4 cups low-sodium vegetable or chicken stock,...",559.0
1,Combine first 9 ingredients in heavy medium sa...,23.0,2004-08-20 04:00:00,"[Food Processor, Onion, Pork, Bake, Bastille D...",403.0,This uses the same ingredients found in boudin...,18.0,4.375,Boudin Blanc Terrine with Red Onion Confit,"[1 1/2 cups whipping cream, 2 medium onions, c...",1439.0
2,In a large heavy saucepan cook diced fennel an...,7.0,2004-08-20 04:00:00,"[Soup/Stew, Dairy, Potato, Vegetable, Fennel, ...",165.0,,6.0,3.75,Potato and Fennel Soup Hodge,"[1 fennel bulb (sometimes called anise), stalk...",165.0
3,Heat oil in heavy large skillet over medium-hi...,,2009-03-27 04:00:00,"[Fish, Olive, Tomato, Sauté, Low Fat, Low Cal,...",,The Sicilian-style tomato sauce has tons of Me...,,5.0,Mahi-Mahi in Tomato Olive Sauce,"[2 tablespoons extra-virgin olive oil, 1 cup c...",
4,Preheat oven to 350°F. Lightly grease 8x8x2-in...,32.0,2004-08-20 04:00:00,"[Cheese, Dairy, Pasta, Vegetable, Side, Bake, ...",547.0,,20.0,3.125,Spinach Noodle Casserole,"[1 12-ounce package frozen spinach soufflé, th...",452.0


## 4
### Extension
Generate new recipes based on random ingredients

In [7]:
# Decide how many ingredients to draw. np.random.randint excludes the upper
# bound, so this yields between 4 and 7 ingredients.
number_of_ingredients = np.random.randint(4, 8)

# A recipe with an empty ingredient list cannot contribute an ingredient.
# Filtering such recipes out up front guarantees that exactly
# `number_of_ingredients` items are drawn, instead of silently ending up
# with fewer whenever an empty recipe happens to be sampled.
ingredient_lists = data['ingredients']
non_empty_ingredient_lists = ingredient_lists[ingredient_lists.map(len) > 0]
if non_empty_ingredient_lists.empty:
    raise ValueError("No recipe with a non-empty ingredient list to sample from.")

# Draw one recipe per slot (with replacement, so the same recipe may be hit
# more than once), then pick one random ingredient from each drawn recipe.
random_ingredients_list = []
for ingredients in non_empty_ingredient_lists.sample(n=number_of_ingredients, replace=True):
    random_ingredient = ingredients[np.random.randint(0, len(ingredients))]
    random_ingredients_list.append(random_ingredient)

# Comma-separated form used later when building the LLM prompt.
random_ingredients = ', '.join(random_ingredients_list)
print(random_ingredients_list)

['2/3 cup whole milk', 'a 1/4-pound piece of prosciutto, cut into 1/4-inch dice (about 2/3 cup)', '2 (3- by 1-inch) strips fresh lemon zest, removed with a vegetable peeler', '1/4 teaspoon cumin seeds', '1 (1/2-oz) dried ancho chile*']


### 4.1
Fine-tune a transformer to generate directions for random ingredients.

Tutorial from: https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb

In [9]:
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Model inputs: each recipe's ingredient list flattened into one comma-separated string.
X = [', '.join(ingredient_list) for ingredient_list in data['ingredients']]
# Targets: the directions of the same recipes, in the same row order.
y = data['directions'].to_list()

In [14]:
# Tokenizer matching the seq2seq checkpoint that is fine-tuned below.
# Other T5 checkpoints (e.g. `t5-base`) can be substituted here.
tokenizer = AutoTokenizer.from_pretrained('t5-small')

def preprocess_function(X, y):
    """Tokenize one (ingredients, directions) pair for seq2seq training.

    Args:
        X: Source text (the comma-separated ingredients string).
        y: Target text (the recipe directions).

    Returns:
        The tokenizer's encoding for ``X`` with the token ids of ``y`` stored
        under ``'labels'``. Both sides are truncated to 512 tokens.
    """
    # `text_target` replaces the deprecated `as_target_tokenizer()` context
    # manager: the target is tokenized in the same call and its input ids are
    # placed under the 'labels' key of the returned encoding.
    return tokenizer(X, text_target=y, max_length=512, truncation=True)

# One encoding per recipe, kept in the original row order.
tokenized_datasets = list(map(preprocess_function, X, y))



In [15]:
# Preview a single tokenized example instead of dumping the whole list,
# which would flood the notebook output with every encoded recipe.
tokenized_datasets[:1]

[{'input_ids': [314, 12294, 731, 18, 7, 32, 12925, 12065, 42, 3832, 1519, 6, 209, 4119, 11549, 4216, 24026, 7, 6, 7739, 4119, 11549, 2379, 1442, 24026, 7, 6, 204, 24161, 7, 1235, 651, 6, 18510, 6, 209, 508, 15578, 6, 158, 400, 26, 11, 18510, 6, 209, 3, 7, 2246, 122, 1434, 3, 189, 63, 526, 6, 209, 21776, 3, 9692, 760, 3136, 6, 209, 2768, 12784, 6, 2583, 26, 6, 6677, 15, 26, 6, 11, 20225, 26, 6, 209, 422, 25147, 8947, 6, 2583, 26, 11, 20225, 26, 6, 209, 18396, 19635, 20103, 26, 7490, 5143, 6, 204, 21776, 7, 996, 18, 5771, 122, 77, 7994, 1043, 6, 8767, 120, 1591, 1001, 5270, 12, 2373, 6, 220, 8487, 829, 18, 210, 88, 144, 50, 208, 3198, 6, 1340, 16, 985, 2269, 10684, 6, 42, 431, 16465, 18, 4976, 61, 7055, 25592, 7, 6, 16672, 3, 8861, 15306, 6748, 6, 5551, 120, 3, 23645, 6, 7739, 819, 3, 27915, 115, 21857, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [16]:
from transformers import AutoModelForSeq2SeqLM

# Load the pretrained 't5-small' seq2seq checkpoint and place it on the selected device.
# `.to(...)` returns the module itself, so the bare `model` below echoes the
# same model summary the cell displayed before.
model = AutoModelForSeq2SeqLM.from_pretrained('t5-small').to(device)
model

In [9]:
from transformers import DataCollatorForSeq2Seq, TrainingArguments, Trainer

# `tokenized_datasets` is a plain Python list of per-recipe encodings, so it
# cannot be indexed with 'train' / 'validation'. Hold out a reproducible
# validation subset here instead.
VALIDATION_FRACTION = 0.1
SPLIT_SEED = 42

# Plain dicts keep the examples independent of the tokenizer's encoding class.
examples = [dict(example) for example in tokenized_datasets]
validation_size = max(1, int(len(examples) * VALIDATION_FRACTION))
train_dataset, eval_dataset = torch.utils.data.random_split(
    examples,
    [len(examples) - validation_size, validation_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Examples have different lengths; this collator pads inputs and labels per
# batch so they can be stacked into tensors.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

training_args = TrainingArguments(
    output_dir='./results',
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,  # You may need to adjust the number of epochs
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

trainer.train()

In [None]:
# Keep and show the evaluation metrics: a bare `trainer.evaluate()` that is
# not the last expression of the cell would compute them and then drop them.
eval_metrics = trainer.evaluate()
print(eval_metrics)

# Persist the fine-tuned weights and the tokenizer for later reuse.
model.save_pretrained('recipe-directions-model')
tokenizer.save_pretrained('recipe-directions-tokenizer')

In [None]:
# Switch to inference mode (disables dropout).
model.eval()

ingredients = "1 cup flour, 2 eggs, 1/2 cup milk"
# The token ids must live on the same device as the model; otherwise
# generation fails with a device mismatch whenever the model is on the GPU.
inputs = tokenizer(ingredients, return_tensors='pt').input_ids.to(model.device)

# Beam search over 4 beams, capped at 512 generated tokens.
outputs = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
print("Generated Recipe Directions:", tokenizer.decode(outputs[0], skip_special_tokens=True))

### 4.2
Compare with the title and directions generated by Groq.

In [9]:
from groq import Groq
import os
from dotenv import load_dotenv
import json

In [12]:
# Pull variables from a local .env file into the process environment,
# then read the API key from there (nothing is hardcoded in the notebook).
load_dotenv()
API_KEY = os.environ.get('API_KEY')

client = Groq(api_key=API_KEY)

# Ask for a title and directions for the randomly drawn ingredients, and
# request the answer as a fenced JSON object so it can be parsed afterwards.
prompt = f'I have following ingredients: {random_ingredients}. Give title and directions for a recipe. Your answer is a json-file wrapped in ```. The json looks like this: {{ "title" : "title", "directions" : ["direction1","direction2"] }}'

user_message = {"role": "user", "content": prompt}
chat_completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[user_message],
)

# Text of the first returned choice.
reply = chat_completion.choices[0].message.content
print(reply)

```
{
  "title" : "Roasted Bell Pepper and Apricot Soup",
  "directions" : [
    "Preheat the oven to 400°F (200°C). Place the bell peppers on a baking sheet, drizzle with 2 tablespoons of vegetable oil, and season with salt and pepper. Roast the peppers in the oven for about 45 minutes, or until the skin is blistered and charred.",
    "Remove the peppers from the oven and let them cool. Once cool enough to handle, peel off the skin, discarding it, and place the peppers in a blender or food processor. Add the onion, tomato juice, and apricots. Blend until smooth.",
    "Heat the olive oil in a large pot over medium heat. Add the blended pepper mixture and potatoes. Bring to a simmer and cook for about 20-25 minutes, or until the potatoes are tender.",
    "Use an immersion blender to puree the soup until smooth. Alternatively, you can transfer it to a blender and blend until smooth, then return it to the pot. Taste and adjust the seasoning as needed. Serve warm or at room temperature.

In [15]:
NEW_RECIPES_PATH = 'new_recipes.json'


def _extract_json_object(text):
    """Return the substring of ``text`` spanning its outermost ``{...}``.

    Slicing from the first '{' to the last '}' works whether the reply wraps
    the JSON in a plain fence, a fence with a language tag, a single
    unterminated fence, or no fence at all.

    Raises:
        ValueError: If ``text`` contains no brace-delimited span.
    """
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model reply: {text!r}")
    return text[start:end + 1]


parsed_recipe = json.loads(_extract_json_object(reply))

# Fail loudly rather than persisting a row that lacks the requested fields.
missing_keys = {'title', 'directions'} - parsed_recipe.keys()
if missing_keys:
    raise KeyError(f"Model reply is missing expected keys: {sorted(missing_keys)}")

# Record the ingredients as a list, matching the shape of 'directions'.
parsed_recipe['ingredients'] = random_ingredients_list

# Single-row frame for the new recipe (displayed again in the next cell).
new_recipe = pd.DataFrame([parsed_recipe])

# Append to the recipes saved so far. On the very first run the file does
# not exist yet, so the collection starts from the new recipe alone.
frames = [new_recipe]
if os.path.exists(NEW_RECIPES_PATH):
    frames.insert(0, pd.read_json(NEW_RECIPES_PATH))
new_recipes = pd.concat(frames, ignore_index=True)
new_recipes.to_json(NEW_RECIPES_PATH)

new_recipes.head()

Unnamed: 0,title,directions,ingredients
0,Shrimp with Apricot Glaze and Salt,"[Preheat oven to 400°F (200°C)., In a small bo...","[2 tbsp/20 g finely chopped red onion, 3 table..."
1,Roasted Root Vegetables with Coriander and Salt,"[Preheat the oven to 425°F (220°C)., Peel the ...","[1 1/2 teaspoons kosher salt, 2 teaspoons grou..."
2,Roasted Bell Pepper and Apricot Soup,[Preheat the oven to 400°F (200°C). Place the ...,"1 cup tomato juice, 1/2 cup extra-virgin olive..."
3,Roasted Bell Pepper and Apricot Soup,[Preheat the oven to 400°F (200°C). Place the ...,"[1 cup tomato juice, 1/2 cup extra-virgin oliv..."


In [16]:
# Display the single-row DataFrame built for the newly generated recipe.
new_recipe

Unnamed: 0,title,directions,ingredients
0,Roasted Bell Pepper and Apricot Soup,[Preheat the oven to 400°F (200°C). Place the ...,"[1 cup tomato juice, 1/2 cup extra-virgin oliv..."
