In [10]:
import pandas as pd
import numpy as np
import torch

In [11]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    runs_on_gpu = True
else:
    device = torch.device('cpu')
    runs_on_gpu = False
print(f"Runs on {device.type}")

Runs on cpu


In [12]:
data = pd.read_json('preprocessed_recipes.json')
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20100 entries, 0 to 20099
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   directions       20100 non-null  object        
 1   fat              15901 non-null  float64       
 2   date             20100 non-null  datetime64[ns]
 3   categories       20100 non-null  object        
 4   calories         15969 non-null  float64       
 5   desc             20100 non-null  object        
 6   protein          15922 non-null  float64       
 7   rating           20100 non-null  float64       
 8   title            20100 non-null  object        
 9   ingredients      20100 non-null  object        
 10  sodium           15967 non-null  float64       
 11  num_categories   20100 non-null  int64         
 12  num_ingredients  20100 non-null  int64         
 13  num_directions   20100 non-null  int64         
dtypes: datetime64[ns](1), float64(5), int64(3),

In [13]:
data.head()

Unnamed: 0,directions,fat,date,categories,calories,desc,protein,rating,title,ingredients,sodium,num_categories,num_ingredients,num_directions
0,"1. Place the stock, lentils, celery, carrot, t...",-0.016601,2006-09-01 04:00:00,"[Sandwich, Bean, Fruit, Tomato, turkey, Vegeta...",-0.016407,,-0.018242,2.5,"Lentil, Apple, and Turkey Wrap","[4 cups low-sodium vegetable or chicken stock,...",-0.016984,11,15,3
1,Combine first 9 ingredients in heavy medium sa...,-0.015818,2004-08-20 04:00:00,"[Food Processor, Onion, Pork, Bake, Bastille D...",-0.016471,This uses the same ingredients found in boudin...,-0.02137,4.375,Boudin Blanc Terrine with Red Onion Confit,"[1 1/2 cups whipping cream, 2 medium onions, c...",-0.014341,11,28,5
2,In a large heavy saucepan cook diced fennel an...,-0.016601,2004-08-20 04:00:00,"[Soup/Stew, Dairy, Potato, Vegetable, Fennel, ...",-0.017135,,-0.024498,3.75,Potato and Fennel Soup Hodge,"[1 fennel bulb (sometimes called anise), stalk...",-0.018168,7,6,2
3,Heat oil in heavy large skillet over medium-hi...,,2009-03-27 04:00:00,"[Fish, Olive, Tomato, Sauté, Low Fat, Low Cal,...",,The Sicilian-style tomato sauce has tons of Me...,,5.0,Mahi-Mahi in Tomato Olive Sauce,"[2 tablespoons extra-virgin olive oil, 1 cup c...",,17,10,2
4,Preheat oven to 350°F. Lightly grease 8x8x2-in...,-0.015378,2004-08-20 04:00:00,"[Cheese, Dairy, Pasta, Vegetable, Side, Bake, ...",-0.01607,,-0.020849,3.125,Spinach Noodle Casserole,"[1 12-ounce package frozen spinach soufflé, th...",-0.017306,11,6,1


## 4
### Extension
Generate new recipes based on random ingredients

In [14]:
number_of_ingredients = np.random.randint(4, 8)
random_ingredients_list = []
for i in range(number_of_ingredients):
    ingredients = data['ingredients'].sample().iloc[0]
    if len(ingredients) == 0:
        continue
    random_ingredient = ingredients[np.random.randint(0, len(ingredients))]
    random_ingredients_list.append(random_ingredient)

random_ingredients = ', '.join(random_ingredients_list)
print(random_ingredients_list)

['1 3/4 cups Pearl Oyster Bar Tartar Sauce', '6 tablespoons unsweetened cocoa powder', '8 garlic cloves, chopped', '3 garlic cloves, minced']


In [16]:
prompt = f'I have following ingredients: {random_ingredients}. Give title and directions for a recipe. Your answer is a json-file wrapped in ```. The json looks like this: {{ "title" : "title", "directions" : ["direction1","direction2"] }}'
prompt

'I have following ingredients: 1 3/4 cups Pearl Oyster Bar Tartar Sauce, 6 tablespoons unsweetened cocoa powder, 8 garlic cloves, chopped, 3 garlic cloves, minced. Give title and directions for a recipe. Your answer is a json-file wrapped in ```. The json looks like this: { "title" : "title", "directions" : ["direction1","direction2"] }'

### 4.1
Fine tune transformer to give directions for random ingredients

Tutorial from: https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb

In [17]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [18]:
tokenizer = AutoTokenizer.from_pretrained('t5-small')

In [22]:
model = AutoModelForSeq2SeqLM.from_pretrained('google/t5-v1_1-xl')
model.to(device)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


T5ForConditionalGeneration(
  (shared): Embedding(32128, 2048)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 2048)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=2048, out_features=2048, bias=False)
              (k): Linear(in_features=2048, out_features=2048, bias=False)
              (v): Linear(in_features=2048, out_features=2048, bias=False)
              (o): Linear(in_features=2048, out_features=2048, bias=False)
              (relative_attention_bias): Embedding(32, 32)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=2048, out_features=5120, bias=False)
              (wi_1): Linear(in_features=2048, out_features=5120, bias=False)
       

In [23]:
inputs = tokenizer(prompt, return_tensors='pt', max_length=512, truncation=True, padding='max_length').to(device)
outputs = model.generate(**inputs)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)

minced, 1 cup water, 1 cup milk, 1 cup milk, 1 cup water,


### 4.2
Compare to directions and title made by groq

In [None]:
from groq import Groq
import os
from dotenv import load_dotenv
import json

In [None]:
load_dotenv()

API_KEY = os.getenv('API_KEY')

client = Groq(
    api_key=API_KEY
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": prompt,
        }
    ],
    model="llama3-8b-8192",
)
reply = chat_completion.choices[0].message.content
print(reply)

In [None]:
new_recipe = reply[reply.find('```')+3:reply.rfind('```')]
new_recipe = json.loads(new_recipe)
new_recipe['ingredients'] = random_ingredients_list

new_recipes = pd.read_json('new_recipes.json')

new_recipe = pd.DataFrame([new_recipe])
new_recipes = pd.concat([new_recipes, new_recipe], ignore_index=True)
new_recipes.to_json('new_recipes.json')

new_recipes.head()

In [None]:
new_recipe