# Modelo de recomendación de recetas

### Librerías necesarias

In [2]:
# Para manipulacion de datos
import ast
import numpy as np
import pandas as pd

# Para preparar datos
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

# Para la construccion del modelo
from keras.layers import Dense, Embedding, Flatten 
from keras.models import Sequential

# Para la evaluacion del modelo
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from collections import Counter


### Data inicial

In [3]:
recipes = pd.read_csv("data/RAW_recipes.csv")
recipes['ingredients'] = recipes['ingredients'].apply(ast.literal_eval)
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...",8


### Preparación de datos

In [4]:

ingredients = ['apple', 'banana', 'orange', 'tomato', 'carrot', 'bread', 'cheese', 'mango', 'broccoli', 'grape', 'lemmon', 'pineapple']

# Filtrando las recetas que contengan al menos 1 ingredieente de la lista o mas
recipes = recipes[recipes['ingredients'].apply(lambda x: any([k in x for k in ingredients]))]
len(recipes)

18122

In [5]:
# Filtrando receptas que contengan 20 ingredientes o menos
max_len = 20
recipes = recipes[recipes['ingredients'].apply(lambda x: all(len(i) <= max_len for i in x))]
len(recipes)

11191

In [6]:
# Representación binaria de los ingredientes
mlb = MultiLabelBinarizer()
ingredients_presence = mlb.fit_transform(recipes['ingredients'])

# Tokenizacion de los ingredientes
tokenizer = Tokenizer()
tokenizer.fit_on_texts(recipes['ingredients'])
sequences = tokenizer.texts_to_sequences(recipes['ingredients'])
padded_sequences = pad_sequences(sequences)


In [7]:
# Encoding del target (ID de la receta)
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(recipes['id'])
one_hot_labels = to_categorical(encoded_labels)

In [8]:
# Hiperparametros del modelo
vocab_size = len(tokenizer.word_index)+1
embedding_dim = 50
max_length = padded_sequences.shape[1]
recipes_q = recipes['id'].nunique()

# Construccion del modelo
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(Flatten())
model.add(Dense(120, activation='relu'))
model.add(Dense(recipes_q, activation='softmax'))

# Compilacion del modelo
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Entrenamiento del modelo
history = model.fit(padded_sequences, one_hot_labels, epochs=25, verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [9]:
recipes

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6
16,chile rellenos,43026,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"[egg roll wrap, whole green chilies, cheese, c...",5
23,deep fried dessert thingys,107699,20,158966,2005-01-05,"['30-minutes-or-less', 'time-to-make', 'course...","[1663.3, 221.0, 168.0, 66.0, 19.0, 158.0, 29.0]",20,"['in a large bowl , mix flour , granulated sug...",my mother used to make this for us as a specia...,"[all-purpose flour, granulated sugar, baking p...",13
63,more more apple pear jigglers,63593,125,47858,2003-06-02,"['weeknight', 'time-to-make', 'main-ingredient...","[22.9, 1.0, 5.0, 0.0, 1.0, 3.0, 0.0]",6,"['in a food processor or blender , process the...",it's a fun finger food for infants and toddler...,"[unflavored gelatin, milk, cream cheese, yogur...",6
66,my muffuletta sandwich,78655,20,12875,2003-12-12,"['30-minutes-or-less', 'time-to-make', 'course...","[181.1, 26.0, 6.0, 17.0, 2.0, 11.0, 2.0]",3,"['mix everything in food processor', 'chop fin...",watched a documentary about the,"[ciabatta, provolone cheese, genoa salami, ham...",17
...,...,...,...,...,...,...,...,...,...,...,...,...
231454,zucchini with pine nuts and orange,27752,10,29956,2002-05-08,"['15-minutes-or-less', 'time-to-make', 'course...","[73.7, 8.0, 13.0, 0.0, 2.0, 3.0, 1.0]",1,['fry onions in oil for 1 minute then add zucc...,a great way to use up zucchini (which can be a...,"[spring onions, olive oil, zucchini, pine nuts...",8
231509,zucchini banana pumpkin bread,52888,60,3288,2003-02-02,"['60-minutes-or-less', 'time-to-make', 'main-i...","[2306.0, 96.0, 890.0, 105.0, 75.0, 185.0, 133.0]",8,['prepare bread pan by spraying with cooking s...,this is a moist bread that could easily be fro...,"[flour, baking powder, cinnamon, salt, sugar, ...",14
231510,zucchini beef bake scd,434460,65,600072,2010-08-06,"['time-to-make', 'course', 'main-ingredient', ...","[570.5, 58.0, 38.0, 65.0, 73.0, 81.0, 7.0]",11,"['saut&eacute', 'zucchini and mushrooms in oil...",source: www.scdrecipe.com/recipes/print/268/\r...,"[zucchini, ground beef, onion, sliced mushroom...",11
231519,zucchini carrot muffins,287490,40,227586,2008-02-21,"['60-minutes-or-less', 'time-to-make', 'course...","[382.2, 30.0, 78.0, 13.0, 14.0, 13.0, 15.0]",9,"['preheat oven to 350f spray the muffin tins',...","got this recipe from from family circle, march...","[all-purpose flour, baking soda, baking powder...",13


In [19]:
try_ingredients = ['flour', 'baking powder', 'cinnamon', 'salt', 'sugar', 'evaporated milk', 'pumpkin', 'brown sugar', 'banana', 'zucchini', 'butter', 'egg', 'vanilla extract', 'water']
try_ingredients_sequence = tokenizer.texts_to_sequences([try_ingredients])
try_ingredients_padded = pad_sequences(try_ingredients_sequence, maxlen=padded_sequences.shape[1])

predictions = model.predict(try_ingredients_padded)
predicted_recipe_id = label_encoder.inverse_transform([np.argmax(predictions)])

print(predicted_recipe_id)

# Top 3 de recetas mas probables
top_3 = predictions.argsort()[0][-3:][::-1]
for i in top_3:
    print( recipes[recipes['id'] == label_encoder.inverse_transform([i])[0]]['id'].values[0], recipes[recipes['id'] == label_encoder.inverse_transform([i])[0]]['name'].values[0])

[52888]
52888 zucchini banana pumpkin bread
348104 banana cinnamon pancakes with butter rum sauce
185021 morning sunshine bars


In [11]:
try_ingredients_2 = ['tomato', 'cheese', 'pasta']

try_ingredients_sequence = tokenizer.texts_to_sequences([try_ingredients_2])
try_ingredients_padded = pad_sequences(try_ingredients_sequence, maxlen=padded_sequences.shape[1])

predictions = model.predict(try_ingredients_padded)
predicted_recipe_id = label_encoder.inverse_transform([np.argmax(predictions)])

print(predicted_recipe_id)

predictions

[131329]


array([[2.8891945e-05, 2.1380999e-06, 1.2603592e-10, ..., 2.5457652e-08,
        2.1772778e-06, 1.1688997e-05]], dtype=float32)