# Recipe Content-Based Recommender

## Imports and Downloads

In [None]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
!gdown 1zvb2c8buI52xHa-rYJuoRthWx1gFApPD
!unzip growell-recommendation-dataset.zip

Downloading...
From: https://drive.google.com/uc?id=1zvb2c8buI52xHa-rYJuoRthWx1gFApPD
To: /content/growell-recommendation-dataset.zip
  0% 0.00/4.88k [00:00<?, ?B/s]100% 4.88k/4.88k [00:00<00:00, 14.0MB/s]
Archive:  growell-recommendation-dataset.zip
  inflating: recipes.csv             
  inflating: recipes_ingredients.csv  
  inflating: ingredients.csv         


In [None]:
# Read the three extracted CSV files, in order: recipes, ingredients, and the
# ingredient<->recipe link table.
recipes, ingredients, ingredients_recipes = (
    pd.read_csv(csv_path)
    for csv_path in ('recipes.csv', 'ingredients.csv', 'recipes_ingredients.csv')
)

In [None]:
# Preview the first rows of the recipes table.
recipes.head()

Unnamed: 0,id,name,target_age,estimated_time,ingredients
0,12893e1a-1d30-47fb-a6ee-8c46e034b437,Sup Sayur Bayam dan Tomat,6,30,wortel|tomat
1,256e140d-b6fd-4729-8c5f-92a2cc949d21,Chicken Bone Broth,2,30,ayam|wortel
2,3b66fb82-5ede-4f0f-821b-d5da4d78db5c,Tempe Goreng dengan Sayuran Wortel dan Brokoli,13,30,wortel|brokoli|tempe
3,4ae654db-16bb-43da-8f3e-12a9308f4dbe,Tumis Ayam dengan Tomat dan Tempe,13,30,ayam|tomat|tempe
4,4e921465-bc2d-4f6c-83f6-3ccd60153e40,Mashed Avocado dan Kentang Panggang,12,60,kentang


In [None]:
# Preview the first rows of the ingredients table.
ingredients.head()

Unnamed: 0,id,name,calories,picture
0,02c3c4e5-666f-44e3-a104-5d1c215c0dfc,ayam,133,
1,0803dbb8-3f92-4e68-add6-eb56e8dfe872,wortel,48,
2,3f02f9e7-e344-43fc-b74e-763a352842e6,kentang,73,
3,4393be63-9656-49a0-a974-dd8a088172f8,tomat,21,
4,763e08e2-f2f6-4f9b-ab08-a7834b4eabc1,daging sapi,243,


In [None]:
# Preview the link table; the preprocessing loop below reads each row as
# (ingredient id, recipe id).
ingredients_recipes.head()

Unnamed: 0,A,B
0,0803dbb8-3f92-4e68-add6-eb56e8dfe872,12893e1a-1d30-47fb-a6ee-8c46e034b437
1,4393be63-9656-49a0-a974-dd8a088172f8,12893e1a-1d30-47fb-a6ee-8c46e034b437
2,02c3c4e5-666f-44e3-a104-5d1c215c0dfc,256e140d-b6fd-4729-8c5f-92a2cc949d21
3,0803dbb8-3f92-4e68-add6-eb56e8dfe872,256e140d-b6fd-4729-8c5f-92a2cc949d21
4,0803dbb8-3f92-4e68-add6-eb56e8dfe872,3b66fb82-5ede-4f0f-821b-d5da4d78db5c


## Preprocessing

### Convert Ingredient-Recipe Relation to Dictionary

In [None]:
# Build the ingredient id -> name lookup once instead of filtering the whole
# `ingredients` frame for every link row. On duplicate ids the first name wins,
# matching the previous `.values[0]` behaviour.
ingredient_names = ingredients.drop_duplicates('id').set_index('id')['name'].to_dict()

# Collect ingredient names per recipe, in the order the link rows appear.
# An ingredient id missing from `ingredients` raises KeyError here.
recipe_ingredient_names = {}
for ingredient_id, recipe_id in ingredients_recipes.to_numpy():
  recipe_ingredient_names.setdefault(recipe_id, []).append(ingredient_names[ingredient_id])

# recipe id -> 'name1|name2|...' string, as consumed by the TF-IDF tokenizer.
ingredients_map = {
  recipe_id: '|'.join(names)
  for recipe_id, names in recipe_ingredient_names.items()
}

ingredients_map

{'12893e1a-1d30-47fb-a6ee-8c46e034b437': 'wortel|tomat',
 '256e140d-b6fd-4729-8c5f-92a2cc949d21': 'ayam|wortel',
 '3b66fb82-5ede-4f0f-821b-d5da4d78db5c': 'wortel|brokoli|tempe',
 '4ae654db-16bb-43da-8f3e-12a9308f4dbe': 'ayam|tomat|tempe',
 '4e921465-bc2d-4f6c-83f6-3ccd60153e40': 'kentang',
 '5a5d6c25-dd72-4a5e-a27a-69f4ed1cc04f': 'jagung|brokoli',
 '70c7509e-7a39-4949-9f87-b84f0edaa8d0': 'ayam|wortel',
 '7c7b8c15-0f96-4da1-88ff-172174b0ecbc': 'wortel|telur|brokoli',
 '810f4de1-e7cb-4395-8545-636df5ab433e': 'daging sapi|jagung',
 '83387a4d-50ee-4d9a-9a93-86532a652e1c': 'wortel|kentang|brokoli',
 '8f8a4951-bb15-4478-b533-80f11b0c07de': 'wortel|tomat',
 'aa68742d-9042-4a4d-8837-37eca799a775': 'kentang|telur',
 'b4f306f9-cf8d-47c0-ba57-2dfef0db1d78': 'wortel|daging sapi|jagung',
 'bac3b441-e982-436e-9181-9538c6b45523': 'wortel|ikan|brokoli',
 'bfd4acec-af8d-4d70-90d6-8309bbbbee8f': 'tomat|brokoli',
 'c15d53fd-6e4c-4209-bde5-4f95d111ed8c': 'ayam',
 'd484cbe8-84e0-496f-9dcf-b35ee577746a': 'w

### Apply Dictionary to DataFrame and Remove Unnecessary Columns

In [None]:
# Columns that are not used to build the content-based features.
unused_columns = ['rating', 'picture', 'how_to', 'tools', 'description', 'created_at', 'updated_at']

# Attach the '|'-joined ingredient string to each recipe and remove the unused
# columns. The frame is rebound instead of mutated in place, and
# errors='ignore' lets the cell be re-run after the columns are already gone
# (the in-place drop raised KeyError on a second run).
recipes = (
    recipes
    .assign(ingredients=recipes['id'].map(ingredients_map))
    .drop(columns=unused_columns, errors='ignore')
)

In [None]:
# Sanity check: expand the '|'-separated ingredient strings into indicator
# columns and summarise the resulting frame.
recipes['ingredients'].str.get_dummies('|').info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   ayam         22 non-null     int64
 1   brokoli      22 non-null     int64
 2   daging sapi  22 non-null     int64
 3   ikan         22 non-null     int64
 4   jagung       22 non-null     int64
 5   kentang      22 non-null     int64
 6   telur        22 non-null     int64
 7   tempe        22 non-null     int64
 8   tomat        22 non-null     int64
 9   wortel       22 non-null     int64
dtypes: int64(10)
memory usage: 1.8 KB


## Cosine Similarity

In [None]:
# Work on a copy so the similarity steps below do not modify `recipes`.
data = recipes.copy()
data.head()

Unnamed: 0,id,name,target_age,estimated_time,ingredients
0,12893e1a-1d30-47fb-a6ee-8c46e034b437,Sup Sayur Bayam dan Tomat,6,30,wortel|tomat
1,256e140d-b6fd-4729-8c5f-92a2cc949d21,Chicken Bone Broth,2,30,ayam|wortel
2,3b66fb82-5ede-4f0f-821b-d5da4d78db5c,Tempe Goreng dengan Sayuran Wortel dan Brokoli,13,30,wortel|brokoli|tempe
3,4ae654db-16bb-43da-8f3e-12a9308f4dbe,Tumis Ayam dengan Tomat dan Tempe,13,30,ayam|tomat|tempe
4,4e921465-bc2d-4f6c-83f6-3ccd60153e40,Mashed Avocado dan Kentang Panggang,12,60,kentang


### Get TF-IDF Based On Recipe Ingredients

In [None]:
def ingredients_tokenizer(ingredients):
  """Split a '|'-separated ingredient string into a list of ingredient tokens.

  Multi-word ingredients are kept as single tokens because only '|' acts as
  a separator.
  """
  separator = '|'
  tokens = ingredients.split(separator)
  return tokens

In [None]:
# Tokenise with the '|' splitter so each ingredient (including multi-word ones)
# becomes one feature.
# NOTE(review): token_pattern=None presumably only silences scikit-learn's
# warning that the regex pattern is unused when a tokenizer is given - confirm.
tf_vectorizer = TfidfVectorizer(tokenizer=ingredients_tokenizer, token_pattern=None)
tf_vectorizer.fit(data['ingredients'])
# Show the learned vocabulary.
tf_vectorizer.get_feature_names_out()

array(['ayam', 'brokoli', 'daging sapi', 'ikan', 'jagung', 'kentang',
       'telur', 'tempe', 'tomat', 'wortel'], dtype=object)

In [None]:
# fit_transform fits the vectorizer again on the same column used by the
# earlier fit() call and returns the TF-IDF matrix (one row per recipe).
tfidf_matrix = tf_vectorizer.fit_transform(data['ingredients'])
tfidf_matrix.shape

(22, 10)

In [None]:
# Labelled dense view of the TF-IDF weights (recipes x ingredients).
# toarray() yields a plain ndarray rather than the np.matrix that todense() returns.
tfidf_preview = pd.DataFrame(
    tfidf_matrix.toarray(),
    columns=tf_vectorizer.get_feature_names_out(),
    index=data['name']
)

# Shuffled preview. Sample sizes are clamped to the frame's dimensions so the
# cell does not raise on a smaller dataset, and the seed is fixed so the
# displayed preview is reproducible.
(
    tfidf_preview
    .sample(n=min(10, tfidf_preview.shape[1]), axis=1, random_state=42)
    .sample(n=min(22, tfidf_preview.shape[0]), axis=0, random_state=42)
)

Unnamed: 0_level_0,brokoli,ayam,wortel,telur,tempe,daging sapi,jagung,kentang,ikan,tomat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Mashed Avocado dan Kentang Panggang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Sup Ikan dan Kentang Wortel,0.0,0.0,0.371149,0.0,0.0,0.0,0.0,0.543064,0.753213,0.0
Sup Labu dan Ayam,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Kaldu ayam untuk mpasi,0.0,0.0,0.50968,0.860364,0.0,0.0,0.0,0.0,0.0,0.0
Salad Tomat dan Brokoli,0.659464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.751736
Puree Buah,0.0,0.0,0.538147,0.0,0.0,0.0,0.0,0.0,0.0,0.842851
Tumis Ayam dengan Tomat dan Tempe,0.0,0.544256,0.0,0.0,0.638412,0.0,0.0,0.0,0.0,0.544256
Chicken Bone Broth,0.0,0.842851,0.538147,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Tim Ayam, Nasi, wortel",0.0,0.0,0.50968,0.860364,0.0,0.0,0.0,0.0,0.0,0.0
Sup Jagung Wortel dan Daging Sapi,0.0,0.0,0.343125,0.0,0.0,0.696341,0.630377,0.0,0.0,0.0


### Combine Ingredients Data with Estimated Time and Target Age

In [None]:
# Min-max scale estimated_time to [0, 1] with one vectorised expression.
estimated_time_arr = data['estimated_time'].values
min_estimated_time = np.min(estimated_time_arr)
max_estimated_time = np.max(estimated_time_arr)
estimated_time_range = max_estimated_time - min_estimated_time

if estimated_time_range == 0:
  # Every recipe has the same value: the feature carries no information, and
  # dividing by zero would put NaN/inf into the feature matrix.
  normalized_estimated_time = np.zeros(len(estimated_time_arr))
else:
  normalized_estimated_time = (estimated_time_arr - min_estimated_time) / estimated_time_range

In [None]:
# Min-max scale target_age to [0, 1] with one vectorised expression.
target_age_arr = data['target_age'].values
min_target_age = np.min(target_age_arr)
max_target_age = np.max(target_age_arr)
target_age_range = max_target_age - min_target_age

if target_age_range == 0:
  # Every recipe has the same value: the feature carries no information, and
  # dividing by zero would put NaN/inf into the feature matrix.
  normalized_target_age = np.zeros(len(target_age_arr))
else:
  normalized_target_age = (target_age_arr - min_target_age) / target_age_range

In [None]:
# Reshape each normalised vector into a single-column 2-D array so it can be
# concatenated next to the TF-IDF features.
estimated_time_column = np.reshape(normalized_estimated_time, (-1, 1))
target_age_column = np.reshape(normalized_target_age, (-1, 1))

In [None]:
# The sparse matrix reports its own shape; there is no need to build a dense
# copy just to inspect it.
tfidf_matrix.shape

(22, 10)

In [None]:
# Append the two scaled numeric columns to the dense TF-IDF features.
# toarray() returns a regular ndarray, so feature_matrix is an ndarray too
# (todense() produced an np.matrix that had to be converted before further use).
feature_matrix = np.concatenate(
    [tfidf_matrix.toarray(), estimated_time_column, target_age_column],
    axis=1,
)
feature_matrix.shape

(22, 12)

In [None]:
# Column labels for feature_matrix: ingredient vocabulary first, then the two
# numeric features, in the same order they were concatenated.
ingredient_feature_names = tf_vectorizer.get_feature_names_out()
numeric_feature_names = np.array(['estimated_time', 'target_age'])
feature_columns = np.concatenate((ingredient_feature_names, numeric_feature_names))

In [None]:
# Labelled view of the combined feature matrix (recipes x features).
feature_preview = pd.DataFrame(
    np.asarray(feature_matrix),
    columns=feature_columns,
    index=data['name']
)

# Shuffled preview. Sample sizes are clamped to the frame's dimensions so the
# cell does not raise on a smaller dataset, and the seed is fixed so the
# displayed preview is reproducible.
(
    feature_preview
    .sample(n=min(11, feature_preview.shape[1]), axis=1, random_state=42)
    .sample(n=min(22, feature_preview.shape[0]), axis=0, random_state=42)
)

Unnamed: 0_level_0,daging sapi,brokoli,tempe,tomat,telur,estimated_time,wortel,ikan,target_age,ayam,kentang
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Chicken Bone Broth,0.0,0.0,0.0,0.0,0.0,0.0,0.538147,0.0,0.0,0.842851,0.0
"Tim Ayam, Nasi, wortel",0.0,0.0,0.0,0.0,0.860364,1.0,0.50968,0.0,0.0,0.0,0.0
Telur Rebus dengan Wortel dan Brokoli,0.0,0.573618,0.0,0.0,0.704744,0.0,0.417491,0.0,0.545455,0.0,0.0
Puree Jagung dan Brokoli,0.0,0.598909,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.0,0.0
Tumis Ayam dengan Tomat dan Tempe,0.0,0.0,0.638412,0.544256,0.0,0.0,0.0,0.0,1.0,0.544256,0.0
Telur Dadar dengan Kentang,0.0,0.0,0.0,0.0,0.755639,0.0,0.0,0.0,0.545455,0.0,0.654988
Sup Labu dan Ayam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,1.0,0.0
Bubur Ayam dan Ketang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.730729,0.682668
Sup Jagung Wortel dan Daging Sapi,0.696341,0.0,0.0,0.0,0.0,0.0,0.343125,0.0,0.363636,0.0,0.0
Sup Sayur Bayam dan Tomat,0.0,0.0,0.0,0.842851,0.0,0.0,0.538147,0.0,0.363636,0.0,0.0


### Apply Cosine Similarity

In [None]:
# Pairwise cosine similarity between all recipe feature rows; the result has
# one row and one column per recipe.
dense_features = np.asarray(feature_matrix)
cosine_sim = cosine_similarity(dense_features)
cosine_sim.shape

(22, 22)

In [None]:
# Label both axes of the similarity matrix with recipe names.
cosine_sim_df = pd.DataFrame(cosine_sim, index=data['name'], columns=data['name'])
print('Shape:', cosine_sim_df.shape)

# Random corner of the matrix. Sizes are clamped so the cell works on smaller
# datasets, and the seed is fixed so the displayed preview is reproducible.
(
    cosine_sim_df
    .sample(n=min(5, cosine_sim_df.shape[1]), axis=1, random_state=42)
    .sample(n=min(10, cosine_sim_df.shape[0]), axis=0, random_state=42)
)

Shape: (22, 22)


name,Telur Rebus dengan Wortel dan Brokoli,Sup Sayur Bayam dan Tomat,Tempe Goreng dengan Sayuran Wortel dan Brokoli,Sup Ikan dan Kentang Wortel,Sup Jagung Wortel dan Daging Sapi
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Sup Sayur Bayam dan Tomat,0.349007,1.0,0.384548,0.293194,0.279875
"Tim Ayam, Nasi, wortel",0.508484,0.18227,0.101831,0.125708,0.116216
Puree Buah,0.279716,0.985767,0.276092,0.245813,0.231868
Puree Jagung dan Brokoli,0.447083,0.116788,0.460156,0.116788,0.562648
Chicken Bone Broth,0.197238,0.272166,0.152053,0.187707,0.173534
Sup Jagung Wortel dan Daging Sapi,0.281833,0.279875,0.332762,0.229266,1.0
Telur Rebus dengan Wortel dan Brokoli,1.0,0.349007,0.637653,0.291485,0.281833
Puree Tempe dan Tomat,0.08566,0.56673,0.515144,0.061133,0.061133
Daging Sapi Panggang dengan Jagung,0.3386,0.241649,0.5,0.241649,0.865839
Tumis Ayam dengan Tomat dan Tempe,0.3386,0.546489,0.734331,0.241649,0.241649


## Prediction

### Use Cosine Similarity Result to Get Recommendations

In [None]:
# Recipe details shown alongside recommendations: everything except the id.
# Dropping the column by name (rather than "all but the first column") keeps
# this correct if the column order of `data` ever changes.
items = data.drop(columns=['id'])
items

Unnamed: 0,name,target_age,estimated_time,ingredients
0,Sup Sayur Bayam dan Tomat,6,30,wortel|tomat
1,Chicken Bone Broth,2,30,ayam|wortel
2,Tempe Goreng dengan Sayuran Wortel dan Brokoli,13,30,wortel|brokoli|tempe
3,Tumis Ayam dengan Tomat dan Tempe,13,30,ayam|tomat|tempe
4,Mashed Avocado dan Kentang Panggang,12,60,kentang
5,Puree Jagung dan Brokoli,6,30,jagung|brokoli
6,Puree Sayuran dan Ayam,6,30,ayam|wortel
7,Telur Rebus dengan Wortel dan Brokoli,8,30,wortel|telur|brokoli
8,Daging Sapi Panggang dengan Jagung,13,30,daging sapi|jagung
9,Nasi Tim Sayur,12,30,wortel|kentang|brokoli


In [None]:
def recipe_recommendations(recipe_name, similarity_data=cosine_sim_df, items=items, k=5):
    """Return up to ``k`` recipes most similar to ``recipe_name``, best match first.

    Parameters
    ----------
    recipe_name
        Label of the query recipe; must be a column of ``similarity_data``.
    similarity_data : pandas.DataFrame
        Square similarity matrix whose rows and columns list the recipes in
        the same order.
    items : pandas.DataFrame
        Recipe details; merged onto the result through the columns it shares
        with the list of recommended labels.
    k : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The recommended recipes with their details, most similar first. The
        query recipe itself is never included.

    Raises
    ------
    KeyError
        If ``recipe_name`` is not a column of ``similarity_data``.
    """
    scores = similarity_data.loc[:, recipe_name].to_numpy(dtype=float)
    # Full stable descending sort. The earlier argpartition(range(-1, -k, -1))
    # call only placed the top k-1 scores, while the top k+1 positions were
    # read, so the last recommendations were not guaranteed to be the next
    # best ones (and k larger than the catalogue raised ValueError).
    ranking = (-scores).argsort(kind='stable')
    closest = similarity_data.columns[ranking].drop(recipe_name, errors='ignore')[:max(k, 0)]
    return pd.DataFrame(closest).merge(items).head(k)

In [None]:
# Draw one recipe name at random and display its row; it is used as the query
# in the next cell.
sample = items['name'].sample(n=1).iloc[0]
items.loc[items['name'] == sample]

Unnamed: 0,name,target_age,estimated_time,ingredients
1,Chicken Bone Broth,2,30,ayam|wortel


In [None]:
# Recommendations for the randomly chosen recipe, using the default
# similarity matrix and item table.
recipe_recommendations(sample)

Unnamed: 0,name,target_age,estimated_time,ingredients
0,Puree Sayuran dan Ayam,6,30,ayam|wortel
1,Sup Labu dan Ayam,6,30,ayam
2,Bubur Ayam dan Ketang,6,30,ayam|kentang
3,Tumis Ayam dengan Tomat dan Tempe,13,30,ayam|tomat|tempe
4,Puree Buah,4,30,wortel|tomat


## Export to CSV

In [None]:
# Save the recipe table and the name-labelled similarity matrix. No `index`
# argument is passed, so each file's first column holds the DataFrame index;
# the loading code below slices that column off the similarity file.
data.to_csv('recipes_data.csv')
cosine_sim_df.to_csv('recipe_similarity.csv')

## Load Similarity Data

### Imports

In [None]:
import pandas as pd
import numpy as np

### Load CSV

In [None]:
# Reload the two files written by the export step.
recipes_df = pd.read_csv('recipes_data.csv')
sim_df = pd.read_csv('recipe_similarity.csv')
# Discard the first CSV column (the saved row labels) and relabel both axes
# with recipe ids; this relies on both files listing recipes in the same order.
# NOTE(review): `.values` on a frame that mixes a text column with numeric ones
# presumably yields an object array, so the scores are likely stored as object
# rather than float - confirm; reading with index_col=0 would avoid that.
sim_df = pd.DataFrame(sim_df.values[:, 1:], index=recipes_df['id'], columns=recipes_df['id'])
# sim_df.sample(5, axis=1).sample(10, axis=0)

id,7c7b8c15-0f96-4da1-88ff-172174b0ecbc,8f8a4951-bb15-4478-b533-80f11b0c07de,f98d8fbe-c94f-487f-934b-6ba433c67ff6,810f4de1-e7cb-4395-8545-636df5ab433e,c15d53fd-6e4c-4209-bde5-4f95d111ed8c
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
810f4de1-e7cb-4395-8545-636df5ab433e,0.3386,0.126491,0.241649,1.0,0.241649
b4f306f9-cf8d-47c0-ba57-2dfef0db1d78,0.281833,0.231868,0.116788,0.865839,0.116788
bac3b441-e982-436e-9181-9538c6b45523,0.621345,0.267934,0.241649,0.5,0.241649
4e921465-bc2d-4f6c-83f6-3ccd60153e40,0.312739,0.11683,0.684101,0.461812,0.223193
d8d86b7d-a178-49c9-b2e4-104626774b58,0.508484,0.190819,0.0,0.0,0.0
e0715743-d18b-41e7-b290-9d1d4b8e4686,0.291485,0.245813,0.444224,0.241649,0.116788
aa68742d-9042-4a4d-8837-37eca799a775,0.639722,0.08566,0.532552,0.3386,0.163644
ee35eb4e-ac5a-4534-b47a-602bc760ab5c,0.08566,0.561309,0.061133,0.126491,0.061133
8f8a4951-bb15-4478-b533-80f11b0c07de,0.279716,1.0,0.061133,0.126491,0.061133
d484cbe8-84e0-496f-9dcf-b35ee577746a,0.759505,0.284946,0.03094,0.064018,0.03094


In [None]:
def recipe_recommendations(recipe_id, similarity_data, k=5):
    """Return the ids of up to ``k`` recipes most similar to ``recipe_id``.

    Parameters
    ----------
    recipe_id
        Id of the query recipe; must be a column of ``similarity_data``.
    similarity_data : pandas.DataFrame
        Square similarity matrix whose rows and columns list the recipes in
        the same order. Scores may be stored as float or as objects that
        convert to float.
    k : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Single-column frame of recipe ids, most similar first. The query
        recipe itself is never included.

    Raises
    ------
    KeyError
        If ``recipe_id`` is not a column of ``similarity_data``.
    """
    scores = similarity_data.loc[:, recipe_id].to_numpy(dtype=float)
    # Full stable descending sort. The earlier argpartition(range(-1, -k, -1))
    # call only placed the top k-1 scores, while the top k+1 positions were
    # read, so the last recommendations were not guaranteed to be the next
    # best ones (and k larger than the catalogue raised ValueError).
    ranking = (-scores).argsort(kind='stable')
    closest = similarity_data.columns[ranking].drop(recipe_id, errors='ignore')[:max(k, 0)]
    return pd.DataFrame(closest)

In [None]:
# Example lookup: print the recommended recipe ids for one recipe as a flat array.
# Index 0 is the most similar, index 1 is the second most similar, etc.
print(recipe_recommendations('810f4de1-e7cb-4395-8545-636df5ab433e', sim_df).values.reshape(-1))

['b4f306f9-cf8d-47c0-ba57-2dfef0db1d78'
 '5a5d6c25-dd72-4a5e-a27a-69f4ed1cc04f'
 '3b66fb82-5ede-4f0f-821b-d5da4d78db5c'
 '4ae654db-16bb-43da-8f3e-12a9308f4dbe'
 'bfd4acec-af8d-4d70-90d6-8309bbbbee8f']
