# Dataset Preparation
Author: Shiyi Wang

In [1]:
import pandas as pd
import numpy as np
from collections import Counter

Load two datasets

In [2]:
# Raw inputs: the review/rating records and the recipe table they refer to.
interactions = pd.read_csv(filepath_or_buffer='../data/interactions.csv')
recipes = pd.read_csv(filepath_or_buffer='../data/recipes.csv')

Merge two datasets

In [3]:
# Give the recipe table the same key name as the interactions table, then
# attach the recipe columns to every interaction row (left join on recipe_id).
recipes = recipes.rename(columns={"id": "recipe_id"})
data = interactions.join(
    recipes.set_index('recipe_id'),
    on='recipe_id',
    how='left',
)

Add review count column

In [4]:
def setReviewCountCol(series, name):
    """Count how often each distinct value occurs in ``series``.

    Args:
        series: Iterable of hashable values (e.g. a pandas Series of IDs).
        name: Label for the column that holds the distinct values; the
            occurrence counts are stored under ``f'{name}_count'``.

    Returns:
        pd.DataFrame with one row per distinct value, in order of first
        appearance, and exactly two columns: ``name`` and ``f'{name}_count'``.
    """
    counts = Counter(series)
    # Build both columns explicitly so they exist even when `series` is empty
    # (DataFrame.from_dict on an empty dict has no column 0 to rename, which
    # would leave the result without its count column).
    return pd.DataFrame({
        name: list(counts.keys()),
        f'{name}_count': list(counts.values()),
    })

In [5]:
# Occurrences of each recipe_id in the interactions table, one row per recipe.
recipe_id_count = setReviewCountCol(series=interactions['recipe_id'], name='recipe_id')

Merge with dataset

In [6]:
# Attach each recipe's review count to every row of `data`. Any
# `recipe_id_count` column left over from a previous run of this cell is
# dropped first; otherwise re-running the cell would produce suffixed
# `recipe_id_count_x` / `recipe_id_count_y` columns and the filter on
# `recipe_id_count` in the next step would fail.
data = (
    data
    .drop(columns=['recipe_id_count'], errors='ignore')
    .merge(recipe_id_count, how='left', on='recipe_id')
)
data

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,recipe_id_count
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...,white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,2
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall...",white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,2
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...,devilicious cookie cake delights,20,56824,2002-10-27,"['30-minutes-or-less', 'time-to-make', 'course...","[132.3, 11.0, 39.0, 5.0, 4.0, 11.0, 5.0]",5,"['blend together cake mix , oil and eggs', 'ad...",,"[""devil's food cake mix"", 'vegetable oil', 'eg...",4,1
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,2
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132362,116593,72730,2003-12-09,0,Another approach is to start making sauce with...,cranberry peach maple relish,22,103078,2003-10-06,"['30-minutes-or-less', 'time-to-make', 'course...","[1941.7, 3.0, 1569.0, 3.0, 10.0, 1.0, 154.0]",6,"['blend peaches until coarse', 'place in sauce...",great simple alternative to regular cranberry ...,"['peaches', 'brown sugar', 'allspice', 'maple ...",6,1
1132363,583662,386618,2009-09-29,5,These were so delicious! My husband and I tru...,stacey e s yummy veggie burgers,21,742029,2009-08-24,"['30-minutes-or-less', 'time-to-make', 'course...","[124.7, 1.0, 11.0, 24.0, 10.0, 0.0, 8.0]",9,['microwave carrot slices in 1 / 4 inch water ...,my friend stacey significantly modified a reci...,"['carrot', 'garbanzo beans', 'salsa', 'corn fl...",8,1
1132364,157126,78003,2008-06-23,5,WOW! Sometimes I don't take the time to rate ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,4
1132365,53932,78003,2009-01-11,4,Very good! I used regular port as well. The ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,4


Remove recipes that have only one review

In [7]:
# Keep only the rows whose recipe has more than one review.
data_filtered = data.loc[data['recipe_id_count'].gt(1)]
data_filtered

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,recipe_id_count
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...,white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,2
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall...",white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,2
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,2
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,2
5,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,sugared raspberries,10,37449,2005-05-02,"['15-minutes-or-less', 'time-to-make', 'course...","[838.0, 1.0, 820.0, 0.0, 2.0, 0.0, 71.0]",6,"['carefully pick over the berries , removing l...",here's an old method for preserving fruit with...,"['raspberries', 'granulated sugar']",2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132360,2002357020,82303,2018-12-05,5,Delicious quick thick chocolate sauce with ing...,easy microwave hot fudge topping,10,106436,2004-01-28,"['15-minutes-or-less', 'time-to-make', 'course...","[177.1, 9.0, 100.0, 3.0, 1.0, 18.0, 10.0]",7,"['mix dry ingredients', 'stir in room temperat...",this is an easy and delicious way to get a cho...,"['sugar', 'cocoa', 'cornstarch', 'salt', 'wate...",7,19
1132361,102526,54493,2007-04-26,0,I am not going to rate this because I did have...,garlic clove chicken,65,49304,2003-02-21,"['weeknight', 'time-to-make', 'course', 'main-...","[338.3, 32.0, 1.0, 7.0, 53.0, 29.0, 2.0]",10,"['preheat oven to 325 degrees', 'spray a large...","garlic and chicken what could be better, the c...","['chicken', 'garlic', 'dry white wine', 'lemon...",6,2
1132364,157126,78003,2008-06-23,5,WOW! Sometimes I don't take the time to rate ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,4
1132365,53932,78003,2009-01-11,4,Very good! I used regular port as well. The ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,4


Clean up by dropping `recipe_id_count` column

In [8]:
# The helper count column has served its purpose as a filter; remove it.
data_cleaned = data_filtered.drop(columns=['recipe_id_count'])
data_cleaned

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...,white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall...",white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13
5,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,sugared raspberries,10,37449,2005-05-02,"['15-minutes-or-less', 'time-to-make', 'course...","[838.0, 1.0, 820.0, 0.0, 2.0, 0.0, 71.0]",6,"['carefully pick over the berries , removing l...",here's an old method for preserving fruit with...,"['raspberries', 'granulated sugar']",2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132360,2002357020,82303,2018-12-05,5,Delicious quick thick chocolate sauce with ing...,easy microwave hot fudge topping,10,106436,2004-01-28,"['15-minutes-or-less', 'time-to-make', 'course...","[177.1, 9.0, 100.0, 3.0, 1.0, 18.0, 10.0]",7,"['mix dry ingredients', 'stir in room temperat...",this is an easy and delicious way to get a cho...,"['sugar', 'cocoa', 'cornstarch', 'salt', 'wate...",7
1132361,102526,54493,2007-04-26,0,I am not going to rate this because I did have...,garlic clove chicken,65,49304,2003-02-21,"['weeknight', 'time-to-make', 'course', 'main-...","[338.3, 32.0, 1.0, 7.0, 53.0, 29.0, 2.0]",10,"['preheat oven to 325 degrees', 'spray a large...","garlic and chicken what could be better, the c...","['chicken', 'garlic', 'dry white wine', 'lemon...",6
1132364,157126,78003,2008-06-23,5,WOW! Sometimes I don't take the time to rate ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10
1132365,53932,78003,2009-01-11,4,Very good! I used regular port as well. The ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10


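Reserialize recipe and user IDs: map each original ID to a consecutive integer starting at 0, assigned in ascending order of the original IDs.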

In [9]:
def reserialize(values):
    """Assign consecutive integer codes to the distinct entries of ``values``.

    Args:
        values: Array-like of values accepted by ``np.unique``.

    Returns:
        dict mapping each distinct value to its 0-based position in the
        sorted array of distinct values returned by ``np.unique``.
    """
    ordered = np.unique(values)  # sorted, duplicates removed
    return dict(zip(ordered, range(len(ordered))))

In [10]:
# Old ID -> new consecutive ID, built only from the rows that survived filtering.
reserialized_User_ID = reserialize(data_cleaned['user_id'])
reserialized_Recipe_ID = reserialize(data_cleaned['recipe_id'])

In [11]:
# Turn each old->new ID dict into a two-column lookup table:
# the dict keys become the old-ID column, the dict values the new-ID column.
user_ID_converted_dataframe = (
    pd.DataFrame.from_dict(reserialized_User_ID, orient='index')
    .reset_index()
    .rename(columns={'index': 'user_id', 0: 'new_user_id'})
)
recipe_ID_converted_dataframe = (
    pd.DataFrame.from_dict(reserialized_Recipe_ID, orient='index')
    .reset_index()
    .rename(columns={'index': 'recipe_id', 0: 'new_recipe_id'})
)

Integrate new serialization into dataset

In [12]:
# Look up the new user ID and the new recipe ID for every row, appending them
# as `new_user_id` and `new_recipe_id` next to the original ID columns.
data_reserialized = (
    data_cleaned
    .join(user_ID_converted_dataframe.set_index('user_id'), on='user_id')
    .join(recipe_ID_converted_dataframe.set_index('recipe_id'), on='recipe_id')
)
data_reserialized

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,new_user_id,new_recipe_id
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...,white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,3787,16642
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall...",white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9,95286,16642
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,14502,34897
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",baked potato toppings,10,64342,2004-02-25,"['15-minutes-or-less', 'time-to-make', 'course...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13,6559,34897
5,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,sugared raspberries,10,37449,2005-05-02,"['15-minutes-or-less', 'time-to-make', 'course...","[838.0, 1.0, 820.0, 0.0, 2.0, 0.0, 71.0]",6,"['carefully pick over the berries , removing l...",here's an old method for preserving fruit with...,"['raspberries', 'granulated sugar']",2,5690,49598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132360,2002357020,82303,2018-12-05,5,Delicious quick thick chocolate sauce with ing...,easy microwave hot fudge topping,10,106436,2004-01-28,"['15-minutes-or-less', 'time-to-make', 'course...","[177.1, 9.0, 100.0, 3.0, 1.0, 18.0, 10.0]",7,"['mix dry ingredients', 'stir in room temperat...",this is an easy and delicious way to get a cho...,"['sugar', 'cocoa', 'cornstarch', 'salt', 'wate...",7,215681,33795
1132361,102526,54493,2007-04-26,0,I am not going to rate this because I did have...,garlic clove chicken,65,49304,2003-02-21,"['weeknight', 'time-to-make', 'course', 'main-...","[338.3, 32.0, 1.0, 7.0, 53.0, 29.0, 2.0]",10,"['preheat oven to 325 degrees', 'spray a large...","garlic and chicken what could be better, the c...","['chicken', 'garlic', 'dry white wine', 'lemon...",6,11621,22415
1132364,157126,78003,2008-06-23,5,WOW! Sometimes I don't take the time to rate ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,17831,32160
1132365,53932,78003,2009-01-11,4,Very good! I used regular port as well. The ...,pot roast with port stove top,115,108291,2003-12-05,"['weeknight', 'time-to-make', 'course', 'prepa...","[828.2, 87.0, 22.0, 20.0, 93.0, 112.0, 4.0]",8,"['in a large pan , heat oil and brown roast on...",this is a recipe from the frugal gourmet cooki...,"['boneless beef chuck roast', 'olive oil', 'ta...",10,5947,32160


Finalize dataset by keeping the new IDs and removing the old ones.

In [13]:
# Swap the IDs: discard the original ID columns, then give the reserialized
# columns the original column names.
data_finalized = (
    data_reserialized
    .drop(columns=['user_id', 'recipe_id'])
    .rename(columns={"new_user_id": "user_id", "new_recipe_id": "recipe_id"})
)


Finalized dataset

In [14]:
# Display only: a copy ordered by the new recipe ID (data_finalized itself is not modified).
data_finalized.sort_values("recipe_id")

Unnamed: 0,date,rating,review,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,user_id,recipe_id
110594,2009-01-15,4,very good!! the directions were alittle weird ...,low fat berry blue frozen dessert,1485,1533,1999-08-09,"['weeknight', 'time-to-make', 'course', 'prepa...","[170.9, 3.0, 120.0, 1.0, 6.0, 6.0, 12.0]",13,"['toss 2 cups berries with sugar', 'let stand ...","this is yummy and low-fat, it always turns out...","['blueberries', 'granulated sugar', 'vanilla y...",4,84409,0
110595,2014-07-22,4,This does taste great! I think the recipe need...,low fat berry blue frozen dessert,1485,1533,1999-08-09,"['weeknight', 'time-to-make', 'course', 'prepa...","[170.9, 3.0, 120.0, 1.0, 6.0, 6.0, 12.0]",13,"['toss 2 cups berries with sugar', 'let stand ...","this is yummy and low-fat, it always turns out...","['blueberries', 'granulated sugar', 'vanilla y...",4,158411,0
110596,2014-08-14,4,Tasty and refreshing! I love the creamy flavor...,low fat berry blue frozen dessert,1485,1533,1999-08-09,"['weeknight', 'time-to-make', 'course', 'prepa...","[170.9, 3.0, 120.0, 1.0, 6.0, 6.0, 12.0]",13,"['toss 2 cups berries with sugar', 'let stand ...","this is yummy and low-fat, it always turns out...","['blueberries', 'granulated sugar', 'vanilla y...",4,126961,0
110593,2008-02-13,5,"Yummy, yummy, yummy! I am a big fan of fruit a...",low fat berry blue frozen dessert,1485,1533,1999-08-09,"['weeknight', 'time-to-make', 'course', 'prepa...","[170.9, 3.0, 120.0, 1.0, 6.0, 6.0, 12.0]",13,"['toss 2 cups berries with sugar', 'let stand ...","this is yummy and low-fat, it always turns out...","['blueberries', 'granulated sugar', 'vanilla y...",4,57566,0
634159,2007-07-20,5,My parents went like wow when they dranked it! :),best lemonade,35,1566,1999-09-05,"['60-minutes-or-less', 'time-to-make', 'course...","[311.1, 0.0, 308.0, 0.0, 0.0, 0.0, 27.0]",8,"['into a 1 quart jar with tight fitting lid , ...",this is from one of my first good house keepi...,"['sugar', 'lemons, rind of', 'fresh water', 'f...",6,53641,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524809,2018-10-10,5,I was a little surprised to see the previous r...,sheet pan turkey caprese meatballs with rosema...,50,2001112113,2018-08-31,"['60-minutes-or-less', 'time-to-make', 'course...","[541.6, 44.0, 14.0, 43.0, 74.0, 45.0, 11.0]",14,"['preheat oven to 400 degrees f', 'line a baki...",description: try these turkey caprese meatball...,"['ground turkey', 'egg', 'italian seasoning', ...",15,214596,139681
771231,2018-09-11,0,What is cauliflower rice ? Is it a califlower ...,sheet pan shrimp scampi,30,2001112113,2018-08-31,"['30-minutes-or-less', 'time-to-make', 'course...","[728.9, 43.0, 2.0, 69.0, 78.0, 64.0, 25.0]",14,"['preheat oven to 450 degrees f', 'line a baki...","a super quick, yet super delicious sheet pan s...","['unsalted butter', 'olive oil', 'salt', 'crus...",12,207893,139682
771232,2018-10-22,0,What do you do with the olive oil?,sheet pan shrimp scampi,30,2001112113,2018-08-31,"['30-minutes-or-less', 'time-to-make', 'course...","[728.9, 43.0, 2.0, 69.0, 78.0, 64.0, 25.0]",14,"['preheat oven to 450 degrees f', 'line a baki...","a super quick, yet super delicious sheet pan s...","['unsalted butter', 'olive oil', 'salt', 'crus...",12,207339,139682
119482,2018-12-03,0,"I thought this came out pretty good, nice fres...",baked shrimp and orzo with chickpeas lemon a...,15,2002285039,2018-10-02,"['15-minutes-or-less', 'time-to-make', 'main-i...","[735.9, 39.0, 22.0, 71.0, 75.0, 40.0, 27.0]",18,"['preheat oven to 450 degrees', 'dry shrimp wi...",shrimp and orzo make for a simple and flavorfu...,"['jumbo shrimp', 'salt & freshly ground black ...",15,213239,139683


Save as Pickle file

In [15]:
# Write the finalized frame to disk, then report completion on stdout.
data_finalized.to_pickle(path="../data/processed_data.pkl")
print("Pickle file saved successfully")

Pickle file saved successfully


Data preparation for the "Ingredients I might want to have" feature: a frontend button that randomly generates ingredients.


Generate a JSON file listing the 1000 most popular ingredients, selected from 6714 ingredients across a total of 39774 recipes.

The most popular ingredient comes first.

Original dataset: https://www.kaggle.com/datasets/kaggle/recipe-ingredients-dataset?resource=download&select=train.json

In [16]:
import json

# Generate the top 1000 popular ingredients out of a total of 6714 ingredients
# from 39774 recipes. The most popular ingredient comes first.

# JSON text is read as UTF-8 explicitly instead of relying on the platform's
# default encoding.
with open('../data/ingredients.json', encoding='utf-8') as f:
    data = json.load(f)

# Count every listed ingredient under its str() form. Using the same key for
# both lookup and update keeps the tally correct even for non-string entries.
ingredients = Counter(
    str(ingredient) for item in data for ingredient in item['ingredients']
)

# most_common() orders by descending count; ingredients with equal counts stay
# in the order they were first encountered.
sorted_ingredients_in_frequency = [
    ingredient for ingredient, _ in ingredients.most_common()
]
random_generate_ingredients_range = sorted_ingredients_in_frequency[:1000]

# only change generating to true if you want to regenerate the data
# data is already generated. Please do not regenerate when re-run.
generating = False
if generating:
    with open('../data/top_ingredients.json', 'w', encoding='utf-8') as f:
        json.dump(random_generate_ingredients_range, f, indent=2)

The top 1000 ingredients are stored in "../data/top_ingredients.json".