In [34]:
import pandas as pd
from ast import literal_eval

In [35]:
recipe_df = pd.read_csv('./data/RAW_recipes.csv')
inter_df = pd.read_csv('./data/RAW_interactions.csv')

In [36]:
''' 
Filtering
'''

# Drop NA and dups
recipe_df.dropna(inplace=True)
recipe_df.drop_duplicates(inplace=True)
inter_df.dropna(inplace=True)
inter_df.drop_duplicates(inplace=True)
inter_df.drop(columns=['review'], inplace=True)


# get users/recipes with n or more reviews
n = 10
recipe_rating_count = inter_df[['recipe_id', 'rating']].groupby(by=['recipe_id']).count()
recipe_more_than_n_rating = recipe_rating_count[recipe_rating_count['rating'] >= n]

user_rating_count = inter_df[['user_id', 'rating']].groupby(by=['user_id']).count()
user_more_than_n_rating = user_rating_count[user_rating_count['rating'] >= n]

# Filter out recipes and users with less than n ratings
inter_df = inter_df[inter_df['recipe_id'].isin(list(recipe_more_than_n_rating.index.to_numpy()))]
inter_df = inter_df[inter_df['user_id'].isin(list(user_more_than_n_rating.index.to_numpy()))]
recipe_df = recipe_df[recipe_df['id'].isin(list(recipe_more_than_n_rating.index.to_numpy()))]
recipe_df = recipe_df[recipe_df['id'].isin(list(inter_df['recipe_id'].unique()))]

In [37]:
'''
Mapping User_id, Recipe_id
'''

user_ids = inter_df['user_id'].unique()
user_id_map = {}
for i, uid in enumerate(user_ids):
    user_id_map[uid] = i

recipe_ids = inter_df['recipe_id'].unique()
recipe_id_map = {}
for i, rec_id in enumerate(recipe_ids):
    recipe_id_map[rec_id] = i

inter_df['user_id'] = inter_df['user_id'].apply(lambda x: user_id_map[x])
inter_df['recipe_id'] = inter_df['recipe_id'].apply(lambda x: recipe_id_map[x])
recipe_df['id'] = recipe_df['id'].apply(lambda x: recipe_id_map[x])


In [38]:
'''
Get unique ingredients, create ingrient id mapping/dataframe
update recipe_df with ingredient tokens
'''

unique_ingredients = set()
ingredients_list = recipe_df['ingredients']
for ingredients in ingredients_list:
    ingredients = literal_eval(ingredients)
    for ingredient in ingredients:
        unique_ingredients.add(ingredient)


unique_ingredients_map = {}
for i, ingredient in enumerate(unique_ingredients):
    unique_ingredients_map[ingredient] = i


def map_ingredients(ingredient_list):
    ingredient_list = literal_eval(ingredient_list)
    for i in range(len(ingredient_list)):
        ingredient_list[i] = unique_ingredients_map[ingredient_list[i]] 

    return ingredient_list


recipe_df['ingredients'] = recipe_df['ingredients'].apply(map_ingredients)


recipe_name_map = {}
for i, row in recipe_df[['name', 'id']].iterrows():
    recipe_name_map[row['id']] = row['name']
    
recipe_name_map = dict(sorted(recipe_name_map.items(), key=lambda item: item[0]))
ingredients_df = pd.DataFrame(index=recipe_name_map.keys(), columns=['recipe name'], data=recipe_name_map.values())

In [39]:
'''
Get unique tags, create tag -> id mapping
update recipe_df

Add country columns
'''

# add region tags
with open('region_tags.txt', 'r') as f:
    data = f.read().rstrip()
    region_tags = literal_eval(data)


def extract_country(tags):
    tags = literal_eval(tags)
    for tag in tags:
        if tag in region_tags:
            return tag
    return ''


recipe_df['region'] = recipe_df['tags'].apply(extract_country)


unique_tags = set()
tags_list = recipe_df['tags']

for tags in tags_list:
    tags = literal_eval(tags)
    for tag in tags:
        unique_tags.add(tag)

unique_tags_id_map = {}
for i, tag in enumerate(sorted(list(unique_tags))):
    unique_tags_id_map[tag] = i

tags_df = pd.DataFrame(index=unique_tags_id_map.values(), columns=['tag description'], data=unique_tags_id_map.keys())

def map_tags(tags):
    tags = literal_eval(tags)
    for i in range(len(tags)):
        tags[i] = unique_tags_id_map[tags[i]]
    
    return tags


recipe_df['tags'] = recipe_df['tags'].apply(map_tags)

In [41]:
print(ingredients_df, tags_df)

                                             recipe name
0                                    potato crab chowder
1                            tom s vanilla frozen yogurt
2                      st  louis style gooey butter cake
3                              albers sweet corn muffins
4                                  jungle gems snack mix
...                                                  ...
21299                   easy microwave hot fudge topping
21300                               one loaf white bread
21301                      salmon cakes with lemon aioli
21302  savory roasted pepper bread for the bread machine
21303                          roast potatoes with herbs

[20841 rows x 1 columns]          tag description
0                       
1          1-day-or-more
2     15-minutes-or-less
3        3-steps-or-less
4     30-minutes-or-less
..                   ...
478                wings
479               winter
480  yams-sweet-potatoes
481                yeast
482            

In [42]:
recipe_df

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,region
12,better then bush s baked beans,19081,2970,85627,2003-07-26,"[470, 447, 112, 253, 119, 343, 296, 286, 394, ...","[462.4, 28.0, 214.0, 69.0, 14.0, 29.0, 23.0]",9,['in a very large sauce pan cover the beans an...,i'd have to say that this is a labor of love d...,"[2564, 3587, 1953, 211, 4186, 3490, 3971, 4320...",13,north-american
15,chicken lickin good pork chops,11229,500,14664,2003-06-06,"[470, 447, 112, 253, 343, 252, 334, 116, 130, ...","[105.7, 8.0, 0.0, 26.0, 5.0, 4.0, 3.0]",5,"['dredge pork chops in mixture of flour , salt...",here's and old standby i enjoy from time to ti...,"[5439, 3810, 3201, 2975, 3971, 6228, 5447]",7,
16,chile rellenos,19851,45,52268,2002-10-14,"[7, 447, 112, 253, 119, 343, 286, 6, 12, 394, ...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"[1303, 2055, 3748, 4186, 6228]",5,north-american
17,chinese candy,19611,15,35268,2002-03-29,"[2, 447, 112, 343, 296, 6, 128, 140, 222, 66, ...","[232.7, 21.0, 77.0, 4.0, 6.0, 38.0, 8.0]",4,['melt butterscotch chips in heavy saucepan ov...,"a little different, and oh so good. i include ...","[4717, 316, 3316]",3,
33,grilled venison burgers,9072,26,68357,2003-02-15,"[4, 447, 112, 253, 343, 249, 130, 477, 126, 260]","[190.9, 10.0, 10.0, 10.0, 45.0, 15.0, 2.0]",13,"['in bowl , mix dry ingredients', 'add venison...",delicious venison burgers with that,"[5385, 301, 5558, 4660, 5431, 3490, 3201, 2880...",10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231542,zucchini pineapple loaf cake,5169,70,89831,2003-09-15,"[470, 447, 112, 253, 343, 128, 462, 62, 130, 4...","[3323.6, 237.0, 1118.0, 129.0, 91.0, 109.0, 15...",11,"['set oven to 350 degrees', 'grease two 9 x 5-...",i got this recipe from a local paper a long ti...,"[698, 6228, 320, 936, 3379, 5013, 3810, 1711, ...",14,
231566,zucotte,10559,65,65056,2004-08-03,"[447, 112, 253, 343, 296, 246, 394, 462, 140, ...","[145.7, 13.0, 10.0, 2.0, 3.0, 27.0, 5.0]",10,"['in a large , heavy saucpan , melt 1 tbs of t...",a garlicky braised winter squash dish that cou...,"[1234, 4158, 4962, 4884, 3680, 3115]",6,
231587,zuppa di pesce cioppino or fish stew,15851,60,58104,2005-04-12,"[7, 447, 112, 253, 119, 343, 296, 161, 252, 40...","[160.3, 3.0, 17.0, 31.0, 37.0, 2.0, 4.0]",9,['in a pot add 5 cups water and shrimp shells ...,whatever you may call it this italian named fi...,"[4350, 4076, 4858, 4001, 1260, 1506, 5076, 158...",29,european
231600,zuppa toscana soup olive garden clone,11860,60,346694,2007-01-30,"[7, 447, 112, 343, 296, 246, 403, 140, 35, 130...","[432.8, 32.0, 29.0, 39.0, 42.0, 39.0, 15.0]",10,['bring chicken stock and water to a light boi...,i have tried quite a few different recipes tha...,"[1783, 1355, 5857, 6197, 4953, 4858, 4350, 301...",14,


In [43]:
inter_df

Unnamed: 0,user_id,recipe_id,date,rating
31,0,0,2006-11-11,5
43,1,0,2006-02-13,5
44,2,0,2006-03-21,5
46,3,0,2008-02-01,4
47,4,0,2008-03-07,4
...,...,...,...,...
1132309,2009,21299,2010-05-23,5
1132354,11694,21299,2010-07-26,5
1132355,193,21299,2010-10-01,5
1132356,1329,21299,2013-03-18,5
