In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import ast
import pickle
from surprise import Dataset, Reader, KNNWithMeans, KNNWithZScore
from surprise.model_selection import GridSearchCV

In [164]:
recipes = pd.read_csv("Food/RAW_recipes.csv")
user_interactions = pd.read_csv("Food/RAW_interactions.csv")


In [165]:
recipes[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']] = recipes.nutrition.str.strip("[]").str.split(",", expand=True).astype('float')
recipes.drop('nutrition', axis=1, inplace=True)
recipes.drop('submitted', axis=1, inplace=True)
recipes.drop('contributor_id', axis=1, inplace=True)

In [166]:
recipes.head()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
0,arriba baked winter squash mexican style,137739,55,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


In [167]:
user_interactions.drop('date', axis=1, inplace=True)
user_interactions.drop('review', axis=1, inplace=True)
user_interactions = user_interactions.rename(columns={'recipe_id': 'id'})

In [168]:
user_interactions.head()

Unnamed: 0,user_id,id,rating
0,38094,40893,4
1,1293707,40893,5
2,8937,44394,4
3,126440,85009,5
4,57222,85009,5


In [169]:
ratings_count = user_interactions.groupby('user_id').size()
users_with_one_rating = ratings_count[ratings_count == 1].index.tolist()

In [170]:
for i in range(1, 11):
    print(f"Number of users with {i} review: {len(ratings_count[ratings_count == i].index.tolist())}")

Number of users with 1 review: 166256
Number of users with 2 review: 22738
Number of users with 3 review: 9346
Number of users with 4 review: 5144
Number of users with 5 review: 3421
Number of users with 6 review: 2566
Number of users with 7 review: 1937
Number of users with 8 review: 1461
Number of users with 9 review: 1215
Number of users with 10 review: 1040


In [171]:
user_interactions = user_interactions[~user_interactions['user_id'].isin(users_with_one_rating)]

In [172]:
user_interactions.describe()

Unnamed: 0,user_id,id,rating
count,966111.0,966111.0,966111.0
mean,32612890.0,158358.502113,4.529013
std,248137200.0,127955.619873,1.058641
min,1533.0,38.0,0.0
25%,121690.0,53908.0,4.0
50%,269480.0,118761.0,5.0
75%,558429.0,239179.0,5.0
max,2002368000.0,537716.0,5.0


In [173]:
user_id_map = {}
recipe_id_map = {}
user_norm = 1
for user_id in user_interactions['user_id'].unique():
    user_id_map[user_id] = user_norm
    user_norm += 1
recipe_norm = 1
for recipe_id in recipes['id'].unique():
    recipe_id_map[recipe_id] = recipe_norm
    recipe_norm += 1

In [174]:
user_interactions.head()

Unnamed: 0,user_id,id,rating
0,38094,40893,4
1,1293707,40893,5
2,8937,44394,4
3,126440,85009,5
4,57222,85009,5


In [175]:
user_interactions['user_id'] = user_interactions['user_id'].map(user_id_map)
user_interactions['id'] = user_interactions['id'].map(recipe_id_map)
recipes['id'] = recipes['id'].map(recipe_id_map)

In [176]:
recipes.head()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
0,arriba baked winter squash mexican style,1,55,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,2,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,3,130,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,4,45,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,5,190,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


In [177]:
user_interactions.head()

Unnamed: 0,user_id,id,rating
0,1,225878,4
1,2,225878,5
2,3,71600,4
3,4,14907,5
4,5,14907,5


In [178]:
user_interactions.to_csv("Data/Interactions.csv", index=False)
recipes.to_csv("Data/Recipes.csv", index=False)

In [179]:
bayesian_interactions = user_interactions.groupby('id').agg({'rating': ['mean', 'count']})

In [180]:
bayesian_interactions.columns = bayesian_interactions.columns.droplevel(level=0)

In [181]:
bayesian_interactions = bayesian_interactions.rename(columns={'mean': 'recipe_mean', 'count': 'n_ratings'})

In [182]:
C = 10
global_mean = user_interactions['rating'].mean()

In [183]:
def bayesian_average(rating, n_ratings):
    return ((C * global_mean + rating * n_ratings) / (C + n_ratings))

In [185]:
bayes = user_interactions.merge(bayesian_interactions, on='id')

In [186]:
bayes['baeysian_avg'] = bayes.apply(lambda x: bayesian_average(x['rating'], x['n_ratings']), axis=1)

In [187]:
bayes.head()

Unnamed: 0,user_id,id,rating,recipe_mean,n_ratings,baeysian_avg
0,1,225878,4,4.5,2,4.440844
1,2,225878,5,4.5,2,4.607511
2,3,71600,4,4.0,1,4.480921
3,4,14907,5,5.0,2,4.607511
4,5,14907,5,5.0,2,4.607511


In [188]:
bayes.drop('recipe_mean', axis=1, inplace=True)
bayes.drop('n_ratings', axis=1, inplace=True)
bayes.drop('rating', axis=1, inplace=True)

In [189]:
bayes.head()

Unnamed: 0,user_id,id,baeysian_avg
0,1,225878,4.440844
1,2,225878,4.607511
2,3,71600,4.480921
3,4,14907,4.607511
4,5,14907,4.607511


In [190]:
bayes.to_csv("Data/Bayesian_Interactions.csv", index=False)

In [192]:
recipe_stats = user_interactions.groupby('id').agg({'rating': ['mean', 'count']})
recipe_stats.columns = ["_".join(col) for col in recipe_stats.columns.to_flat_index()]
recipe_stats = recipe_stats.rename(columns={'rating_mean': 'recipe_mean', 'rating_count': 'n_ratings'})

C = 10
global_mean = user_interactions['rating'].mean()

recipe_stats['bayesian_avg'] = recipe_stats.apply(lambda row: bayesian_average(row['recipe_mean'], row['n_ratings']), axis=1)

In [193]:
recipe_stats[:20]

Unnamed: 0_level_0,recipe_mean,n_ratings,bayesian_avg
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5.0,3,4.637702
2,3.0,3,4.176164
4,4.0,1,4.480921
5,5.0,1,4.57183
6,5.0,2,4.607511
7,2.0,1,4.299102
8,5.0,1,4.57183
9,5.0,2,4.607511
10,4.4,5,4.486008
11,3.0,1,4.390012


In [194]:
recipe_stats.drop('recipe_mean', axis=1, inplace=True)
recipe_stats.drop('n_ratings', axis=1, inplace=True)

In [195]:
recipe_stats.head()

Unnamed: 0_level_0,bayesian_avg
id,Unnamed: 1_level_1
1,4.637702
2,4.176164
4,4.480921
5,4.57183
6,4.607511


In [196]:
recipe_stats.to_csv("Data/Recipe_Bayesian_Ratings.csv")

In [25]:
excercise = pd.read_csv("Gym/GymDataset.csv")

In [26]:
print(len(excercise))
excercise[:10]

2918


Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0,
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,,
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,,
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,,
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,,
5,5,Decline band press sit-up,The decline band press sit-up is a weighted co...,Strength,Abdominals,Bands,Intermediate,,
6,6,FYR2 Banded Frog Pump,,Strength,Abdominals,Bands,Intermediate,,
7,7,Band low-to-high twist,The band low-to-high twist is a core exercise ...,Strength,Abdominals,Bands,Intermediate,,
8,8,Barbell roll-out,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average
9,9,Barbell Ab Rollout - On Knees,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average


In [27]:
print(excercise['Type'].unique())

['Strength' 'Plyometrics' 'Cardio' 'Stretching' 'Powerlifting' 'Strongman'
 'Olympic Weightlifting']


In [28]:
excercise = excercise.dropna(subset=['Desc'])

In [29]:
print(len(excercise))
excercise[:10]

1368


Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0,
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,,
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,,
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,,
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,,
5,5,Decline band press sit-up,The decline band press sit-up is a weighted co...,Strength,Abdominals,Bands,Intermediate,,
7,7,Band low-to-high twist,The band low-to-high twist is a core exercise ...,Strength,Abdominals,Bands,Intermediate,,
8,8,Barbell roll-out,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average
9,9,Barbell Ab Rollout - On Knees,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average
10,10,Decline bar press sit-up,The decline bar press sit-up is a weighted cor...,Strength,Abdominals,Barbell,Intermediate,8.5,Average


In [30]:
excercise = excercise.dropna(subset=['Rating'])

In [31]:
print(len(excercise))
excercise[:10]

595


Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0,
8,8,Barbell roll-out,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average
9,9,Barbell Ab Rollout - On Knees,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9,Average
10,10,Decline bar press sit-up,The decline bar press sit-up is a weighted cor...,Strength,Abdominals,Barbell,Intermediate,8.5,Average
11,11,Bench barbell roll-out,The bench barbell roll-out is a challenging ex...,Strength,Abdominals,Barbell,Beginner,8.3,Average
13,13,Seated bar twist,The seated bar twist is a core exercise meant ...,Strength,Abdominals,Barbell,Intermediate,4.7,Average
21,21,Kettlebell Windmill,The single-kettlebell windmill is a dynamic ke...,Strength,Abdominals,Kettlebells,Intermediate,7.7,Average
23,23,Kettlebell 3-point leg extension,The kettlebell 3-point leg extension is a dyna...,Strength,Abdominals,Kettlebells,Intermediate,0.0,
39,39,Dumbbell V-Sit Cross Jab,The dumbbell V-sit cross jab is a hybrid movem...,Strength,Abdominals,Dumbbell,Intermediate,9.3,Average
40,40,Dumbbell spell caster,The dumbbell spell caster is an exercise that ...,Strength,Abdominals,Dumbbell,Beginner,9.3,Average


In [32]:
excercise.drop('RatingDesc', axis=1, inplace=True)

In [33]:
excercise.head()

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0
8,8,Barbell roll-out,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9
9,9,Barbell Ab Rollout - On Knees,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,8.9
10,10,Decline bar press sit-up,The decline bar press sit-up is a weighted cor...,Strength,Abdominals,Barbell,Intermediate,8.5
11,11,Bench barbell roll-out,The bench barbell roll-out is a challenging ex...,Strength,Abdominals,Barbell,Beginner,8.3
