In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import ast
import pickle
from surprise import Dataset, Reader, KNNWithMeans, KNNWithZScore, SVD, SVDpp, SlopeOne, NMF, CoClustering, KNNBaseline
from surprise.model_selection import GridSearchCV, cross_validate

In [2]:
# Load the exercise catalogue; the path is relative to the notebook's working directory.
exercise = pd.read_csv("Data/Exercises.csv")

In [3]:
# Preview the first rows to check the column layout.
exercise.head()

Unnamed: 0,id,Title,Desc,Type,BodyPart,Equipment,Level,Rating
0,1,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,6.039267
1,2,Barbell roll-out,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,6.039267
2,3,Barbell Ab Rollout - On Knees,The barbell roll-out is an abdominal exercise ...,Strength,Abdominals,Barbell,Intermediate,6.884721
3,4,Decline bar press sit-up,The decline bar press sit-up is a weighted cor...,Strength,Abdominals,Barbell,Intermediate,6.884721
4,5,Bench barbell roll-out,The bench barbell roll-out is a challenging ex...,Strength,Abdominals,Barbell,Beginner,6.821085


In [4]:
# Summary statistics of the numeric columns.
# NOTE(review): in the recorded output the 25% / 50% / 75% Rating quartiles all equal
# the minimum (6.039267), so most rows share one rating and any ranking by Rating
# will be dominated by ties.
exercise.describe()

Unnamed: 0,id,Rating
count,595.0,595.0
mean,298.0,6.115508
std,171.905982,0.22945
min,1.0,6.039267
25%,149.5,6.039267
50%,298.0,6.039267
75%,446.5,6.039267
max,595.0,6.893812


In [5]:
# Distinct values found in the Type column.
unique_type = exercise['Type'].unique()

In [6]:
# Number of distinct exercise types, then the values themselves (one per output line).
print(len(unique_type), unique_type, sep="\n")

7
['Strength' 'Plyometrics' 'Stretching' 'Powerlifting' 'Strongman' 'Cardio'
 'Olympic Weightlifting']


In [7]:
# Distinct values found in the BodyPart column.
unique_bodypart = exercise['BodyPart'].unique()

In [8]:
# Number of distinct body parts, then the values themselves (one per output line).
print(len(unique_bodypart), unique_bodypart, sep="\n")

16
['Abdominals' 'Abductors' 'Adductors' 'Biceps' 'Calves' 'Chest' 'Forearms'
 'Glutes' 'Hamstrings' 'Lats' 'Lower Back' 'Middle Back' 'Traps'
 'Quadriceps' 'Shoulders' 'Triceps']


In [9]:
# Distinct values found in the Equipment column (NaN counts as a value here).
unique_equipment = exercise['Equipment'].unique()

In [10]:
# Number of distinct equipment values, then the values themselves (one per output line).
print(len(unique_equipment), unique_equipment, sep="\n")

13
['Bands' 'Barbell' 'Kettlebells' 'Dumbbell' 'Other' 'Cable' 'Machine'
 'Body Only' 'Medicine Ball' nan 'Exercise Ball' 'Foam Roll'
 'E-Z Curl Bar']


In [48]:
# Discard exercises that have no Equipment value.
# NOTE(review): this rebinds `exercise`, so the frame loaded from CSV is no longer
# reachable under that name. The cell was executed as In [48], i.e. after the
# NaN-inspection cell In [47] that is placed further down; on a top-to-bottom run
# that inspection will no longer list the rows shown in its recorded output.
exercise = exercise.dropna(subset=['Equipment'])

In [50]:
# Recompute the equipment categories from the current `exercise` frame and show
# how many there are, followed by the values.
unique_equipment = exercise["Equipment"].unique()
print(len(unique_equipment), unique_equipment, sep="\n")

12
['Bands' 'Barbell' 'Kettlebells' 'Dumbbell' 'Other' 'Cable' 'Machine'
 'Body Only' 'Medicine Ball' 'Exercise Ball' 'Foam Roll' 'E-Z Curl Bar']


In [11]:
# Distinct values found in the Level column.
unique_level = exercise['Level'].unique()

In [12]:
# Number of distinct difficulty levels, then the values themselves (one per output line).
print(len(unique_level), unique_level, sep="\n")

3
['Intermediate' 'Beginner' 'Expert']


In [42]:
def getExerciseWithConfiguration(exercises, user_id, user_ratings_count, colab_filter=None, type=None, body_part=None, equipment=None, level=None):
    """Return the exercises matching the requested attributes, best rated first.

    Parameters
    ----------
    exercises : pandas.DataFrame
        Catalogue holding a ``Rating`` column plus whichever of ``Type``,
        ``BodyPart``, ``Equipment`` and ``Level`` are being filtered on.
    user_id, user_ratings_count, colab_filter
        Accepted so existing callers keep working; this function does not
        use them.
    type, body_part, equipment, level : optional
        Exact value required in the ``Type`` / ``BodyPart`` / ``Equipment`` /
        ``Level`` column respectively. ``None`` (the default) disables that
        filter.

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows sorted by ``Rating`` in descending
        order. ``exercises`` itself is not modified.
    """
    # Column name -> requested value. `type` shadows the builtin, but it is part
    # of the keyword interface callers already use, so the name is kept.
    requested = {
        "Type": type,
        "BodyPart": body_part,
        "Equipment": equipment,
        "Level": level,
    }

    selected = exercises
    for column, wanted in requested.items():
        if wanted is None:
            continue
        # Compare values directly instead of splicing them into a query()
        # expression: a value containing a quote (e.g. "Farmer's ...") would
        # otherwise produce an invalid expression.
        selected = selected[selected[column] == wanted]

    # Sort unconditionally so the ordering contract is the same with and
    # without filters (previously the unfiltered call returned the input unsorted).
    return selected.sort_values(by='Rating', ascending=False)

In [43]:
# Example query: Intermediate-level Cardio exercises.
# The two positional zeros fill user_id and user_ratings_count, which the
# function accepts but does not use.
results = getExerciseWithConfiguration(exercise, 0, 0, type='Cardio', level='Intermediate')

In [47]:
# Show up to 20 rows that have a missing value in at least one column.
# NOTE(review): the recorded output (In [47]) was produced before the dropna cell
# (In [48]) that is placed above; every row it lists is missing Equipment, so a
# top-to-bottom run will not reproduce it.
exercise[exercise.isna().any(axis=1)][:20]

Unnamed: 0,id,Title,Desc,Type,BodyPart,Equipment,Level,Rating
82,83,Decline oblique crunch,The decline oblique crunch is a popular bodywe...,Strength,Abdominals,,Intermediate,6.039267
268,269,Glute ham raise-,The glute ham raise is an exercise targeting t...,Strength,Hamstrings,,Beginner,6.039267
269,270,Lying hamstring stretch with band,The lying hamstring stretch with band is a sim...,Stretching,Hamstrings,,Beginner,6.039267
270,271,Alternating lunge jump,The alternating lunge jump is an explosive bod...,Stretching,Hamstrings,,Beginner,6.039267
466,467,Dumbbell lateral hop to sprint,The dumbbell lateral hop to sprint is a multi-...,Plyometrics,Quadriceps,,Intermediate,6.039267
467,468,Smith machine lunge sprint,The Smith machine lunge sprint is a lower-body...,Strength,Quadriceps,,Intermediate,6.039267


In [44]:
# Report how many exercises matched the example query, then preview the first rows.
print(len(results))
results.head()

14


Unnamed: 0,id,Title,Desc,Type,BodyPart,Equipment,Level,Rating
333,334,Rower,The rower is a popular gym machine that is ser...,Cardio,Middle Back,Machine,Intermediate,6.039267
414,415,Stair climber,The stair climber is a popular cardio machine ...,Cardio,Quadriceps,Machine,Intermediate,6.039267
416,417,Elliptical trainer,The elliptical trainer is a popular machine fo...,Cardio,Quadriceps,Machine,Intermediate,6.039267
418,419,Stairmaster,The stair climber is a popular cardio machine ...,Cardio,Quadriceps,Machine,Intermediate,6.039267
421,422,Treadmill running,Treadmill running is a popular cardiovascular ...,Cardio,Quadriceps,Machine,Intermediate,6.039267


In [11]:
# NOTE(review): getRecipesWithConfiguration is not defined in any cell above, and the
# recorded output lists recipes rather than exercises - this looks like a leftover
# from a different (recipe) notebook. Unless the function is defined elsewhere, the
# cell raises NameError on a fresh kernel; it also rebinds `results` from the
# exercise query above. Confirm whether it should be removed.
results = getRecipesWithConfiguration(calories=600, protein="high", fat="mid", tags=['breakfast', '15-minutes-or-less'])
print(len(results))
results.head()

136


Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),bayesian_avg
86488,french toast breakfast sandwich with canadian ...,86489,15,"['ham', '15-minutes-or-less', 'time-to-make', ...",7,['in a nonstick skillet scramble 2 eggs in 1 t...,"ham, cheese, and eggs on french toast. i am qu...","['eggs', 'milk', 'butter', 'bread', 'deli ham'...",8,596.4,53.0,12.0,52.0,75.0,89.0,10.0,4.832253
215835,tortilla omelet wrap,215836,10,"['15-minutes-or-less', 'time-to-make', 'course...",10,"['whip eggs , half and half , salt and pepper ...",another one for the omelet lovers among us!,"['eggs', 'half-and-half', 'salt and pepper', '...",10,706.6,64.0,13.0,35.0,80.0,96.0,13.0,4.73834
79068,egg in a hole with smoked salmon,79069,10,"['15-minutes-or-less', 'time-to-make', 'course...",5,"['using a cookie cutter or shot glass , cut a ...","i love making ""egg in a hole"". this is made ev...","['country bread', 'unsalted butter', 'eggs', '...",8,488.4,39.0,10.0,42.0,51.0,64.0,13.0,4.73834
23761,biscuits and sage sausage gravy,23762,15,"['15-minutes-or-less', 'time-to-make', 'course...",18,"['fry sausage in large skillet', 'crumble saus...",warning:you will want to lick the plate clean ...,"['sausage', 'whole milk', 'heavy cream', 'flou...",7,637.6,76.0,37.0,46.0,43.0,107.0,8.0,4.73834
60835,cranberry ricotta pancakes,60836,10,"['15-minutes-or-less', 'time-to-make', 'course...",6,['place the egg whites in a small bowl and set...,mmmmmmmm.....pancakes.,"['ricotta cheese', 'dried cranberries', 'flour...",9,674.6,41.0,58.0,18.0,65.0,73.0,24.0,4.722949


In [12]:
# Preview the interaction ratings.
# NOTE(review): `user_interactions` is not assigned in any cell above; the recorded
# output must come from an earlier kernel state. Confirm where it is loaded.
user_interactions.head()

Unnamed: 0,user_id,id,rating
0,1,225878,4
1,2,225878,5
2,3,71600,4
3,4,14907,5
4,5,14907,5


In [13]:
# Summary statistics of the interaction ratings; the recorded output shows ratings
# spanning 0 to 5 over 966111 rows.
# NOTE(review): `user_interactions` is not assigned in any cell above - confirm its source.
user_interactions.describe()

Unnamed: 0,user_id,id,rating
count,966111.0,966111.0,966111.0
mean,9391.486762,114691.657311,4.529013
std,13530.586557,66961.314904,1.058641
min,1.0,1.0,0.0
25%,610.0,58218.0,4.0
50%,2908.0,114819.0,5.0
75%,12233.0,171831.0,5.0
max,60314.0,231635.0,5.0


In [None]:
# NOTE(review): unfinished cell that was never executed (no execution count).
# - SQLAlchemy is imported here but not used in any visible cell; if it is needed,
#   the import belongs with the others in the first cell.
# - The assignment below binds the pd.read_sql function object itself; it is not
#   called, so nothing is read from a database.
from flask_sqlalchemy import SQLAlchemy
database_interactions = pd.read_sql

In [14]:
# Load the interactions file as a DataFrame; its user_id, id and rating columns are
# used in the next cell.
existing_interactions = pd.read_csv("Data/Interactions.csv")

In [15]:
# Wrap the (user_id, id, rating) columns as a Surprise dataset with a 0-5 rating scale.
# NOTE(review): interactions_data below is built from the same CSV and is the dataset
# the later visible cells actually use.
data = Dataset.load_from_df(existing_interactions[["user_id", "id", "rating"]], Reader(rating_scale=(0, 5)))

In [16]:
# Build a trainset from every rating in `data` (no hold-out split).
# NOTE(review): existing_trainset is not referenced by any later visible cell.
existing_trainset = data.build_full_trainset()

In [17]:
# Parse the interactions CSV directly with Surprise: comma-separated
# "user item rating" fields, first line skipped, ratings on a 0-5 scale.
# NOTE(review): presumably the skipped line is the header row and the file has
# exactly these three fields per line - confirm against the CSV.
interactions_reader = Reader(
    line_format="user item rating",
    sep=',',
    rating_scale=(0, 5),
    skip_lines=1,
)
interactions_data = Dataset.load_from_file(
    "Data/Interactions.csv",
    interactions_reader,
)

In [18]:
# Build a trainset from every rating in interactions_data (no hold-out split);
# the SVDpp model below is fitted on it.
interactions_trainset = interactions_data.build_full_trainset()

In [33]:
# Hyperparameter grid handed to GridSearchCV for SVD.
# 5 * 4 * 2 * 4 * 3 = 480 parameter combinations in total.
svd_sim_options = dict(
    n_factors=[20, 50, 100, 200, 500],
    n_epochs=[10, 20, 40, 100],
    biased=[True, False],
    lr_all=[0.0005, 0.005, 0.05, 0.5],
    reg_all=[0.002, 0.02, 0.2],
)

In [35]:
# Exhaustive 5-fold grid search over svd_sim_options, scored on RMSE and MAE.
# 480 combinations x 5 folds = 2400 fits, matching the task count in the recorded
# joblib log. n_jobs=-1 runs the fits in parallel (16 workers in the recorded run).
svd_gs = GridSearchCV(SVD, svd_sim_options, measures=["rmse", "mae"], cv=5, n_jobs=-1, joblib_verbose=1)

In [36]:
# Run the grid search. Expensive: the recorded run took about 192 minutes, so avoid
# re-executing this cell casually.
svd_gs.fit(interactions_data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed: 11.1min
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 1218 tasks      | elapsed: 42.0min
[Parallel(n_jobs=-1)]: Done 1768 tasks      | elapsed: 76.4min
[Parallel(n_jobs=-1)]: Done 2400 out of 2400 | elapsed: 191.8min finished


In [38]:
# Best score found by the grid search, reported separately for each measure.
print(svd_gs.best_score)

{'rmse': 1.0088828446129101, 'mae': 0.47107160727416597}


In [39]:
# Parameter set behind each best score. In the recorded output the RMSE-best and
# MAE-best sets differ (the MAE-best one uses lr_all=0.5 and n_factors=500).
print(svd_gs.best_params)

{'rmse': {'n_factors': 20, 'n_epochs': 20, 'biased': True, 'lr_all': 0.005, 'reg_all': 0.2}, 'mae': {'n_factors': 500, 'n_epochs': 20, 'biased': True, 'lr_all': 0.5, 'reg_all': 0.002}}


In [61]:
# Baseline: 5-fold cross-validation of CoClustering with its default hyperparameters.
# Recorded mean RMSE 1.1286, mean MAE 0.6314.
cross_validate(CoClustering(), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.1277  1.1262  1.1288  1.1315  1.1286  1.1286  0.0017  
MAE (testset)     0.6320  0.6306  0.6296  0.6328  0.6322  0.6314  0.0012  
Fit time          28.20   27.80   27.87   28.21   28.70   28.16   0.32    
Test time         2.08    1.98    1.98    2.07    0.77    1.78    0.51    


{'test_rmse': array([1.12772181, 1.12616034, 1.12875994, 1.13152073, 1.12862757]),
 'test_mae': array([0.63196315, 0.63060332, 0.62961786, 0.63284676, 0.63216222]),
 'fit_time': (28.19971752166748,
  27.797827005386353,
  27.869712352752686,
  28.20687246322632,
  28.701749324798584),
 'test_time': (2.0848894119262695,
  1.979933500289917,
  1.9849200248718262,
  2.071107864379883,
  0.770427942276001)}

In [58]:
# Baseline: 5-fold cross-validation of NMF with its default hyperparameters.
# Recorded mean RMSE 1.2091, mean MAE 0.8195.
cross_validate(NMF(), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2128  1.2100  1.2060  1.2079  1.2089  1.2091  0.0023  
MAE (testset)     0.8190  0.8198  0.8213  0.8177  0.8195  0.8195  0.0012  
Fit time          31.07   31.54   31.69   30.76   31.28   31.27   0.33    
Test time         1.81    1.90    1.74    0.73    0.71    1.38    0.54    


{'test_rmse': array([1.21278714, 1.21002853, 1.20597892, 1.20788336, 1.20886785]),
 'test_mae': array([0.81901635, 0.81980505, 0.82130037, 0.81768266, 0.81952913]),
 'fit_time': (31.069397926330566,
  31.544697523117065,
  31.691863536834717,
  30.758241176605225,
  31.28497004508972),
 'test_time': (1.80690336227417,
  1.9009323120117188,
  1.7432210445404053,
  0.7293896675109863,
  0.7133259773254395)}

In [40]:
# SVD++ with its default hyperparameters.
svdpp = SVDpp()

In [42]:
# Fit SVD++ on the full interactions trainset.
# NOTE(review): the cross_validate call on `svdpp` further down presumably refits the
# model on each fold, in which case this fit does not influence the CV scores - confirm
# whether the fitted model is needed for anything else.
svdpp.fit(interactions_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x29613c9bf50>

In [45]:
# 5-fold cross-validation of SVD++. Expensive: the recorded run needed about 536 s of
# fit time per fold. Recorded mean RMSE 1.0214, mean MAE 0.5999.
cross_validate(svdpp, interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0242  1.0215  1.0203  1.0170  1.0239  1.0214  0.0026  
MAE (testset)     0.6017  0.5998  0.6007  0.5978  0.5997  0.5999  0.0013  
Fit time          538.21  531.07  533.73  533.60  542.21  535.76  3.96    
Test time         63.88   62.46   63.48   62.01   61.01   62.57   1.03    


{'test_rmse': array([1.02420445, 1.02152263, 1.02027472, 1.01698547, 1.02386378]),
 'test_mae': array([0.60167718, 0.59975385, 0.60069562, 0.59784337, 0.5996951 ]),
 'fit_time': (538.2060167789459,
  531.0698781013489,
  533.7280797958374,
  533.6034462451935,
  542.2100763320923),
 'test_time': (63.88419532775879,
  62.46167182922363,
  63.479918479919434,
  62.01494216918945,
  61.00875973701477)}

In [48]:
# SVD configured with the RMSE-best parameter set printed by the grid search above.
svd = SVD(n_factors=20, n_epochs=20, biased=True, lr_all=0.005, reg_all=0.2)

In [49]:
# 5-fold cross-validation of the tuned SVD. Recorded mean RMSE 1.0092, mean MAE 0.6040,
# with roughly 6.7 s of fit time per fold.
cross_validate(svd, interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0111  1.0100  1.0094  1.0015  1.0139  1.0092  0.0041  
MAE (testset)     0.6042  0.6051  0.6038  0.6016  0.6052  0.6040  0.0013  
Fit time          6.19    6.67    6.97    6.74    6.71    6.66    0.26    
Test time         1.49    0.86    1.57    0.86    1.53    1.26    0.33    


{'test_rmse': array([1.01110033, 1.00997953, 1.00937236, 1.00152769, 1.01391235]),
 'test_mae': array([0.60421846, 0.60513131, 0.60377468, 0.60158843, 0.60522776]),
 'fit_time': (6.1899402141571045,
  6.674145698547363,
  6.972915887832642,
  6.736252546310425,
  6.710755109786987),
 'test_time': (1.4884147644042969,
  0.8562014102935791,
  1.565445899963379,
  0.8630378246307373,
  1.5334908962249756)}

In [23]:
# Probe: same learning rate and regularisation as the tuned SVD, but only 10 factors
# and a single epoch. Recorded mean RMSE 1.0317 (tuned SVD: 1.0092).
cross_validate(SVD(n_factors=10, n_epochs=1, biased=True, lr_all=0.005, reg_all=0.2), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0313  1.0316  1.0273  1.0359  1.0326  1.0317  0.0027  
MAE (testset)     0.6367  0.6366  0.6354  0.6388  0.6376  0.6370  0.0011  
Fit time          0.61    0.83    0.84    0.84    0.82    0.79    0.09    
Test time         1.46    0.78    1.52    0.78    1.50    1.21    0.35    


{'test_rmse': array([1.03126502, 1.03157465, 1.02731372, 1.03587843, 1.03257837]),
 'test_mae': array([0.63669705, 0.6365708 , 0.63542586, 0.63883009, 0.63759461]),
 'fit_time': (0.6120710372924805,
  0.8256895542144775,
  0.8439140319824219,
  0.8384573459625244,
  0.8244237899780273),
 'test_time': (1.4608609676361084,
  0.7804417610168457,
  1.5245096683502197,
  0.7763094902038574,
  1.4980242252349854)}

In [25]:
# Probe: a single factor and a single epoch. Recorded mean RMSE 1.0314, essentially the
# same as the 10-factor / 1-epoch run above.
cross_validate(SVD(n_factors=1, n_epochs=1, biased=True, lr_all=0.005, reg_all=0.2), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0284  1.0302  1.0320  1.0304  1.0362  1.0314  0.0026  
MAE (testset)     0.6362  0.6364  0.6375  0.6364  0.6387  0.6370  0.0009  
Fit time          0.55    0.75    0.74    0.74    0.77    0.71    0.08    
Test time         1.48    1.50    0.76    1.48    1.45    1.34    0.29    


{'test_rmse': array([1.02836844, 1.03018195, 1.0319786 , 1.03043578, 1.03621062]),
 'test_mae': array([0.63623913, 0.63636127, 0.63746073, 0.63637599, 0.63866931]),
 'fit_time': (0.550480842590332,
  0.7548167705535889,
  0.742328405380249,
  0.7430908679962158,
  0.7743268013000488),
 'test_time': (1.4789493083953857,
  1.5026335716247559,
  0.7636160850524902,
  1.4844772815704346,
  1.4514143466949463)}