In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import ast
import pickle
from surprise import Dataset, Reader, KNNWithMeans, KNNWithZScore, SVD, SVDpp, SlopeOne, NMF, CoClustering, KNNBaseline
from surprise.model_selection import GridSearchCV, cross_validate

In [2]:
recipes = pd.read_csv("Data/Recipes.csv")
user_interactions = pd.read_csv("Data/Interactions.csv")
recipe_ratings = pd.read_csv("Data/Recipe_Bayesian_Ratings.csv")

In [3]:
recipes.head()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
0,arriba baked winter squash mexican style,1,55,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,2,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,3,130,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,4,45,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,5,190,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


In [4]:
recipes.describe()

Unnamed: 0,id,minutes,n_steps,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
count,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0,231637.0
mean,115819.0,9398.546,9.765499,9.051153,473.942425,36.0807,84.296865,30.147485,34.68186,45.58915,15.560403
std,66867.98649,4461963.0,5.995128,3.734796,1189.711374,77.79884,800.080897,131.961589,58.47248,98.235758,81.82456
min,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,57910.0,20.0,6.0,6.0,174.4,8.0,9.0,5.0,7.0,7.0,4.0
50%,115819.0,40.0,9.0,9.0,313.4,20.0,25.0,14.0,18.0,23.0,9.0
75%,173728.0,65.0,12.0,11.0,519.7,41.0,68.0,33.0,51.0,52.0,16.0
max,231637.0,2147484000.0,145.0,43.0,434360.2,17183.0,362729.0,29338.0,6552.0,10395.0,36098.0


In [5]:
unique_tags = sorted(list(set([tag for tag_list in recipes['tags'] for tag in ast.literal_eval(tag_list)])))

In [6]:
print(len(unique_tags))
print(unique_tags)

552
['', '1-day-or-more', '15-minutes-or-less', '3-steps-or-less', '30-minutes-or-less', '4-hours-or-less', '5-ingredients-or-less', '60-minutes-or-less', 'Throw the ultimate fiesta with this sopaipillas recipe from Food.com.', 'a1-sauce', 'african', 'american', 'amish-mennonite', 'angolan', 'appetizers', 'apples', 'april-fools-day', 'argentine', 'artichoke', 'asian', 'asparagus', 'australian', 'austrian', 'avocado', 'bacon', 'baja', 'baked-beans', 'baking', 'bananas', 'bar-cookies', 'barbecue', 'bass', 'bean-soup', 'beans', 'beans-side-dishes', 'bear', 'beef', 'beef-barley-soup', 'beef-crock-pot', 'beef-kidney', 'beef-liver', 'beef-organ-meats', 'beef-ribs', 'beef-sauces', 'beef-sausage', 'beginner-cook', 'beijing', 'belgian', 'berries', 'beverages', 'birthday', 'biscotti', 'bisques-cream-soups', 'black-bean-soup', 'black-beans', 'blueberries', 'bok-choys', 'brazilian', 'bread-machine', 'bread-pudding', 'breads', 'breakfast', 'breakfast-casseroles', 'breakfast-eggs', 'breakfast-potato

In [7]:
recipes = pd.merge(recipes, recipe_ratings, how='left', left_on='id', right_on='id', suffixes=(False, False))

In [8]:
recipes.head()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),bayesian_avg
0,arriba baked winter squash mexican style,1,55,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0,4.637702
1,a bit different breakfast pizza,2,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0,4.176164
2,all in the kitchen chili,3,130,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0,
3,alouette potatoes,4,45,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0,4.480921
4,amish tomato ketchup for canning,5,190,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0,4.57183


In [88]:
def get_time_tags():
    from datetime import datetime
    tags = []

    month = int(datetime.today().strftime("%m"))
    if 3 <= month <= 5:
        tags.append('spring')
    elif 6 <= month <= 8:
        tags.append('summer')
    elif 9 <= month <= 11:
        tags.append('fall')
    else:
        tags.append('winter')

    hour_of_day = int(datetime.today().strftime("%H"))
    if 5 <= hour_of_day <= 12:
        tags.append('breakfast')
    if 11 <= hour_of_day <= 4:
        tags.append('lunch')
    return tags

In [98]:
DVP_HIGH = 40.0
DVP_MED = 25.0
DVP_LOW = 10.0

def parse_pdv(dvp, multiplier):
    low = 0.0
    high = float("inf")
    match dvp:
        case "high":
            low = DVP_HIGH * multiplier
        case "med":
            high = DVP_HIGH * multiplier
            low = DVP_LOW * multiplier
        case "low":
            high = DVP_MED * multiplier
    return low, high

def getRecipesWithConfiguration(calories=None, daily=2000, fat="NULL", sat_fat="NULL", sugar="NULL", sodium="NULL", protein="NULL", carbs="NULL", tags=[]):
    high_calorie_lim = float("inf")
    low_calorie_lim = 0

    multiplier = 2000 / daily

    if calories != None:
        high_calorie_lim = max(calories+100, calories * 1.2)
        low_calorie_lim = min(calories-100, calories * 0.8)        
    
    low_fat_lim, high_fat_lim = parse_pdv(fat, multiplier)
    low_sat_fat_lim, high_sat_fat_lim = parse_pdv(sat_fat, multiplier)
    low_sugar_lim, high_sugar_lim = parse_pdv(sugar, multiplier)
    low_sodium_lim, high_sodium_lim = parse_pdv(sodium, multiplier)
    low_protein_lim, high_protein_lim = parse_pdv(protein, multiplier)
    low_carbs_lim, high_carbs_lim = parse_pdv(carbs, multiplier)

    recipes_filter = ((low_calorie_lim <= recipes['calories']) & (recipes['calories'] <= high_calorie_lim) &
                      (low_fat_lim <= recipes['total fat (PDV)']) & (recipes['total fat (PDV)'] <= high_fat_lim) &
                      (low_sat_fat_lim <= recipes['saturated fat (PDV)']) & (recipes['saturated fat (PDV)'] <= high_sat_fat_lim) &
                      (low_sugar_lim <= recipes['sugar (PDV)']) & (recipes['sugar (PDV)'] <= high_sugar_lim) &
                      (low_sodium_lim <= recipes['sodium (PDV)']) & (recipes['sodium (PDV)'] <= high_sodium_lim) &
                      (low_protein_lim <= recipes['protein (PDV)']) & (recipes['protein (PDV)'] <= high_protein_lim) &
                      (low_carbs_lim <= recipes['carbohydrates (PDV)']) & (recipes['carbohydrates (PDV)'] <= high_carbs_lim))

    tags_filter = pd.Series(True, index=recipes.index)

    if tags:
        for tag in tags:
            if type(tag) != str:
                continue
            tags_filter = tags_filter & recipes['tags'].str.contains(tag, case=False)

        recipes_filter = recipes_filter & tags_filter
    
    recipes_found = recipes[recipes_filter]

    recipes_found_sorted = recipes_found.sort_values(by='bayesian_avg', ascending=False)
    print(len(recipes_found_sorted))

    time_tags = get_time_tags()
    recipes_with_time_context = []
    for tag in time_tags:
        if tag in tags:
            continue
        recp = getRecipesWithConfiguration(calories=calories, daily=daily, fat=fat, sat_fat=sat_fat, sugar=sugar, sodium=sodium, protein=protein, carbs=carbs, tags=tags + [tag])
        if not(type(recp) == bool and recp == False):
            recipes_with_time_context.append(recp[:5])

    if recipes_with_time_context == []:
        return recipes_found_sorted
        
    recipes_with_time_context = pd.concat(recipes_with_time_context, ignore_index=True)
    recipes_found_sorted = pd.concat([recipes_with_time_context[:2], recipes_found_sorted], ignore_index=True).drop_duplicates()

    return recipes_found_sorted

In [100]:
results = getRecipesWithConfiguration(calories=1000, fat="mid", sugar="low", sodium="mid", protein="high", carbs="low", tags=['vegetarian'])
print(len(results))
results[:10]

83
2
83


Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),bayesian_avg
0,macaroni cheese with tomato and basil,128092,20,"['30-minutes-or-less', 'time-to-make', 'course...",7,"['cook the macaroni until done and drain', 'co...",a decadent dish. good with a juicy steak.,"['macaroni', 'tasty cheese', 'swiss cheese', '...",11,1023.3,101.0,20.0,37.0,83.0,198.0,22.0,4.57183
1,byessar moroccan broad bean dip,32881,25,"['30-minutes-or-less', 'time-to-make', 'course...",14,"['split the broad bean pods along the seams , ...","did you know that for the ancient egyptians, b...","['broad bean', 'lemon rind', 'garlic cloves', ...",7,849.0,84.0,3.0,94.0,55.0,38.0,22.0,4.520723
2,easy hash brown quiche,76694,65,"['time-to-make', 'course', 'main-ingredient', ...",12,['press hash browns between paper towels to re...,this is a great recipe for those who love a gr...,"['hash browns', 'garlic powder', 'salt', 'butt...",11,1017.5,129.0,9.0,92.0,46.0,223.0,14.0,4.705633
3,baked stuffed avocado,15281,22,"['30-minutes-or-less', 'time-to-make', 'course...",10,"['preheat oven to 400f', 'fry the onion in the...",this is really tasty and so easy to make!,"['onion', 'butter', 'avocados', 'brazil nuts',...",9,1173.3,154.0,24.0,17.0,69.0,149.0,11.0,4.705633
4,pecan blue cheese spread,156903,5,"['15-minutes-or-less', 'time-to-make', 'course...",8,['place nuts in food processor and process unt...,i bought my cuisinart back in 1983. this is th...,"['pecans', 'cream cheese', 'blue cheese']",3,1039.4,159.0,6.0,37.0,46.0,273.0,3.0,4.703466
5,stuffed focaccia,202013,40,"['60-minutes-or-less', 'time-to-make', 'course...",12,"['thaw dough', 'preheat the oven to 400 degree...",found on net. plan on trying this soon. soun...,"['frozen bread dough', 'gorgonzola', 'mozzarel...",7,891.8,119.0,6.0,84.0,87.0,176.0,1.0,4.69948
6,frittata di spaghetti spaghetti omelet,88055,17,"['30-minutes-or-less', 'time-to-make', 'course...",8,"['combine the pasta , eggs and cheese , stirri...",a delicious and simple way to use up leftover ...,"['spaghetti', 'eggs', 'parmesan cheese', 'butt...",5,891.3,82.0,10.0,34.0,69.0,150.0,21.0,4.69948
7,paul s favorite spinach walnut pesto,154570,15,"['15-minutes-or-less', 'time-to-make', 'course...",6,['place all ingredients in a food processor an...,i got this recipe from my brother-in-law - - i...,"['spinach leaves', 'extra virgin olive oil', '...",7,894.5,136.0,5.0,46.0,45.0,94.0,2.0,4.686008
8,cheese stuffed portobello mushrooms with a bal...,39637,25,"['30-minutes-or-less', 'time-to-make', 'course...",7,"['wipe mushroooms and coat with olive oil', 'p...",simple but elegant! glazed with balsamic vineg...,"['portabella mushrooms', 'mozzarella cheese', ...",11,881.8,123.0,23.0,59.0,62.0,114.0,3.0,4.682785
9,classic bistro style gratin dauphinois frenc...,54687,105,"['time-to-make', 'course', 'main-ingredient', ...",7,['preheat the oven to 300f / 150c / gas mark 2...,this classic french potato gratin dish is worl...,"['potatoes', 'gruyere cheese', 'double cream',...",9,805.4,82.0,11.0,10.0,43.0,163.0,20.0,4.682253


In [11]:
results = getRecipesWithConfiguration(calories=600, protein="high", fat="mid", tags=['breakfast', '15-minutes-or-less'])
print(len(results))
results.head()

136


Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),bayesian_avg
86488,french toast breakfast sandwich with canadian ...,86489,15,"['ham', '15-minutes-or-less', 'time-to-make', ...",7,['in a nonstick skillet scramble 2 eggs in 1 t...,"ham, cheese, and eggs on french toast. i am qu...","['eggs', 'milk', 'butter', 'bread', 'deli ham'...",8,596.4,53.0,12.0,52.0,75.0,89.0,10.0,4.832253
215835,tortilla omelet wrap,215836,10,"['15-minutes-or-less', 'time-to-make', 'course...",10,"['whip eggs , half and half , salt and pepper ...",another one for the omelet lovers among us!,"['eggs', 'half-and-half', 'salt and pepper', '...",10,706.6,64.0,13.0,35.0,80.0,96.0,13.0,4.73834
79068,egg in a hole with smoked salmon,79069,10,"['15-minutes-or-less', 'time-to-make', 'course...",5,"['using a cookie cutter or shot glass , cut a ...","i love making ""egg in a hole"". this is made ev...","['country bread', 'unsalted butter', 'eggs', '...",8,488.4,39.0,10.0,42.0,51.0,64.0,13.0,4.73834
23761,biscuits and sage sausage gravy,23762,15,"['15-minutes-or-less', 'time-to-make', 'course...",18,"['fry sausage in large skillet', 'crumble saus...",warning:you will want to lick the plate clean ...,"['sausage', 'whole milk', 'heavy cream', 'flou...",7,637.6,76.0,37.0,46.0,43.0,107.0,8.0,4.73834
60835,cranberry ricotta pancakes,60836,10,"['15-minutes-or-less', 'time-to-make', 'course...",6,['place the egg whites in a small bowl and set...,mmmmmmmm.....pancakes.,"['ricotta cheese', 'dried cranberries', 'flour...",9,674.6,41.0,58.0,18.0,65.0,73.0,24.0,4.722949


In [12]:
user_interactions.head()

Unnamed: 0,user_id,id,rating
0,1,225878,4
1,2,225878,5
2,3,71600,4
3,4,14907,5
4,5,14907,5


In [13]:
user_interactions.describe()

Unnamed: 0,user_id,id,rating
count,966111.0,966111.0,966111.0
mean,9391.486762,114691.657311,4.529013
std,13530.586557,66961.314904,1.058641
min,1.0,1.0,0.0
25%,610.0,58218.0,4.0
50%,2908.0,114819.0,5.0
75%,12233.0,171831.0,5.0
max,60314.0,231635.0,5.0


In [None]:
from flask_sqlalchemy import SQLAlchemy
database_interactions = pd.read_sql

In [14]:
existing_interactions = pd.read_csv("Data/Interactions.csv")

In [15]:
data = Dataset.load_from_df(existing_interactions[["user_id", "id", "rating"]], Reader(rating_scale=(0, 5)))

In [16]:
existing_trainset = data.build_full_trainset()

In [17]:
interactions_reader = Reader(line_format="user item rating", sep=',', rating_scale=(0, 5), skip_lines=1)
interactions_data = Dataset.load_from_file("Data/Interactions.csv", interactions_reader)

In [18]:
interactions_trainset = interactions_data.build_full_trainset()

In [33]:
svd_sim_options = {
    "n_factors": [20, 50, 100, 200, 500],
    "n_epochs": [10, 20, 40, 100],
    "biased": [True, False],
    "lr_all": [0.0005, 0.005, 0.05, 0.5],
    "reg_all": [0.002, 0.02, 0.2]
}

In [35]:
svd_gs = GridSearchCV(SVD, svd_sim_options, measures=["rmse", "mae"], cv=5, n_jobs=-1, joblib_verbose=1)

In [36]:
svd_gs.fit(interactions_data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed: 11.1min
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 1218 tasks      | elapsed: 42.0min
[Parallel(n_jobs=-1)]: Done 1768 tasks      | elapsed: 76.4min
[Parallel(n_jobs=-1)]: Done 2400 out of 2400 | elapsed: 191.8min finished


In [38]:
print(svd_gs.best_score)

{'rmse': 1.0088828446129101, 'mae': 0.47107160727416597}


In [39]:
print(svd_gs.best_params)

{'rmse': {'n_factors': 20, 'n_epochs': 20, 'biased': True, 'lr_all': 0.005, 'reg_all': 0.2}, 'mae': {'n_factors': 500, 'n_epochs': 20, 'biased': True, 'lr_all': 0.5, 'reg_all': 0.002}}


In [61]:
cross_validate(CoClustering(), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.1277  1.1262  1.1288  1.1315  1.1286  1.1286  0.0017  
MAE (testset)     0.6320  0.6306  0.6296  0.6328  0.6322  0.6314  0.0012  
Fit time          28.20   27.80   27.87   28.21   28.70   28.16   0.32    
Test time         2.08    1.98    1.98    2.07    0.77    1.78    0.51    


{'test_rmse': array([1.12772181, 1.12616034, 1.12875994, 1.13152073, 1.12862757]),
 'test_mae': array([0.63196315, 0.63060332, 0.62961786, 0.63284676, 0.63216222]),
 'fit_time': (28.19971752166748,
  27.797827005386353,
  27.869712352752686,
  28.20687246322632,
  28.701749324798584),
 'test_time': (2.0848894119262695,
  1.979933500289917,
  1.9849200248718262,
  2.071107864379883,
  0.770427942276001)}

In [58]:
cross_validate(NMF(), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2128  1.2100  1.2060  1.2079  1.2089  1.2091  0.0023  
MAE (testset)     0.8190  0.8198  0.8213  0.8177  0.8195  0.8195  0.0012  
Fit time          31.07   31.54   31.69   30.76   31.28   31.27   0.33    
Test time         1.81    1.90    1.74    0.73    0.71    1.38    0.54    


{'test_rmse': array([1.21278714, 1.21002853, 1.20597892, 1.20788336, 1.20886785]),
 'test_mae': array([0.81901635, 0.81980505, 0.82130037, 0.81768266, 0.81952913]),
 'fit_time': (31.069397926330566,
  31.544697523117065,
  31.691863536834717,
  30.758241176605225,
  31.28497004508972),
 'test_time': (1.80690336227417,
  1.9009323120117188,
  1.7432210445404053,
  0.7293896675109863,
  0.7133259773254395)}

In [40]:
svdpp = SVDpp()

In [42]:
svdpp.fit(interactions_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x29613c9bf50>

In [45]:
cross_validate(svdpp, interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0242  1.0215  1.0203  1.0170  1.0239  1.0214  0.0026  
MAE (testset)     0.6017  0.5998  0.6007  0.5978  0.5997  0.5999  0.0013  
Fit time          538.21  531.07  533.73  533.60  542.21  535.76  3.96    
Test time         63.88   62.46   63.48   62.01   61.01   62.57   1.03    


{'test_rmse': array([1.02420445, 1.02152263, 1.02027472, 1.01698547, 1.02386378]),
 'test_mae': array([0.60167718, 0.59975385, 0.60069562, 0.59784337, 0.5996951 ]),
 'fit_time': (538.2060167789459,
  531.0698781013489,
  533.7280797958374,
  533.6034462451935,
  542.2100763320923),
 'test_time': (63.88419532775879,
  62.46167182922363,
  63.479918479919434,
  62.01494216918945,
  61.00875973701477)}

In [48]:
svd = SVD(n_factors=20, n_epochs=20, biased=True, lr_all=0.005, reg_all=0.2)

In [49]:
cross_validate(svd, interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0111  1.0100  1.0094  1.0015  1.0139  1.0092  0.0041  
MAE (testset)     0.6042  0.6051  0.6038  0.6016  0.6052  0.6040  0.0013  
Fit time          6.19    6.67    6.97    6.74    6.71    6.66    0.26    
Test time         1.49    0.86    1.57    0.86    1.53    1.26    0.33    


{'test_rmse': array([1.01110033, 1.00997953, 1.00937236, 1.00152769, 1.01391235]),
 'test_mae': array([0.60421846, 0.60513131, 0.60377468, 0.60158843, 0.60522776]),
 'fit_time': (6.1899402141571045,
  6.674145698547363,
  6.972915887832642,
  6.736252546310425,
  6.710755109786987),
 'test_time': (1.4884147644042969,
  0.8562014102935791,
  1.565445899963379,
  0.8630378246307373,
  1.5334908962249756)}

In [23]:
cross_validate(SVD(n_factors=10, n_epochs=1, biased=True, lr_all=0.005, reg_all=0.2), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0313  1.0316  1.0273  1.0359  1.0326  1.0317  0.0027  
MAE (testset)     0.6367  0.6366  0.6354  0.6388  0.6376  0.6370  0.0011  
Fit time          0.61    0.83    0.84    0.84    0.82    0.79    0.09    
Test time         1.46    0.78    1.52    0.78    1.50    1.21    0.35    


{'test_rmse': array([1.03126502, 1.03157465, 1.02731372, 1.03587843, 1.03257837]),
 'test_mae': array([0.63669705, 0.6365708 , 0.63542586, 0.63883009, 0.63759461]),
 'fit_time': (0.6120710372924805,
  0.8256895542144775,
  0.8439140319824219,
  0.8384573459625244,
  0.8244237899780273),
 'test_time': (1.4608609676361084,
  0.7804417610168457,
  1.5245096683502197,
  0.7763094902038574,
  1.4980242252349854)}

In [25]:
cross_validate(SVD(n_factors=1, n_epochs=1, biased=True, lr_all=0.005, reg_all=0.2), interactions_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0284  1.0302  1.0320  1.0304  1.0362  1.0314  0.0026  
MAE (testset)     0.6362  0.6364  0.6375  0.6364  0.6387  0.6370  0.0009  
Fit time          0.55    0.75    0.74    0.74    0.77    0.71    0.08    
Test time         1.48    1.50    0.76    1.48    1.45    1.34    0.29    


{'test_rmse': array([1.02836844, 1.03018195, 1.0319786 , 1.03043578, 1.03621062]),
 'test_mae': array([0.63623913, 0.63636127, 0.63746073, 0.63637599, 0.63866931]),
 'fit_time': (0.550480842590332,
  0.7548167705535889,
  0.742328405380249,
  0.7430908679962158,
  0.7743268013000488),
 'test_time': (1.4789493083953857,
  1.5026335716247559,
  0.7636160850524902,
  1.4844772815704346,
  1.4514143466949463)}

In [15]:
def get_season():
    from datetime import datetime
    month = datetime.today().strftime("%m")
    print(month)
get_season()

03
