In [1]:
import numpy as np

n_users = 25
n_recipes = 32

authors = np.random.randint(0, n_users - 1, size=n_recipes)
ratings = np.random.randint(1, 6, size=(n_users, n_recipes))

ratings = ratings.astype(float)
ratings.ravel()[np.random.choice(ratings.size, ratings.size//2, replace=False)] = np.nan

In [2]:
import dask_distance 
import dask.array as da

from dask.distributed import Client
client = Client(memory_limit='8GB')

Perhaps you already have a cluster running?
Hosting the HTTP server on port 62401 instead


In [11]:
import sparse
x = da.random.random((25000, 150000), chunks=(5000, 5000))
x[x < 0.99983] = 0
s = x.map_blocks(sparse.COO).compute()
s

0,1
Format,coo
Data Type,float64
Shape,"(25000, 150000)"
nnz,637595
Density,0.00017002533333333335
Read-only,True
Size,14.6M
Storage ratio,0.0


In [12]:
import time
start = time.time()
d = s.map_blocks(lambda x: x.todense(), dtype=np.ndarray).compute()
print(time.time() - start)

AttributeError: 'COO' object has no attribute 'map_blocks'

In [4]:
import time
start = time.time()
n = da.matmul(s, s.T)
n.compute()
print(time.time() - start)

16.60005784034729


In [4]:
ratings
scaled_ratings = (ratings.T - np.nanmean(ratings, axis=1))/np.nanstd(ratings, axis=1).T
np.nanmean(scaled_ratings, axis=1)

array([ 0.12600867,  0.29151563,  0.4238517 , -0.493085  , -0.29394966,
       -0.09939557,  0.06140401,  0.1329473 , -0.18627578,  0.31956392,
        0.37752506,  0.09802381,  0.28486257, -0.4408008 , -0.40466311,
       -0.08532526,  0.28700318, -0.05893703, -0.02846952, -0.23463539,
        0.34778174,  0.05415893, -0.18259516, -0.04947513, -0.10681291,
        0.0105371 ,  0.35452878, -0.23785417, -0.45455139,  0.10355083,
        0.19604748, -0.04021061])

In [5]:
trust = np.zeros((n_users, n_users))


In [3]:
import pandas as pd

recipe_df = pd.read_csv("data/RAW_recipes.csv")
rating_df = pd.read_csv("data/RAW_interactions.csv")

In [4]:
recipe_df = recipe_df.set_index("id")

In [5]:
user_mean_ratings = rating_df.groupby("user_id").rating.mean()
user_std_ratings = rating_df.groupby("user_id").rating.std()
rating_df["normalized_rating"] = rating_df.apply(
    lambda row: (row.rating - user_mean_ratings[row.user_id]) / user_std_ratings[row.user_id], axis=1
)

  lambda row: (row.rating - user_mean_ratings[row.user_id]) / user_std_ratings[row.user_id], axis=1


In [6]:
rating_df = rating_df.dropna()

In [7]:
recipe_df["review_mean_normalized"] = rating_df.groupby("recipe_id").normalized_rating.mean()
recipe_df["review_count"] = rating_df.groupby("recipe_id").rating.count()
recipe_df["review_mean"] = rating_df.groupby("recipe_id").rating.mean()
recipe_df = recipe_df.dropna()

In [8]:
def bayesian_avg(mean, count, prior=0, C=10):
    return (mean * count + prior * C) / (count + C)

prior = recipe_df.review_mean_normalized.mean()
print(prior)
recipe_df["review_bayesian_mean"] = recipe_df.apply(
    lambda row: bayesian_avg(row["review_mean_normalized"], row["review_count"], prior=prior, C=100), axis=1
)

-0.11125552957582048


In [12]:
best_recipe = recipe_df.loc[recipe_df.review_bayesian_mean.idxmax()]
print(best_recipe.description)

cooking the meatballs in simmering pasta sauce  will not only add so much extra flavor to the sauce the meatballs will be melt-in-your-mouth tender, you may of course oven-bake the meatballs for about 25 minutes,  --- don't be afraid to add in more parmesan cheese and a little more milk, there is no need to measure exactly, this recipe is pretty much foolproof, leftover cooked meatballs make a wonderful pizza topping just slice thinly and freeze until ready to use ----- these also make wonderful juicy burgers shaped into patties-- for more of my tried and true recipes check out my food blog at www.kittencalskitchen.com


In [31]:
recipe_df.sort_values("review_bayesian_mean", ascending=False).head(20)

Unnamed: 0_level_0,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,review_mean_normalized,review_count,review_mean,review_bayesian_mean
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
69173,kittencal s italian melt in your mouth meatballs,50,89831,2003-08-20,"['60-minutes-or-less', 'time-to-make', 'course...","[1312.6, 129.0, 8.0, 108.0, 214.0, 174.0, 8.0]",5,['mix all ingredients together in a large bowl...,cooking the meatballs in simmering pasta sauce...,"['ground beef', 'egg', 'parmesan cheese', 'bre...",10,0.268764,732.0,4.706284,0.223088
106251,roasted cauliflower 16 roasted cloves of garlic,40,58104,2004-12-16,"['60-minutes-or-less', 'time-to-make', 'course...","[126.9, 14.0, 11.0, 17.0, 6.0, 6.0, 3.0]",4,"['mix oil , rosemary , salt , pepper and garli...","i find it totally amazing, that children and m...","['cauliflower', 'garlic cloves', 'fresh rosema...",6,0.295571,306.0,4.748366,0.195368
39087,creamy cajun chicken pasta,25,30534,2002-09-02,"['30-minutes-or-less', 'time-to-make', 'course...","[719.1, 63.0, 12.0, 34.0, 78.0, 120.0, 15.0]",4,['place chicken and cajun seasoning in a bowl ...,n'awlin's style of chicken with an updated alf...,"['boneless skinless chicken breast halves', 'l...",12,0.224818,911.0,4.563117,0.191576
31128,yummy crunchy apple pie,70,29450,2002-06-13,"['weeknight', 'time-to-make', 'course', 'main-...","[360.6, 22.0, 161.0, 6.0, 5.0, 38.0, 19.0]",15,['use foil on bottom of oven rather than a coo...,great pie!,"['pastry for single-crust pie', 'sugar', 'flou...",12,0.278085,319.0,4.733542,0.185164
8739,wholly guacamole,20,7802,2001-04-09,"['30-minutes-or-less', 'time-to-make', 'course...","[500.7, 68.0, 14.0, 0.0, 13.0, 32.0, 10.0]",13,"['cut avocados in half , remove pit , and spoo...",this is a true tex-mex recipe concocted by a d...,"['avocados', 'garlic cloves', 'lime', 'tomatoe...",8,0.301005,253.0,4.754941,0.184217
66596,kittencal s famous greek salad,10,89831,2003-07-14,"['15-minutes-or-less', 'time-to-make', 'course...","[489.7, 72.0, 23.0, 33.0, 15.0, 55.0, 4.0]",8,"['for the dressing', 'in a processor or use a ...",be prepared for the best greek salad on the pl...,"['olive oil', 'lemon, juice of', 'dried oregan...",16,0.283527,297.0,4.760943,0.184086
43509,crumb topped banana muffins,35,27643,2002-10-18,"['60-minutes-or-less', 'time-to-make', 'course...","[220.9, 10.0, 90.0, 12.0, 5.0, 20.0, 12.0]",9,"['in a large bowl , mix the flour , baking sod...",these are so good. we love the topping.,"['flour', 'baking soda', 'baking powder', 'sal...",11,0.252595,431.0,4.728538,0.184073
28148,oven fried chicken chimichangas,45,6258,2002-05-13,"['60-minutes-or-less', 'time-to-make', 'course...","[353.9, 25.0, 8.0, 29.0, 37.0, 32.0, 10.0]",7,"['mix chicken , picante sauce or salsa , cumin...",a fast and healthier way to serve chimichangas...,"['picante sauce', 'ground cumin', 'dried orega...",9,0.22583,634.0,4.675079,0.179906
129345,chicken scaloppine with lemon glaze low fat ...,35,89831,2005-07-11,"['60-minutes-or-less', 'time-to-make', 'course...","[301.6, 14.0, 8.0, 26.0, 62.0, 9.0, 7.0]",14,['in a small bowl or shallow dish whisk togeth...,this is a wonderful lower-fat restaurant-quali...,"['boneless skinless chicken breasts', 'dijon m...",13,0.306733,223.0,4.654709,0.177325
73825,why i joined zaar carrot cake,65,20371,2003-10-22,"['weeknight', 'time-to-make', 'course', 'main-...","[662.1, 58.0, 224.0, 24.0, 13.0, 56.0, 25.0]",13,"['preheat oven to 350 degrees f', 'grease and ...","on october 2, 2001, i discovered the recipezaa...","['all-purpose flour', 'baking soda', 'baking p...",16,0.258304,344.0,4.735465,0.17507


In [14]:
rating_df[rating_df.user_id == 89831]

Unnamed: 0,user_id,recipe_id,date,rating,review,normalized_rating
227,89831,225171,2007-05-29,5,These were buttery rich delicious! I made thes...,0.187821
467,89831,33096,2004-03-15,5,Merlot...this is the second time that I made y...,0.187821
805,89831,175042,2006-07-04,5,This is a wonderful rice recipe! I doubled the...,0.187821
1102,89831,53518,2004-07-12,5,"Uncle Bill, I made your perogies over the week...",0.187821
1282,89831,109578,2006-11-29,5,"This is a great cake recipe, perfect to serve ...",0.187821
...,...,...,...,...,...,...
1129861,89831,99870,2006-07-01,5,"I chose this for the ZWT game, it's a wonderfu...",0.187821
1130273,89831,200623,2006-12-20,5,This is simple and so delicious! thanks for sh...,0.187821
1131189,89831,63474,2006-07-25,5,Geema my gazpacho-loving DS said this is a fa...,0.187821
1131337,89831,99731,2006-08-08,5,"Thanks for posting this great recipe Barb, I h...",0.187821


In [15]:
recipe_df[recipe_df.description.str.contains("delicious recipe that will amaze your friends and family")]

Unnamed: 0_level_0,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,review_mean_normalized,review_count,review_mean,review_bayesian_mean
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
90880,maui ahi poke,45,140434,2004-05-07,"['60-minutes-or-less', 'time-to-make', 'course...","[149.0, 7.0, 2.0, 35.0, 45.0, 6.0, 0.0]",4,"['cut ahi into at least 1 / 2"" cubes - set asi...",here is a simple delicious recipe that will am...,"['fresh ahi tuna', 'round onion', 'green onion...",10,0.413931,14.0,4.5,-0.046759


In [9]:
rating_df[rating_df.recipe_id == 90880]

Unnamed: 0,user_id,recipe_id,date,rating,review,normalized_rating
360985,178220,90880,2006-05-25,5,I served this at recent party and it was nearl...,0.559017
360986,68460,90880,2006-06-18,5,This is a wonderful recipe! My husband and I ...,0.323823
360988,215829,90880,2007-08-25,4,I found this recipe (although pretty authentic...,-0.645119
360990,755811,90880,2008-09-17,4,"Really great recipe, but I agree with other re...",0.028603
360991,899881,90880,2008-11-07,5,"Great authentic recipe. Yes, to other comments...",0.749025
360993,35848,90880,2009-01-06,5,"Absolutely fabulous! Be sure to use a ""sushi""...",0.396784
360998,290012,90880,2009-07-22,5,onolicious!!!! This is even better that shoyu ...,0.728219
361003,511799,90880,2009-12-10,5,Memories of Hawaii in a dish -- I wouldn't cha...,0.447214
361004,1471923,90880,2010-01-04,5,My friends Loved it! But Well I have made this...,0.707107
361008,2687969,90880,2013-02-11,5,This was the perfect poke recipe for my cravin...,0.755929


In [12]:
rating_df[rating_df.user_id == 178220]

Unnamed: 0,user_id,recipe_id,date,rating,review,normalized_rating
65391,178220,65246,2006-05-25,5,Thank you for the great recipe. I served this...,0.559017
76182,178220,27527,2006-03-25,5,I just made this for our Irish Night Dinner. ...,0.559017
188320,178220,32607,2006-05-03,5,I made this tonight as part of a dinner party ...,0.559017
360985,178220,90880,2006-05-25,5,I served this at recent party and it was nearl...,0.559017
372411,178220,15682,2006-12-25,5,I made this for a dessert offering at a holida...,0.559017
401856,178220,121490,2006-06-03,5,I just made this bread. I can't tell you how ...,0.559017
512246,178220,118691,2006-05-03,4,I made this a while back and forgot to leave f...,-1.677051
555835,178220,95725,2006-05-30,4,"This was quite good. Instead of basting, I add...",-1.677051
621923,178220,2708,2006-01-16,5,Thanks for the quick and easy recipe. I didn'...,0.559017
652261,178220,121360,2006-05-03,5,Made this tonight. I quadrupled the garlic (w...,0.559017


In [26]:
rating_df[rating_df.user_id == 290012]

Unnamed: 0,user_id,recipe_id,date,rating,review,normalized_rating
186762,290012,57680,2006-05-26,5,It was a heat at my little kid's birthday part...,0.728219
219142,290012,15690,2010-01-04,3,"Not as sweet as I would have liked it to be, b...",-0.520156
360998,290012,90880,2009-07-22,5,onolicious!!!! This is even better that shoyu ...,0.728219
471791,290012,137030,2010-01-06,1,This recipe was very bland. Would definitely r...,-1.768532
612360,290012,53629,2009-01-13,4,I marinated boneless skinless chicken thighs f...,0.104031
711597,290012,213252,2009-01-15,5,"Simple, but very delicious. It was so tender. ...",0.728219


In [30]:
recipe_df.loc[53629]

name                                  caramelized lemongrass chicken thighs
minutes                                                                 285
contributor_id                                                        41525
submitted                                                        2003-02-05
tags                      ['weeknight', 'time-to-make', 'course', 'main-...
nutrition                        [380.0, 36.0, 69.0, 61.0, 45.0, 30.0, 6.0]
n_steps                                                                  13
steps                     ['remove skins and extra fat from chicken', 'r...
description               this is a recipe i came across in a cookbook i...
ingredients               ['chicken thighs', 'lemongrass', 'garlic', 'fi...
n_ingredients                                                             8
review_mean_normalized                                            -0.836055
review_count                                                            6.0
review_mean 