In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

tf.__version__

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


'2.0.0'

## Load Data

In [2]:
recipes = pd.read_csv('../../data/generated/recipes.csv')
train_matrix = pd.read_csv('../../data/generated/interactions_train_alt.csv').pivot_table(index='u', columns='i', values='rating', dropna=False)
print(f'Shape of train User-Movie-Matrix:\t{train_matrix.shape}')
test_matrix = pd.read_csv('../../data/generated/interactions_test_alt.csv').pivot_table(index='u', columns='i', values='rating', dropna=False)
print(f'Shape of test User-Movie-Matrix:\t{test_matrix.shape}')

Shape of train User-Movie-Matrix:	(6384, 3148)
Shape of test User-Movie-Matrix:	(6384, 3148)


In [3]:
#remplace rating from 0 to 5 to a boolean information : > 3 like (1), otherwise dislike (0)
train_matrix = train_matrix.applymap(lambda x : x if np.isnan(x) else int(x>3))
test_matrix = test_matrix.applymap(lambda x : x if np.isnan(x) else int(x>3))

#normalize data by dividing by 5 as ratings go from 0 to 5
#train_matrix = train_matrix / 5
#test_matrix = test_matrix /5

In [4]:
#replace all missing rating by -1 (as rating are from 0 to 5)
#the -1 will be then used in the model loss function as a mask
train_matrix.fillna(-1, inplace=True)
test_matrix.fillna(-1, inplace=True)

## Load the model trained on Colab

In [5]:
my_model = tf.keras.models.load_model('ae_v3.h5')

W1125 16:31:28.326725 4597020096 hdf5_format.py:177] No training configuration found in save file: the model was *not* compiled. Compile it manually.


## get recommandations from one user

In [6]:
#getting user train ratings
#id = 24240
id=33

user = train_matrix[train_matrix.index == id].T
user_ratings_train = user[user[id]!=-1]

#getting user test ratings
user = test_matrix[test_matrix.index == id].T
user_ratings_test = user[user[id]!=-1]

In [7]:
#get predictions from model
user_train = train_matrix[train_matrix.index == id].values
preds = my_model.predict(user_train, verbose=1)
preds = pd.DataFrame(preds, columns = train_matrix.columns)



In [8]:
print('recipes rated by user in train set')

summary = user_ratings_train.join(recipes[recipes['i']
                                .isin(user_ratings_train.T.columns)][['i','name']].set_index('i'))

summary = summary.join(preds.T)
summary.columns = ['rating', 'name', 'pred']

#get user mean square error
print('MSE for user', id, ':', np.square(preds[user_ratings_train.T.columns].values - user_ratings_train.T.values)
      .mean())

display(summary[['name', 'rating', 'pred']].sort_values('pred', ascending=False))

recipes rated by user in train set
MSE for user 33 : 0.06876645840369734


Unnamed: 0_level_0,name,rating,pred
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
19812,simply sour cream chicken enchiladas,1.0,1.031206
135400,payday bars,1.0,1.022895
78451,crispy coconut chicken fingers,1.0,1.009696
129723,spinach garlic pizza,1.0,1.00255
101200,tortellini tomato spinach soup,1.0,0.983494
97450,roasted parmesan garlic cauliflower,1.0,0.947091
72296,ziti pepperoni casserole,1.0,0.932472
87565,chicken pesto,1.0,0.623234
164348,new mexico style chili chicken casserole,0.0,0.515592
13616,tofu egg salad,0.0,0.445417


In [9]:
#top 10 recipes
reco = preds[set(preds.columns) - set (user_ratings_train.columns)].T.sort_values([0],ascending=False)
reco_top10 = reco.head(10)
reco_top10 = reco_top10.join(recipes[['i','name']].set_index('i'))
reco_top10.columns = ['pred', 'name']
print('The top 10 recommandations from Auto Encoder are :')
display(reco_top10[['name', 'pred']])

The top 10 recommandations from Auto Encoder are :


Unnamed: 0_level_0,name,pred
i,Unnamed: 1_level_1,Unnamed: 2_level_1
108522,white chili football soup,1.366778
163428,auntie anne s pretzels copycat,1.353319
92697,old fashioned linguine with white clam sauce,1.319224
66747,divine meatball sandwiches,1.319195
96302,the best pasta salad,1.310785
31840,cucumber and garbanzo bean salad,1.289305
42272,layer cookies magic layer bars,1.28157
54328,softest ever bread machine bread,1.277629
157749,spinach garlic soup,1.2751
174816,crock pot garlic rosemary chicken breast,1.26774


In [10]:
print('recipes rated by user in test set')

summary = user_ratings_test.join(recipes[recipes['i']
                                .isin(user_ratings_test.T.columns)][['i','name']].set_index('i'))

summary = summary.join(preds.T)
summary.columns = ['rating', 'name', 'pred']
summary['position'] = pd.Series(summary.index).apply(
                            lambda i : str(reco.index.get_loc(i)) 
                            + '/' + str(test_matrix.shape[1])).values

#get user mean square error
print('MSE for user', id, ':', np.square(preds[user_ratings_test.T.columns].values - user_ratings_test.T.values)
      .mean())

display(summary[['name', 'rating', 'pred', 'position']].sort_values('pred', ascending=False))


recipes rated by user in test set
MSE for user 33 : 0.48352646321720294


Unnamed: 0_level_0,name,rating,pred,position
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2949,oreo balls,1.0,0.943919,1245/3148
149428,wonderful salsa,1.0,0.778163,2171/3148
166633,pumpkin cream cheese muffins like starbucks,1.0,0.759723,2242/3148
134610,crock pot chicken with black beans cream cheese,1.0,-0.350561,3146/3148


# Get measure of performance for all train set

In [11]:
%timeit my_model.predict(train_matrix, verbose=1)

1.37 s ± 44.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
