# RECIPES REVIEW 

# RECOMMENDER SYSTEM WITH MATRIX FACTORIZATION & ML<br>

The following project implements a collaborative recommender system with matrix factorization and machine learning methods for food recipe recommendations. The code takes two datasets, preprocesses them, and calculates the weights of a factorized matrix using an ML algorithm.<br>
- **NOTE:** Project inspired by Google Recommender Systems Online Course : https://developers.google.com/machine-learning/recommendation

# Index
- [ 1 - Preprocessing](#1)
- [ 2 - Model Training](#2)
- [ 3 - Predictions](#3)
- [ 4 - Recommendation](#4)

# 1.- PREPROCESSING

## Libraries

In [None]:
##########################################################################################################################

In [1]:
import numpy       as     np
import pandas      as     pd
import tensorflow  as     tf
from   tensorflow  import keras

## Import Datasets
- RAW_recipes
- RAW_interactions

In [None]:
##########################################################################################################################

In [2]:
# Load only the recipe id and name, pin the column order, and rename both
# columns to the names used by the rest of the notebook.
recipes_id = (
    pd.read_csv('raw_data/RAW_recipes.csv', usecols=['id', 'name'])
      .loc[:, ['id', 'name']]
      .rename(columns={'id': 'recipe_id', 'name': 'name_of_recipe'})
)

In [3]:
# Display the recipe lookup table (columns: recipe_id, name_of_recipe).
recipes_id

Unnamed: 0,recipe_id,name_of_recipe
0,137739,arriba baked winter squash mexican style
1,31490,a bit different breakfast pizza
2,112140,all in the kitchen chili
3,59389,alouette potatoes
4,44061,amish tomato ketchup for canning
...,...,...
231632,486161,zydeco soup
231633,493372,zydeco spice mix
231634,308080,zydeco ya ya deviled eggs
231635,298512,cookies by design cookies on a stick


In [None]:
##########################################################################################################################

In [4]:
# Read only the three columns needed to build the recipe/user rating matrix.
reviews = pd.read_csv(
    'raw_data/RAW_interactions.csv',
    usecols=['user_id', 'recipe_id', 'rating'],
)

In [5]:
# Display the raw interactions (columns: user_id, recipe_id, rating).
reviews

Unnamed: 0,user_id,recipe_id,rating
0,38094,40893,4
1,1293707,40893,5
2,8937,44394,4
3,126440,85009,5
4,57222,85009,5
...,...,...,...
1132362,116593,72730,0
1132363,583662,386618,5
1132364,157126,78003,5
1132365,53932,78003,4


## Filter Datasets<br>
The users/recipes with few or negligible interactions are removed to create a denser (less sparse) Utility Matrix:<br>
- Users with 7 reviews or less
- Recipes with 7 reviews or less

In [None]:
##########################################################################################################################

In [6]:
# Number of reviews per user: non-null 'recipe_id' entries grouped by 'user_id'

user_NumRecipes_count = reviews['recipe_id'].groupby(reviews['user_id']).count()
user_NumRecipes_count

user_id
1533          128
1535          794
1581            1
1634           60
1676           31
             ... 
2002371755      1
2002371792      1
2002371843      1
2002372464      1
2002372706      1
Name: recipe_id, Length: 226570, dtype: int64

In [7]:
# Index of the 'user_id' values that have rated more than 7 recipes

users_filtered = user_NumRecipes_count.loc[user_NumRecipes_count.gt(7)].index

In [8]:
# Keep only the reviews written by a user listed in 'users_filtered'

reviews_filtered = reviews.loc[reviews['user_id'].isin(users_filtered)]

In [None]:
##########################################################################################################################

In [9]:
# Number of reviews per recipe (after the user filter): non-null 'user_id'
# entries grouped by 'recipe_id'

recipe_NumUsers_count = reviews_filtered['user_id'].groupby(reviews_filtered['recipe_id']).count()
recipe_NumUsers_count

recipe_id
38        2
40        5
41        2
43        1
45        3
         ..
537319    1
537458    1
537459    1
537485    1
537716    1
Name: user_id, Length: 206961, dtype: int64

In [10]:
# Index of the 'recipe_id' values that have been rated more than 7 times

recipes_filtered = recipe_NumUsers_count.loc[recipe_NumUsers_count.gt(7)].index

In [11]:
# Keep only the reviews whose 'recipe_id' is listed in 'recipes_filtered'

reviews_filtered = reviews_filtered.loc[reviews_filtered['recipe_id'].isin(recipes_filtered)]

In [None]:
##########################################################################################################################

In [12]:
# Distribution of rating values among the remaining reviews
reviews_filtered.loc[:, 'rating'].value_counts()

rating
5    328804
4     64632
3     13306
0      9658
2      3907
1      1766
Name: count, dtype: int64

In [13]:
# Drop every review whose rating is 0, then show the remaining distribution.
# NOTE: this runs after the "> 7 reviews" filters, so the counts used by those
# filters still included the 0-rated reviews.

reviews_filtered = reviews_filtered.loc[reviews_filtered['rating'].ne(0)]
reviews_filtered.loc[:, 'rating'].value_counts()

rating
5    328804
4     64632
3     13306
2      3907
1      1766
Name: count, dtype: int64

## Utility Matrix

In [None]:
##########################################################################################################################

In [14]:
# Rows are recipes, columns are users, cells hold the rating; recipe/user
# pairs without a review are filled with 0.
UtilityMatrix = (
    reviews_filtered
    .pivot(index='recipe_id', columns='user_id', values='rating')
    .fillna(0)
)

In [None]:
##########################################################################################################################

In [15]:
# One-column frame holding the unique recipe ids of the utility-matrix index,
# in index order; its positional index (0, 1, 2, ...) is what gets displayed.

recipes_UtilityMatrix = pd.DataFrame({'recipe_id': UtilityMatrix.index.unique().tolist()})
recipes_UtilityMatrix

Unnamed: 0,recipe_id
0,49
1,62
2,66
3,142
4,150
...,...
20805,518151
20806,518229
20807,522889
20808,524479


In [None]:
#########################################################################################################################

In [16]:
# Attach the recipe name to every recipe row via a join on 'recipe_id'.
#
# A left join on just the 'recipe_id' column is used so that:
#   * every row is kept, in its original order, even when a recipe has no
#     entry in 'recipes_id' (its name is then NaN) -- the positional index of
#     this frame is used later to address rows of the utility matrix, so rows
#     must never be dropped;
#   * re-running this cell does not merge the name column a second time.

rows_before_merge     =  len(recipes_UtilityMatrix)
recipes_UtilityMatrix =  pd.merge(   recipes_UtilityMatrix[['recipe_id']],   recipes_id,    on='recipe_id',   how='left'  )

# Duplicate 'recipe_id' entries in 'recipes_id' would add rows and break the alignment.
assert len(recipes_UtilityMatrix) == rows_before_merge, 'merge changed the number of recipe rows'
recipes_UtilityMatrix

Unnamed: 0,recipe_id,name_of_recipe
0,49,chicken breasts lombardi
1,62,black bean corn and tomato salad
2,66,black coffee barbecue sauce
3,142,almond fudge banana cake
4,150,all purpose crock pot chicken
...,...,...
20805,518151,a 1 pot stickers with chili pineapple dipping...
20806,518229,charge me up a1 steak egg veggie breakfast w...
20807,522889,swiss eggs
20808,524479,ragu ratatouille hummus plate ragu


In [None]:
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# HERE

In [None]:
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

In [17]:
# One personal-rating slot per recipe row, all starting at 0 (= not reviewed)
num_recipes       = recipes_UtilityMatrix.shape[0]
my_recipes_review = np.zeros(shape=(num_recipes,))

## INTRODUCE YOUR RECIPE REVIEWS  HERE<br>
1.- Check the data frame titled **recipes_UtilityMatrix** <br>
2.- **Find** the name of the recipe you want to review; and **log** its corresponding **index number**<br>

    
**Example:**<br>

1. **Recipe Name:** _"almond fudge banana cake"_<br> 
2. **Index ID:** "3"

**In order to add a review of 5 to this recipe, introduce:**<br>

**my_recipes_review[3]      =**    5 
    
    


In [18]:
# Display the recipe table; the left-most number is the index to use in 'my_recipes_review[...]'.
recipes_UtilityMatrix

Unnamed: 0,recipe_id,name_of_recipe
0,49,chicken breasts lombardi
1,62,black bean corn and tomato salad
2,66,black coffee barbecue sauce
3,142,almond fudge banana cake
4,150,all purpose crock pot chicken
...,...,...
20805,518151,a 1 pot stickers with chili pineapple dipping...
20806,518229,charge me up a1 steak egg veggie breakfast w...
20807,522889,swiss eggs
20808,524479,ragu ratatouille hummus plate ragu


## Add your Review HERE<br>
On a scale from **1 to 5**
- **1=** Least you enjoyed
- **5=** Most  you enjoyed

In [19]:
# Look up a single recipe by its position (here: row 3) to check its id and name.
recipes_UtilityMatrix.iloc[3]

recipe_id                              142
name_of_recipe    almond fudge banana cake
Name: 3, dtype: object

In [20]:

#  Personal ratings: each line stores one rating in 'my_recipes_review' at the
#  position (row index) of the recipe in 'recipes_UtilityMatrix'.
#  Entries left at 0 are treated as "not reviewed" by the cells that follow.
#
#  Introduce Recipe Index       Review              Recipe Name

my_recipes_review[3]            =  5          #  almond fudge banana cake


my_recipes_review[7]            =  3          #  chicken pot pie lasagna
my_recipes_review[12]           =  2          #  chicken fried brown rice
my_recipes_review[5207]         =  4          #  sesame sunflower poppy flax seed buttermilk bread
my_recipes_review[7777]         =  3          #  eloise s easy sugar cookies
my_recipes_review[9093]         =  5          #  mexi cashews
my_recipes_review[11457]        =  2          #  homemade flour tortillas
my_recipes_review[12123]        =  5          #  gingerbread truffles
my_recipes_review[15003]        =  1          #  vegan soft sugar cookies
my_recipes_review[17782]        =  3          #  light italian feta omelet
my_recipes_review[20001]        =  4          #  bananas and milk
my_recipes_review[20808]        =  3          #  ragu ratatouille hummus plate  ragu

In [21]:
# Echo every recipe that received a personal rating (> 0) with its name.
print('\nMy Reviews:\n')
for recipe_pos, rating in enumerate(my_recipes_review):
    if not rating > 0:
        continue
    recipe_name = recipes_UtilityMatrix.loc[recipe_pos, "name_of_recipe"]
    print(f'I Reviewed:  {rating}      to Recipe:  {recipe_name}')


My Reviews:

I Reviewed:  5.0      to Recipe:  almond fudge banana cake
I Reviewed:  3.0      to Recipe:  chicken pot pie lasagna
I Reviewed:  2.0      to Recipe:  chicken fried brown rice
I Reviewed:  4.0      to Recipe:  sesame sunflower poppy flax seed buttermilk bread
I Reviewed:  3.0      to Recipe:  eloise s easy sugar cookies
I Reviewed:  5.0      to Recipe:  mexi cashews
I Reviewed:  2.0      to Recipe:  homemade flour tortillas
I Reviewed:  5.0      to Recipe:  gingerbread truffles
I Reviewed:  1.0      to Recipe:  vegan soft sugar cookies
I Reviewed:  3.0      to Recipe:  light italian feta omelet
I Reviewed:  4.0      to Recipe:  bananas and milk
I Reviewed:  3.0      to Recipe:  ragu ratatouille hummus plate  ragu


In [22]:
#  Positions in 'my_recipes_review' that hold a rating (> 0), as a plain list
#  of ints in ascending order.

my_index_reviewed = np.flatnonzero(my_recipes_review > 0).tolist()

## Update Utility Matrix<br>
Your personal reviews are added to the Utility Matrix

In [None]:
######################################################################################################

In [23]:
# Indicator matrix: 1 where a cell of the utility matrix is non-zero (a rating
# exists), 0 elsewhere.
UtilityMatrix_Reviewed = (UtilityMatrix != 0).astype(int)

In [24]:
# Switch both frames to plain NumPy arrays; the names are rebound to the arrays.
UtilityMatrix, UtilityMatrix_Reviewed = (
    UtilityMatrix.to_numpy(),
    UtilityMatrix_Reviewed.to_numpy(),
)

In [None]:
######################################################################################################

In [None]:
# Add new Reviews to the existing utility matrix

In [25]:
# Prepend the personal ratings as a new first column of the utility matrix,
# and the matching 0/1 "has a rating" column to the indicator matrix.
# NOTE: running this cell more than once prepends the column again.
UtilityMatrix          = np.column_stack(( my_recipes_review, UtilityMatrix ))
UtilityMatrix_Reviewed = np.column_stack(( (my_recipes_review != 0).astype(int), UtilityMatrix_Reviewed ))

## Normalize Utility Matrix<br>
The mean rating of each recipe is subtracted from its rated entries in the Utility Matrix to normalize the values

In [None]:
######################################################################################################

In [26]:
# Per-recipe (row) mean over the cells flagged as rated; the tiny epsilon
# keeps the division defined for a row without any rating.
rating_sum   = np.sum(UtilityMatrix * UtilityMatrix_Reviewed, axis=1)
rating_count = np.sum(UtilityMatrix_Reviewed, axis=1)
mean_recipe  = (rating_sum / (rating_count + 1e-12)).reshape(-1, 1)

# Subtract the row mean only where the indicator is 1; other cells keep their value.
normalized_UtilityMatrix = UtilityMatrix - mean_recipe * UtilityMatrix_Reviewed

## Save Files<br> 

In [None]:
######################################################################################################

In [27]:
# Wrap each intermediate result in a DataFrame (the names are rebound to the
# frames), then write every frame to its CSV file without the index column.
my_recipes_review     = pd.DataFrame(my_recipes_review)
recipes_UtilityMatrix = pd.DataFrame(recipes_UtilityMatrix)
my_index_reviewed     = pd.DataFrame(my_index_reviewed)
mean_recipe           = pd.DataFrame(mean_recipe)

for frame, csv_name in (
    (my_recipes_review,     'my_recipes_review.csv'),
    (recipes_UtilityMatrix, 'recipes_UtilityMatrix.csv'),
    (my_index_reviewed,     'my_index_reviewed.csv'),
    (mean_recipe,           'mean_recipe.csv'),
):
    frame.to_csv(csv_name, index=False)
###########################################################################################################################

In [28]:
# It may take some time
# The indicator matrix is transposed before being written to CSV in chunks of
# 1000 rows. NOTE: the name is rebound to the transposed frame, so running
# this cell twice flips the orientation back.

UtilityMatrix_Reviewed = pd.DataFrame(UtilityMatrix_Reviewed.T)
UtilityMatrix_Reviewed.to_csv('UtilityMatrix_Reviewed.csv', index=False, chunksize=1000)

In [29]:
# It may take some time
# The utility matrix is transposed before being written to CSV in chunks of
# 1000 rows. NOTE: the name is rebound to the transposed frame, so running
# this cell twice flips the orientation back.

UtilityMatrix = pd.DataFrame(UtilityMatrix.T)
UtilityMatrix.to_csv('UtilityMatrix.csv', index=False, chunksize=1000)

In [30]:
# It may take some time
# The normalized matrix is transposed before being written to CSV in chunks of
# 1000 rows.
# NOTE(review): the output file name contains a space before '.csv', unlike the
# other CSV files written by this notebook -- confirm that whatever reads this
# file expects that exact name before changing it.

normalized_UtilityMatrix = pd.DataFrame(normalized_UtilityMatrix.T)
normalized_UtilityMatrix.to_csv('normalized_UtilityMatrix .csv', index=False, chunksize=1000)

In [None]:
###########################################################################################################################

In [None]:
###########################################################################################################################

In [None]:
###########################################################################################################################

# Continue with TRAINING notebook