In [237]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from  tensorflow.keras.preprocessing.sequence import pad_sequences

In [239]:
class MF():

    def __init__(self, R, K, alpha, beta, iterations):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)   : user-item rating matrix
        - K (int)       : number of latent dimensions
        - alpha (float) : learning rate
        - beta (float)  : regularization parameter
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Initialize the biases
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for i, j, r in self.samples:
            # Computer prediction and error
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P and Q
        """
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)

In [240]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [241]:
# Implement Content Based Filtering
from sklearn.metrics.pairwise import cosine_similarity
from numpy import dot
from numpy.linalg import norm



item = np.array([[1, 0, 1],[1, 1, 1], [0, 1, 1], [0, 0, 1]])

user = np.array([[0, 1, 1], [1, 1, 0]])

In [242]:
def get_item_user(user_arr, item):
    user_item = []
    for user in user_arr:
         user_item.append(list(dot(user, item.T)))

    return user_item

In [243]:
item_user = sigmoid(np.array(get_item_user(user, item))).tolist()
item_user

[[0.7310585786300049,
  0.8807970779778823,
  0.8807970779778823,
  0.7310585786300049],
 [0.7310585786300049, 0.8807970779778823, 0.7310585786300049, 0.5]]

In [244]:
def item_item(item):
    item_item = []
    for i in range(0, len(item)):
        item_similarity = []
        for j in range(0, len(item)):
            #if(i!=j):
            cos_sim = dot(item[i], item[j])/(norm(item[i])*norm(item[j]))
            item_similarity.append(cos_sim)
        item_item.append(item_similarity)

    return item_item

item_item_matrix = item_item(item)
    

In [245]:
print(item_item_matrix)

print(np.shape(item_item_matrix))

[[0.9999999999999998, 0.8164965809277259, 0.4999999999999999, 0.7071067811865475], [0.8164965809277259, 1.0000000000000002, 0.8164965809277259, 0.5773502691896258], [0.4999999999999999, 0.8164965809277259, 0.9999999999999998, 0.7071067811865475], [0.7071067811865475, 0.5773502691896258, 0.7071067811865475, 1.0]]
(4, 4)


In [246]:
# Collaborative 
user_swiped = [[-1 , 0, 1, 1], [1 ,0, 0, 1]]
R = np.array(user_swiped)

mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)

In [247]:
training_process = mf.train()
print()
print("P x Q:")
print(mf.full_matrix())
print()
print("Global bias:")
print(mf.b)
print()
print("User bias:")
print(mf.b_u)
print()
print("Item bias:")
print(mf.b_i)

Iteration: 10 ; error = 1.9678
Iteration: 20 ; error = 1.9761

P x Q:
[[0.97611133 0.90124218 0.999546   0.99284541]
 [0.99547338 0.98584579 1.1367328  1.00430155]]

Global bias:
0.6

User bias:
[0.16856639 0.24855886]

Item bias:
[ 0.11069865  0.         -0.0026351   0.31103181]


In [248]:
"""
user_user: [user_1: [item_1, item_2, item_3, item_4], user_2: [item1, item_2, item_3, item_4]
"""
user_user = sigmoid(mf.full_matrix())
user_user

array([[0.72633594, 0.7112047 , 0.73096931, 0.72964958],
       [0.73016766, 0.72826661, 0.75707927, 0.73190347]])

In [249]:
'''
item_item: [item1: [item_1, item_2, item_3, item_4], item_2: [item_1, item_2, item_3, item_4] ...]
'''
item_item_matrix

[[0.9999999999999998,
  0.8164965809277259,
  0.4999999999999999,
  0.7071067811865475],
 [0.8164965809277259,
  1.0000000000000002,
  0.8164965809277259,
  0.5773502691896258],
 [0.4999999999999999,
  0.8164965809277259,
  0.9999999999999998,
  0.7071067811865475],
 [0.7071067811865475, 0.5773502691896258, 0.7071067811865475, 1.0]]

In [250]:
'''
item_user: [user_1: [item_1, item_2, item_3, item_4], user_2:[ item_1, item_2, item_3, item_4]]
'''
item_user

[[0.7310585786300049,
  0.8807970779778823,
  0.8807970779778823,
  0.7310585786300049],
 [0.7310585786300049, 0.8807970779778823, 0.7310585786300049, 0.5]]

In [251]:
def get_collab_content(item_user_arr, user_user_arr):
    collab_content = item_user_arr + user_user_arr
    return collab_content

In [252]:
collab_content = get_collab_content(item_user, user_user).tolist()
collab_content

[[1.4573945173793326,
  1.5920017825021056,
  1.611766385024854,
  1.460708155477369],
 [1.4612262398537526,
  1.609063685944038,
  1.4881378517097175,
  1.231903474433051]]

In [253]:
def get_recommendation(user_index, item_item_matrix, collab_content):
    user_arr = collab_content[user_index]
    recommendation = user_arr.index(max(user_arr))
    print(recommendation)
    n_recommendation = sorted(item_item_matrix[recommendation], reverse=True)[:3]
    print(n_recommendation)
    

In [254]:
get_recommendation(0, item_item_matrix, collab_content)

2
[0.9999999999999998, 0.8164965809277259, 0.7071067811865475]


In [255]:
df_grab = pd.read_csv("complete_grab.csv")

In [43]:
import json
from ast import literal_eval

df_grab

df_tran = pd.DataFrame({})
ind = 0
for cuisine_arr in df_grab['cuisine']:
    i = 1
    dict_foodie = {}
    
    if(type(cuisine_arr) != float):
        cuisine_arr = literal_eval(cuisine_arr)
        for cuisine in cuisine_arr:
            string_iter = 'cuisine'+str(i)
            dict_foodie[string_iter] = 1
            i=i+1
    df_foodie = pd.DataFrame.from_records(dict_foodie, index = [ind])
    df_foodie = pd.concat([df_grab, df_foodie])
    ind+=1

In [21]:
df_foodie

Unnamed: 0,cuisine1,cuisine10,cuisine11,cuisine12,cuisine13,cuisine14,cuisine15,cuisine16,cuisine17,cuisine18,...,cuisine42,cuisine43,cuisine44,cuisine45,cuisine46,cuisine5,cuisine6,cuisine7,cuisine8,cuisine9
9661,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
