In [2]:
import json
from collections import defaultdict
import tensorflow as tf
from tensorflow.keras.optimizers import *
import random
import numpy as np





In [3]:
X = []
with open('filter_all_t.json',"r+") as f:
    X = json.load(f)

In [4]:
X.keys()

dict_keys(['train', 'val', 'test'])

In [5]:
Xtrain = []
Xvalid = []
Xtest = []

In [6]:
usersVisitedByRestaurant  = defaultdict(set)
restaurantsVisitedByUser = defaultdict(set)

In [215]:
reviewsPerUser = defaultdict(int)
reviewsPerRestaurant = defaultdict(int)
ratingPerUser =  defaultdict(list)
ratingPerRestaurant = defaultdict(list)
for x in X['train']+X['val']+X['test']:
    reviewsPerUser[x['user_id']]+=1
    reviewsPerRestaurant[x['business_id']]+=1
    
avgReviewsPerUser = np.mean([reviewsPerUser[k] for k in reviewsPerUser])
avgReviewsPerRestaurant = np.mean([reviewsPerRestaurant[k] for k in reviewsPerRestaurant])
avgRating = np.mean([x['rating'] for x in X['train']+X['val']+X['test']])

In [216]:
avgReviewsPerRestaurant, avgRating, avgReviewsPerUser

(3.531769971781648, 4.464798692234222, 2.9432371067142395)

In [7]:
X['val'][0]

{'business_id': '6049974fb1a0aaee3eefb0dd',
 'user_id': '112777069092124620875',
 'rating': 5,
 'review_text': "It's really the best hot chicken I've had anywhere.",
 'pics': ['AF1QipNhbk-hwCwq2O6JBZQq6UXgIpwtzr-tQFTKxMIG'],
 'history_reviews': [['112777069092124620875_6043a4a88be5d4454df9dd76',
   'We hand the Amish Chicken and Hangar Steak. Plus to start some Lazy Pierogi. Dessert was carrot cake and snores cheesecake.'],
  ['112777069092124620875_6043a42bad733fba1bcfdde4',
   'The Mezzaluna, and Tagliatelle were the standout.'],
  ['112777069092124620875_604244e27dfa7f187183b871',
   "Delicious horchata. Great guacamole and esquites. The cochinita pobil is one of the best things I've ever eaten. All followed by the amazing churros."],
  ['112777069092124620875_6045a3809ced5955401d3a1e',
   'Their Nashville hot chicken sandwich is awesome. Deviled eggs are great too.']]}

In [230]:
restaurantIDs = []
restaurantDict = {}
userDict = {}
userIDs = []
for x in X['train']+X['val']+X['test']:
    usersVisitedByRestaurant[x['business_id']].add(x['user_id'])
    restaurantsVisitedByUser[x['user_id']].add(x['business_id'])
    if not x['user_id'] in userIDs: 
        userIDs.append(x['user_id'])
        userDict[x['user_id']] = len(userDict)
    if not x['business_id'] in restaurantIDs: 
        restaurantIDs.append(x['business_id'])
        restaurantDict[x['business_id']] = len(restaurantDict)

In [9]:
# Rui = np.zeros([len(userIDs), len(restaurantIDs)])
# for x in X['train']+X['val']+X['test']:
#     Rui[userIDs.index(x['user_id'])][restaurantIDs.index(x['business_id'])] = 1

In [223]:
len(userIDs), len(restaurantIDs), len(X['train']+X['val'])

(33296, 29449, 97873)

In [10]:
max([len(v) for k,v in restaurantsVisitedByUser.items()]), len(usersVisitedByRestaurant.keys())

(46, 30831)

In [11]:
max([len(v) for k,v in usersVisitedByRestaurant.items()]), len(restaurantsVisitedByUser.keys())

(243, 36996)

In [12]:
len(X['train'])

87013

In [187]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

In [188]:
class LatentFactorModel(tf.keras.Model):
    def __init__(self, K, lamb):
        super(LatentFactorModel, self).__init__()
        # Initialize variables
        self.alpha = tf.Variable(0.0) 
        self.betaU = tf.Variable(tf.random.normal([len(userIDs)],stddev=0.001))
        self.betaI = tf.Variable(tf.random.normal([len(restaurantIDs)],stddev=0.001))
        self.gammaU = tf.Variable(tf.random.normal([len(userIDs),K],stddev=0.001))
        self.gammaI = tf.Variable(tf.random.normal([len(restaurantIDs),K],stddev=0.001))
        # Regularization coefficient
        self.lamb = lamb

    # Prediction for a single instance
    def predict(self, u, i):
        p = self.alpha + self.betaI[i] + self.betaU[u] + tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
        return p

    # Regularizer
    def reg(self):
        return self.lamb * (tf.nn.l2_loss(self.betaU) +\
                            tf.nn.l2_loss(self.betaI) +\
                            tf.nn.l2_loss(self.gammaU) +\
                            tf.nn.l2_loss(self.gammaI))
    
    # Prediction for a sample of instances
    def predictSample(self, sampleU, sampleI):
        u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        beta_u = tf.nn.embedding_lookup(self.betaU, u)
        beta_i = tf.nn.embedding_lookup(self.betaI, i)
        gamma_u = tf.nn.embedding_lookup(self.gammaU, u)
        # print(gamma_u)
        gamma_i = tf.nn.embedding_lookup(self.gammaI, i)
        pred = self.alpha + beta_u + beta_i +\
               tf.reduce_sum(tf.multiply(gamma_u, gamma_i), 1)
        return pred
    
    # Loss
    def call(self, sampleU, sampleI, sampleR):
        pred = self.predictSample(sampleU, sampleI)
        r = tf.convert_to_tensor(sampleR, dtype=tf.float32)
        return 2 * tf.nn.l2_loss(pred - r) / len(sampleR)

In [189]:
modelLFM = LatentFactorModel(5, 0.00001)


In [190]:

def trainingStep(model, interactions):
    Nsamples = 25000
    with tf.GradientTape() as tape:
        sampleU, sampleI, sampleR = [], [], []
        for _ in range(Nsamples):
            x = random.choice(interactions)
            sampleU.append(userDict[x['user_id']])
            sampleI.append(restaurantDict[x['business_id']])
            sampleR.append(x['rating'])   

        # print(sampleU)
        loss = model(sampleU,sampleI,sampleR)
        loss += model.reg()
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients((grad, var) for
                              (grad, var) in zip(gradients, model.trainable_variables)
                              if grad is not None)
    return loss.numpy()

In [191]:
for i in range(500):
    obj = trainingStep(modelLFM, X['train'])
    print("iteration " + str(i+1) + ", objective = " + str(obj))

iteration 1, objective = 20.655422
iteration 2, objective = 18.624773
iteration 3, objective = 16.638021
iteration 4, objective = 14.717076
iteration 5, objective = 12.924836
iteration 6, objective = 11.116019
iteration 7, objective = 9.4493065
iteration 8, objective = 7.923042
iteration 9, objective = 6.4753923
iteration 10, objective = 5.1854353
iteration 11, objective = 4.0183077
iteration 12, objective = 3.1574912
iteration 13, objective = 2.4483843
iteration 14, objective = 2.0309958
iteration 15, objective = 1.7822745
iteration 16, objective = 1.7214575
iteration 17, objective = 1.7763882
iteration 18, objective = 1.8283151
iteration 19, objective = 1.8427674
iteration 20, objective = 1.8844432
iteration 21, objective = 1.8542197
iteration 22, objective = 1.7371328
iteration 23, objective = 1.6209068
iteration 24, objective = 1.4674484
iteration 25, objective = 1.3208632
iteration 26, objective = 1.2133058
iteration 27, objective = 1.105624
iteration 28, objective = 1.0118048
ite

In [193]:
testU = []
testI = []
testR = []
for x in X['test']:
    testU.append(userDict[x['user_id']])
    testI.append(restaurantDict[x['business_id']])
    testR.append(x['rating']) 
pred =  modelLFM.predictSample(testU, testI).numpy()
print("MSE", 2 * tf.nn.l2_loss(testR - pred).numpy() / len(pred))
# print("Loss in test data: ",)

MSE 0.7229179012205612


In [19]:
len([x for x in X['test'] if x['user_id'] not in userIDs])

0

In [107]:
def getIu(u, i):
    iu = [restaurantDict[item] for item in restaurantsVisitedByUser[userIDs[u]]]
    # print(iu, sampleI[t])
    if i not in iu:
        return iu
    else:
        iu.remove(i)
        return iu        

In [108]:
b = [[1,2],[2,3,4]]
# padItemHistory(b, 5)

In [148]:
def padItemHistory(descriptions, maxlen):
    # for i in range(descriptions.__len__()):
    #     descriptions[i].extend([len(restaurantIDs) * (maxlen - descriptions[i].__len__())])
    # return descriptions
    hist = []
    for idx in range(len(descriptions)):
        temp = [len(restaurantIDs)] * maxlen
        for t in range(len(descriptions[idx])):
            temp[t] = descriptions[idx][t]
        hist.append(temp)
    return hist

In [204]:
a = tf.convert_to_tensor([[1,2,3],[2,3,4],[1,1,1],[2,2,2]])
# b = tf.convert_to_tensor([[1,1]])
# z = tf.convert_to_tensor([1,2,3])
# # tf.matmul(a,tf.transpose(a))
# tf.reduce_sum(tf.multiply(a, tf.transpose(a, perm=[1,0,2])),2)
# z[0] = tf.add(z[0], tf.reduce_sum(tf.multiply(a,b)))
# tf.stack([[1,2,3],[1,2]])
tf.divide(a, [2])
# tf.reduce_sum(tf.gather([[1,2,3],[2,3,4],[1,1,1],[2,2,2]], [[0,1,2],[0,1,3]]), 1), tf.gather([[1,2,3],[2,3,4],[1,1,1],[2,2,2]], [[0,2],[1,3]])

<tf.Tensor: shape=(4, 3), dtype=float64, numpy=
array([[0.5, 1. , 1.5],
       [1. , 1.5, 2. ],
       [0.5, 0.5, 0.5],
       [1. , 1. , 1. ]])>

In [224]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

In [225]:
class FactoredItemSimModel(tf.keras.Model):
    def __init__(self, K, lamb):
        super(FactoredItemSimModel, self).__init__()
        # Initialize variables
        self.alpha = tf.Variable(0.0) 
        self.betaU = tf.Variable(tf.random.normal([len(userIDs)],stddev=0.001))
        self.betaI = tf.Variable(tf.random.normal([len(restaurantIDs)],stddev=0.001))
        self.gammaU = tf.Variable(tf.random.normal([len(restaurantIDs),K],stddev=0.001))
        self.gammaI = tf.Variable(tf.random.normal([len(restaurantIDs),K],stddev=0.001))
        self.mask = tf.Variable(tf.random.normal([1,K],stddev=0.001))
        # Regularization coefficient
        self.lamb = lamb

    # Prediction for a single instance
    def predict(self, u, i):
        p = self.alpha + self.betaI[i] + self.betaU[u] 
        q = 0
        for j in getIu(u, i):
            # if j != i:
            q += tf.tensordot(self.gammaU[j], self.gammaI[i], 1)
        return p + q / (len(restaurantsVisitedByUser[userIDs[u]])-1)

    # Regularizer
    def reg(self):
        return self.lamb * (tf.nn.l2_loss(self.betaU) +\
                            tf.nn.l2_loss(self.betaI) +\
                            # tf.nn.l2_loss(self.gammaU) +\
                            tf.nn.l2_loss(self.gammaI))
    
    # Prediction for a sample of instances
    def predictSample(self, sampleU, sampleI, itemHistory):
        u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        beta_u = tf.nn.embedding_lookup(self.betaU, u)
        beta_i = tf.nn.embedding_lookup(self.betaI, i)

        gamma_i = tf.nn.embedding_lookup(self.gammaI, i)
        gamma_u = tf.concat([self.gammaU, self.mask], axis=0)
        

        # idx = [getIu(sampleU[t].numpy()) if sampleI[t] not in getIu(sampleU[t].numpy()) else getIu(sampleU[t].numpy()).remove(sampleI[t]) for t in range(len(sampleU))]
        # print(gamma_u, itemHistory)
        # print(tf.gather(gamma_u, itemHistory))
        # print(tf.reduce_sum(tf.gather(gamma_u,idx[0]), 1))
        neighbors = tf.reduce_sum(tf.gather(gamma_u, itemHistory) ,1)
        # neighbors = tf.convert_to_tensor([tf.reduce_sum(tf.gather(gamma_u,index), 1) for index in idx])
        # print(neighbors, gamma_i)
        pred = self.alpha + beta_u + beta_i + tf.divide(tf.reduce_sum(tf.multiply(neighbors, gamma_i), 1), [len(itemHistory[0])])      
        return pred

        # reverse_identity = tf.ones([len(restaurantIDs), len(restaurantIDs)]) - tf.eye(len(restaurantIDs), len(restaurantIDs))
        # gamma_ij = tf.multiply(reverse_identity, tf.matmul(self.gammaI, tf.transpose(self.gammaI)))
        # print(gamma_ij.shape)
        # Xui = tf.matmul(tf.gather(Rui, indices=sampleU), gamma_ij)
        # return pred + tf.reduce_sum(tf.multiply(Xui, tf.transpose(tf.one_hot(sampleI, len(sampleU)))), 1)
        
        
        # gamma_ui = []
        # for t in range(len(sampleU)):
        #     sampleJ = getIu(sampleU[t])
        #     if sampleI[t] in sampleJ:
        #         sampleJ.remove(sampleI[t])
        #     j = tf.convert_to_tensor(sampleJ, dtype=tf.int32)    
        #     gamma_j = tf.nn.embedding_lookup(self.gammaI, j)
        #     gamma_i = tf.nn.embedding_lookup(self.gammaI, sampleI[t])
        #     gamma_ui.append(tf.reduce_sum(tf.multiply(gamma_j, gamma_i)))
        # return pred + tf.stack(gamma_ui)
    
    # Loss
    def call(self, sampleU, sampleI, sampleR, itemHistory):
        pred = self.predictSample(sampleU, sampleI, itemHistory)
        # pred = tf.convert_to_tensor([self.predict(sampleU[t], sampleI[t]) for t in range(len(sampleU))])
        r = tf.convert_to_tensor(sampleR, dtype=tf.float32)
        return 2 * tf.nn.l2_loss(pred - r) / len(sampleR)

In [226]:
modelFSIM = FactoredItemSimModel(5, 0.0001)

In [227]:

def trainingStep(model, interactions):
    Nsamples = 25000
    with tf.GradientTape() as tape:
        sampleU, sampleI, sampleR, itemHistory, histLen = [], [], [], [], []
        for _ in range(Nsamples):
            x = random.choice(interactions)
            sampleU.append(userDict[x['user_id']])
            sampleI.append(restaurantDict[x['business_id']])
            # idx = []
            # for t in range(len(sampleU)):
            itemHistory.append(getIu(sampleU[-1], sampleI[-1]))
            # itemHistory.append(idx)
            histLen.append(len(itemHistory[-1]))
            sampleR.append(x['rating'])  
        # print(sampleU) 
        itemHistory = padItemHistory(itemHistory, max(histLen))
        # print(max(histLen))
        # print(itemHistory)
        # print(padItemHistory(itemHistory, max(histLen) ))
        loss = model(sampleU,sampleI,sampleR, itemHistory)
        loss += model.reg()
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients((grad, var) for
                              (grad, var) in zip(gradients, model.trainable_variables)
                              if grad is not None)
    return loss.numpy()

In [228]:
for i in range(500):
    obj = trainingStep(modelFSIM, X['train']+X['val'])
    print("iteration " + str(i+1) + ", objective = " + str(obj))

iteration 1, objective = 20.596462
iteration 2, objective = 18.656736
iteration 3, objective = 16.813566
iteration 4, objective = 14.838133
iteration 5, objective = 12.844305
iteration 6, objective = 10.838593
iteration 7, objective = 8.8012
iteration 8, objective = 6.91752
iteration 9, objective = 5.4150777
iteration 10, objective = 4.231898
iteration 11, objective = 3.6448126
iteration 12, objective = 3.7463355
iteration 13, objective = 4.2071586
iteration 14, objective = 4.623149
iteration 15, objective = 4.7378244
iteration 16, objective = 4.513008
iteration 17, objective = 4.0967226
iteration 18, objective = 3.6270978
iteration 19, objective = 3.2342253
iteration 20, objective = 2.9702868
iteration 21, objective = 2.820657
iteration 22, objective = 2.8055444
iteration 23, objective = 2.8111608
iteration 24, objective = 2.8532157
iteration 25, objective = 2.9105415
iteration 26, objective = 2.9139771
iteration 27, objective = 2.906792
iteration 28, objective = 2.8457317
iteration 2

In [229]:
testU = []
testI = []
testR = []
# history = []
# histLen = []
for x in X['test']:
    testU.append(userDict[x['user_id']])
    testI.append(restaurantDict[x['business_id']])
    testR.append(x['rating']) 
    # history.append(getIu(testU[-1], testI[-1]))
    # histLen.append(len(history[-1]))
# history = padItemHistory(history, max(histLen))
    
pred =  [modelFSIM.predict(testU[t], testI[t]) for t in range(len(X['test']))]
pred = tf.convert_to_tensor(pred)
print("MSE", 2 * tf.nn.l2_loss(testR - pred).numpy() / len(pred))
# print("Loss in test data: ",)

KeyError: '108919790647235091207'