In [87]:
import pandas as pd
from collections import defaultdict
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [79]:
ratingsDataFrame = pd.read_csv('yelp_train.csv',skipinitialspace=True)
ratingsDataFrame.head(5)

Unnamed: 0,user_id,business_id,stars
0,vxR_YV0atFxIxfOnF9uHjQ,gTw6PENNGl68ZPUpYWP50A,5.0
1,o0p-iTC5yTBV5Yab_7es4g,iAuOpYDfOTuzQ6OPpEiGwA,4.0
2,-qj9ouN0bzMXz1vfEslG-A,5j7BnXXvlS69uLVHrY9Upw,2.0
3,E43QxgV87Ij6KxMCHcijKw,jUYp798M93Mpcjys_TTgsQ,5.0
4,T13IBpJITI32a1k41rc-tg,3MntE_HWbNNoyiLGxywjYA,5.0


In [62]:
def idToInteger(allId):
    count = 0
    groupedId = defaultdict(int)
    for _id in allId:
        groupedId.setdefault(_id,0)
        groupedId[_id] += 1
    idToInt = dict()
    for _id in groupedId:
        count += 1
        idToInt[_id] = count
    return idToInt

In [83]:
convertedUserID = idToInteger(ratingsDataFrame['user_id']) 
ratingsDataFrame['user_id'] = ratingsDataFrame['user_id'].map(convertedUserID)

convertedBusinessID = idToInteger(ratingsDataFrame['business_id']) 
ratingsDataFrame['business_id'] = ratingsDataFrame['business_id'].map(convertedBusinessID)

print(ratingsDataFrame.shape)
ratingsDataFrame.dtypes

(455854, 3)


user_id          int64
business_id      int64
stars          float64
dtype: object

In [86]:
userCount, businessCount = len(ratingsDataFrame.user_id.unique()), len(ratingsDataFrame.business_id.unique())
latentFactors = 30

In [94]:
businessInput = tf.keras.layers.Input(shape=[1],name='Businesses', dtype='int64')
businessEmbedding = tf.keras.layers.Embedding(businessCount + 1,latentFactors,name='Business-Encoding')(businessInput)
businessVector = tf.keras.layers.Flatten()(businessEmbedding)

userInput = tf.keras.layers.Input(shape=[1],name='Users', dtype='int64')
userEmbedding = tf.keras.layers.Embedding(userCount + 1,latentFactors,name='User-Encoding')(userInput)
userVector = tf.keras.layers.Flatten()(userEmbedding)

dotProduct = tf.keras.layers.dot([userVector,businessVector],axes=1)
recSysModel = tf.keras.Model(inputs=[userInput,businessInput],outputs=dotProduct)
recSysModel.compile(loss='mae', optimizer='adam', metrics=["mae"])

In [95]:
recSysModel.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Users (InputLayer)              [(None, 1)]          0                                            
__________________________________________________________________________________________________
Businesses (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
User-Encoding (Embedding)       (None, 1, 30)        338130      Users[0][0]                      
__________________________________________________________________________________________________
Business-Encoding (Embedding)   (None, 1, 30)        741990      Businesses[0][0]                 
____________________________________________________________________________________________

In [96]:
def convertToArray(entries):
    return np.array([[element] for element in entries])

In [98]:
recSysModelHistory = recSysModel.fit([convertToArray(ratingsDataFrame['user_id']),convertToArray(ratingsDataFrame['business_id'])]
                                     ,convertToArray(ratingsDataFrame['stars']),epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [99]:
recSysModel.save_weights("first_model.h5")