In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import sys

In [2]:
# Read the raw ratings table and the book metadata table from the data folder.
ratings_path = '../data/ratings.csv'
books_path = '../data/books.csv'
rating = pd.read_csv(ratings_path)
books = pd.read_csv(books_path)

In [3]:
# Keep only the metadata columns used in this notebook.
book_columns = [
    'id',
    'book_id',
    'authors',
    'original_title',
    'language_code',
    'average_rating',
    'ratings_count',
]
books = books[book_columns]

In [4]:
# Shift every id down by one (presumably the CSV ids are 1-based -- TODO confirm).
# NOTE: this cell is not idempotent; re-running it shifts the ids again.
rating['book_id'] = rating['book_id'].sub(1)
rating['user_id'] = rating['user_id'].sub(1)
books['id'] = books['id'].sub(1)

In [5]:
# Remove fully identical rating rows and renumber the index from 0.
rating = rating.drop_duplicates().reset_index(drop=True)

In [6]:
# Number of distinct users / books that appear in the ratings table.
# NOTE(review): these counts are later used as matrix and embedding dimensions,
# which assumes the ids are contiguous in 0..n-1 -- TODO confirm.
num_user = rating['user_id'].unique().shape[0]
num_book = rating['book_id'].unique().shape[0]

In [7]:
# Create a sparse interaction matrix from rating data
def create_sparse_matrix(num_user,num_item,df):
    """Build a (num_user x num_item) ``tf.SparseTensor`` holding the ratings.

    Parameters
    ----------
    num_user : int
        Number of rows of the tensor.
    num_item : int
        Number of columns of the tensor.
    df : pandas.DataFrame
        Needs ``user_id``, ``book_id`` and ``rating`` columns; each row becomes
        one stored entry located at (user_id, book_id).

    Returns
    -------
    tf.SparseTensor
        Symbolic sparse tensor; both coordinates and values are cast to int32
        before being handed to TensorFlow.
    """
    coordinates = df[['user_id', 'book_id']].values.astype(np.int32)
    stored_ratings = df['rating'].values.astype(np.int32)
    return tf.SparseTensor(indices=coordinates,
                           values=stored_ratings,
                           dense_shape=(num_user, num_item))

In [8]:
# Build the symbolic sparse tensor, then evaluate it to a concrete value
# (exposing .indices, .values and .dense_shape as arrays).
# The session is opened in a `with` block so its resources are released as
# soon as the evaluation is done instead of lingering for the kernel lifetime.
rating_tensor = create_sparse_matrix(num_user, num_book, rating)
with tf.Session() as sess:
    rating_matrix = sess.run(rating_tensor)

In [9]:
# Fraction of (user, book) cells that hold no rating.
n_observed = len(rating_matrix.values)
n_cells = np.prod(rating_matrix.dense_shape)
sparsity = 1 - n_observed / n_cells
print("Sparsity of rating matrix:",sparsity)

Sparsity of rating matrix: 0.9981654088050315


In [12]:
# Positive (observed) interactions: column 0 of the index array is the user,
# column 1 the item. Every observed pair gets the implicit-feedback label 1.
observed_pairs = rating_matrix.indices
observed_user = observed_pairs[:, 0]
observed_item = observed_pairs[:, 1]
observed_rating = np.ones_like(observed_user)

#### Negative sampling of unobserved items

In [13]:
# Count the books each user has rated so negatives can be sampled in proportion.
books_per_user = rating.groupby('user_id')['book_id'].count()
users = books_per_user.index.values
books_read = books_per_user.values

In [14]:
alpha = 4 # For each observed entry, sample 4 negative entries
all_items = np.arange(num_book)
# Books each user has already rated. They are excluded from the candidate pool;
# otherwise the same (user, item) pair could be labelled both 1 and 0.
rated_items = rating.groupby('user_id')['book_id'].unique()
neg_user_chunks = []
neg_item_chunks = []
for u,k in zip(users,books_read):
    candidates = np.setdiff1d(all_items, rated_items.loc[u])
    # Sampling without replacement cannot draw more than the pool holds,
    # so cap the request instead of letting np.random.choice raise.
    n_neg = min(alpha*k, candidates.size)
    if n_neg == 0:
        continue
    neg_sample = np.random.choice(candidates,size=n_neg,replace=False)
    neg_user_chunks.append(np.full_like(neg_sample, u))
    neg_item_chunks.append(neg_sample)
# Concatenate once at the end rather than growing Python lists element by element.
if neg_item_chunks:
    unobserved_user = np.concatenate(neg_user_chunks)
    unobserved_item = np.concatenate(neg_item_chunks)
else:
    unobserved_user = np.array([], dtype=np.int64)
    unobserved_item = np.array([], dtype=np.int64)
# Every sampled negative gets the label 0.
unobserved_rating = np.zeros_like(unobserved_item)

In [15]:
# Stack positives first, then sampled negatives, for users, items and labels alike
# so that the three arrays stay aligned element by element.
train_user = np.concatenate((observed_user, unobserved_user), axis=0)
train_item = np.concatenate((observed_item, unobserved_item), axis=0)
train_rating = np.concatenate((observed_rating, unobserved_rating), axis=0)

In [16]:
# Start from an empty default graph before the Keras model is defined below
# (the sparse-tensor ops created earlier were added to the default graph).
tf.reset_default_graph()

In [17]:
class NeuralCF:
    """Neural collaborative filtering network with an MF branch and an MLP branch.

    Construction builds three Keras models that share the same two inputs
    (``user_input`` and ``item_input``) and the same layers:

    * ``mf_model``    -- outputs the element-wise product of the MF embeddings.
    * ``mlp_model``   -- outputs the last dense layer of the MLP branch.
    * ``neucf_model`` -- concatenates both branch outputs and applies a single
      sigmoid unit named ``prediction``.

    Parameters
    ----------
    mf_latent : int
        Embedding size of the MF branch.
    mlp_latent : int
        Embedding size (per user / per item) of the MLP branch.
    mlp_layers : sequence of int
        Unit counts of the MLP dense layers. Must be non-empty: the last entry
        sizes the ``mlp_out`` layer, the preceding ones the ``MLP_dense_{i}`` layers.
    num_user : int
        Vocabulary size (``input_dim``) of the user embeddings.
    num_item : int
        Vocabulary size (``input_dim``) of the item embeddings.
    """

    def __init__(self,mf_latent,mlp_latent,mlp_layers,num_user,num_item):
        self.mf_latent = mf_latent
        self.mlp_latent = mlp_latent
        self.mlp_layers = mlp_layers
        self.num_user = num_user
        self.num_item = num_item
        self._create_mf_model()

    @staticmethod
    def _embed(inputs, input_dim, output_dim, name):
        """Apply a normally-initialised, length-1 embedding layer to ``inputs``."""
        return tf.keras.layers.Embedding(input_dim=input_dim,
                                         output_dim=output_dim,
                                         embeddings_initializer='normal',
                                         input_length=1,
                                         name=name)(inputs)

    def _create_mf_model(self):
        """Build ``mf_model``, ``mlp_model`` and ``neucf_model`` (despite the name, all three)."""
        # Keras documents `shape` as a tuple; a bare int is only tolerated by some versions.
        user_input = tf.keras.layers.Input(shape=(1,),name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,),name='item_input')

        # Separate embedding tables per branch; creation order is kept stable
        # (MF user, MF item, MLP user, MLP item).
        mf_user_latent = self._embed(user_input, self.num_user, self.mf_latent, 'MF_user')
        mf_item_latent = self._embed(item_input, self.num_item, self.mf_latent, 'MF_item')
        mlp_user_latent = self._embed(user_input, self.num_user, self.mlp_latent, 'MLP_user')
        mlp_item_latent = self._embed(item_input, self.num_item, self.mlp_latent, 'MLP_item')

        # MF branch: element-wise product of the flattened user and item vectors.
        # Built-in merge layers are used instead of Lambda-wrapped closures so the
        # models can be saved and reloaded without shipping Python bytecode.
        flatten_mf_user = tf.keras.layers.Flatten()(mf_user_latent)
        flatten_mf_item = tf.keras.layers.Flatten()(mf_item_latent)
        mf_out = tf.keras.layers.Multiply(name='mf_out')([flatten_mf_user,flatten_mf_item])

        # MLP branch: concatenate the flattened vectors, then stack dense layers.
        flatten_mlp_user = tf.keras.layers.Flatten()(mlp_user_latent)
        flatten_mlp_item = tf.keras.layers.Flatten()(mlp_item_latent)
        mlp = tf.keras.layers.Concatenate(axis=-1,name='mlp_concat')([flatten_mlp_user,flatten_mlp_item])
        for i,l in enumerate(self.mlp_layers[:-1]):
            mlp = tf.keras.layers.Dense(l, activation='selu',
                                        kernel_initializer='glorot_normal',
                                        name = f'MLP_dense_{i}')(mlp)

        mlp_out = tf.keras.layers.Dense(self.mlp_layers[-1],
                                        activation='selu',
                                        kernel_initializer='glorot_normal',
                                        name='mlp_out')(mlp)
        self.mf_model = tf.keras.models.Model(inputs=[user_input,item_input],outputs=[mf_out])
        self.mlp_model = tf.keras.models.Model(inputs=[user_input,item_input],outputs=[mlp_out])

        # Fusion head: join both branch outputs and squash to a single probability.
        mf_mlp_concat = tf.keras.layers.Concatenate(axis=-1,name='mf_mlp_concat')([mf_out,mlp_out])
        final_out = tf.keras.layers.Dense(1, activation='sigmoid',
                                          kernel_initializer='glorot_normal',
                                          name='prediction')(mf_mlp_concat)

        self.neucf_model = tf.keras.models.Model(inputs=[user_input,item_input],outputs=[final_out])

    def compiles(self):
        """Placeholder, not implemented yet; does nothing and returns None."""
        return
    def train(self):
        """Placeholder, not implemented yet; does nothing and returns None."""
        return
    def predict(self):
        """Placeholder, not implemented yet; does nothing and returns None."""
        return

In [18]:
# 64-dimensional embeddings in both branches; MLP tower of 256 -> 128 -> 64 units.
neucf = NeuralCF(mf_latent=64,
                 mlp_latent=64,
                 mlp_layers=[256, 128, 64],
                 num_user=num_user,
                 num_item=num_book)

W0726 13:30:02.625466 4340221376 deprecation.py:506] From /Users/trungdoan/.conda/envs/myenv/lib/python3.7/site-packages/tensorflow/python/keras/initializers.py:143: calling RandomNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0726 13:30:02.705241 4340221376 deprecation.py:506] From /Users/trungdoan/.conda/envs/myenv/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1288: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0726 13:30:02.726862 4340221376 deprecation.py:323] From /Users/trungdoan/.conda/envs/myenv/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:4075: add_

In [19]:
# Binary cross-entropy matches the 0/1 labels and the sigmoid output unit.
neucf.neucf_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Train on the combined positive + negative pairs; inputs are ordered (user, item).
train_inputs = [train_user, train_item]
neucf.neucf_model.fit(x=train_inputs, y=train_rating, batch_size=1024, epochs=30)

Epoch 1/30
Epoch 2/30
 831488/4900560 [====>.........................] - ETA: 4:07 - loss: 0.2847 - acc: 0.8763

In [None]:
# Re-extract the observed entries, this time keeping the stored rating values
# as targets (this overwrites the all-ones labels assigned earlier).
observed_user, observed_item = rating_matrix.indices.T
observed_rating = rating_matrix.values

In [None]:
unobserved_user = 