In [110]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2

In [111]:
# Load and preprocess data
train_data = pd.read_csv('neumf_train_data.csv')
test_data = pd.read_csv('neumf_test_data.csv')

# num_users = len(train_data.customer_id.unique()) + len(test_data.customer_id.unique())
# num_items = len(train_data.product_id.unique()) + len(test_data.product_id.unique())
concat_df = pd.concat([train_data, test_data], axis=0)
num_users, num_items = len(concat_df.customer_id.unique()), len(concat_df.product_id.unique())

In [None]:
print(num_users, num_items)

7968 5946


In [116]:
# full NCF model
def get_model(num_users, num_items, latent_dim=8, dense_layers=[64, 32, 16, 8],
              reg_layers=[0, 0, 0, 0], reg_mf=0):

    # input layer
    input_user = Input(shape=(1,), dtype='int32', name='user_input')
    input_item = Input(shape=(1,), dtype='int32', name='item_input')
    
    # embedding layer
    mf_user_embedding = Embedding(input_dim=num_users, output_dim=latent_dim,
                        name='mf_user_embedding',
                        embeddings_initializer='RandomNormal',
                        embeddings_regularizer=l2(reg_mf), input_length=1)
    mf_item_embedding = Embedding(input_dim=num_items, output_dim=latent_dim,
                        name='mf_item_embedding',
                        embeddings_initializer='RandomNormal',
                        embeddings_regularizer=l2(reg_mf), input_length=1)
    mlp_user_embedding = Embedding(input_dim=num_users, output_dim=int(dense_layers[0]/2),
                         name='mlp_user_embedding',
                         embeddings_initializer='RandomNormal',
                         embeddings_regularizer=l2(reg_layers[0]), 
                         input_length=1)
    mlp_item_embedding = Embedding(input_dim=num_items, output_dim=int(dense_layers[0]/2),
                         name='mlp_item_embedding',
                         embeddings_initializer='RandomNormal',
                         embeddings_regularizer=l2(reg_layers[0]), 
                         input_length=1)

    # MF latent vector
    mf_user_latent = Flatten()(mf_user_embedding(input_user))
    mf_item_latent = Flatten()(mf_item_embedding(input_item))
    mf_cat_latent = Multiply()([mf_user_latent, mf_item_latent])


    # MLP latent vector
    mlp_user_latent = Flatten()(mlp_user_embedding(input_user))
    mlp_item_latent = Flatten()(mlp_item_embedding(input_item))
    mlp_cat_latent = Concatenate()([mlp_user_latent, mlp_item_latent])
    
    mlp_vector = mlp_cat_latent
    
    # build dense layer for model
    for i in range(1,len(dense_layers)):
        layer = Dense(dense_layers[i],
                      activity_regularizer=l2(reg_layers[i]),
                      activation='relu',
                      name='layer%d' % i)
        mlp_vector = layer(mlp_vector)

    predict_layer = Concatenate()([mf_cat_latent, mlp_vector])

    result = Dense(1,activation='linear',name='result')

    model = Model(inputs=[input_user,input_item], outputs=[result(predict_layer)])
    return model

In [117]:
epochs = 5
verbose = 1
batch_size = 64
latent_dim = 8
dense_layers = [64, 32, 16, 8]
reg_layers = [0, 0, 0, 0]
reg_mf = 0
learning_rate = 0.001
learner = 'adam'

# get model
model = get_model(num_users, num_items, latent_dim, dense_layers, reg_layers, reg_mf)
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error', metrics=['accuracy'])
print(model.summary())

Model: "model_21"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 mlp_user_embedding (Embedding)  (None, 1, 32)       254976      ['user_input[0][0]']             
                                                                                                  
 mlp_item_embedding (Embedding)  (None, 1, 32)       190272      ['item_input[0][0]']             
                                                                                           

In [118]:
# Train NeuMF model
model.fit(x=[np.array(train_data.customer_id), np.array(train_data.product_id)],
                y=np.array(train_data.star_rating),
                batch_size=64,
                epochs=10,
                verbose=1,
                shuffle=True)
# neumf_model.save_weights('neumf_model_weights.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa2802cdb80>

In [119]:
predictions = model.predict([test_data.customer_id.head(10), test_data.product_id.head(10)])
[print(predictions[i], test_data.star_rating.iloc[i]) for i in range(0,10)]

[0.10171491] 5.0
[0.3631218] 5.0
[0.15589114] 1.0
[3.3329406] 3.0
[2.4835858] 4.0
[2.8021092] 2.0
[0.77561784] 1.0
[0.95302355] 5.0
[0.31207603] 1.0
[0.34244716] 5.0


[None, None, None, None, None, None, None, None, None, None]

In [120]:
def recommend_items(user_id, items=10):
    item_ids = np.arange(items)
    user_ids = np.repeat(user_id, items)
    rating_predictions = model.predict([user_ids, item_ids])
    item_ratings = list(zip(item_ids, rating_predictions.flatten()))
    # item_ratings = [(item_id, rating, verified_purchase) for item_id, rating, verified_purchase in item_ratings
    #                 if verified_purchase >= 0.5 and rating > 3.0]
    item_ratings = [(item_id, rating) for item_id, rating in item_ratings]
    item_ratings.sort(key=lambda x: x[1], reverse=True)
    recommended_items = [(item_id, rating) for item_id, rating in item_ratings[:num_items]]
    return recommended_items

In [121]:
res = recommend_items(user_id=train_data.customer_id[0])
print(res)

[(3, 1.8718044), (0, 1.384535), (7, 1.2942575), (2, 0.49711728), (9, 0.42683178), (6, 0.4166601), (4, 0.40114656), (8, 0.31048316), (5, 0.2866903), (1, -0.034992784)]
