# Neural Collaborative Filtering Demo Notebook
This demo notebook is intended to show our code/model definitions and as as a sanity check. It uses a small dataset and has not been tuned for performance.

## Training Section
## Select model type here:

In [44]:
# Options are: "GMF", "MLP", "NeuMF", "Outer", "ResNet"
model_type = "GMF"

## Imports

In [45]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

import heapq
import math

## Data Set-up

In [46]:
np.random.seed(0)
ratings_dir = './Data/ml-100k/u.data'

# Read in ratings data
ratings = pd.read_csv(ratings_dir, sep='\t')
ratings = ratings.sort_values(by=['userId', 'movieId'])
print(ratings)

num_users = len(ratings['userId'].unique())
num_items = len(ratings['movieId'].unique())

print('Num Users:', num_users)
print('Num Movies:', num_items)

       userId  movieId  rating  timestamp
32236       1        1       5  874965758
23171       1        2       3  876893171
83307       1        3       4  878542960
62631       1        4       3  876893119
47638       1        5       3  889751712
...       ...      ...     ...        ...
68857     943     1067       2  875501756
74200     943     1074       4  888640250
78704     943     1188       3  888640250
86600     943     1228       3  888640275
92115     943     1330       3  888692465

[100000 rows x 4 columns]
Num Users: 943
Num Movies: 1682


In [47]:
# Find the latest rating each user has made
ratings['latest'] = ratings.groupby(['userId'])['timestamp'].rank(method='first', ascending=False)

# Separate the latest rating into the test dataset
# Keep all other ratings in the train dataset
train_ratings = ratings[ratings['latest'] != 1]
test_ratings = ratings[ratings['latest'] == 1]

# Remove timestamp field
train_ratings = train_ratings[['userId', 'movieId', 'rating']]
test_ratings = test_ratings[['userId', 'movieId', 'rating']]

print('Columns:', train_ratings.columns.values)
print('Interactions in Training Set:', train_ratings.shape[0])
print('Interactions in Testing Set:', test_ratings.shape[0])

Columns: ['userId' 'movieId' 'rating']
Interactions in Training Set: 99057
Interactions in Testing Set: 943


### Convert to Implicit Feedback

In [48]:
# Convert rating to 1 for everything to mark that the user has watched this item
train_ratings.loc[:, 'rating'] = 1
print(train_ratings)

# Convert Test Rating dataframe into list
test_ratings_list = []
for user, item in zip(test_ratings['userId'], test_ratings['movieId']):
  test_ratings_list.append([user, item])

test_ratings = test_ratings_list


       userId  movieId  rating
32236       1        1       1
23171       1        2       1
83307       1        3       1
62631       1        4       1
47638       1        5       1
...       ...      ...     ...
68857     943     1067       1
74200     943     1074       1
78704     943     1188       1
86600     943     1228       1
92115     943     1330       1

[99057 rows x 3 columns]


In [56]:
# Define model to generate negative samples for each training epoch
def get_train_instances(ratings, num_negatives):
    # Add negative samples with rating = 0
    all_movies = ratings['movieId'].unique()

    users, items, labels = [], [], []
    user_item_set = set(zip(train_ratings['userId'], train_ratings['userId']))
    num_negatives = 4

    for (u, i) in user_item_set:
        users.append(u)
        items.append(i)
        labels.append(1)
        for _ in range(num_negatives):
            negative_item = np.random.choice(all_movies)
            while (u, negative_item) in user_item_set:
                negative_item = np.random.choice(all_movies)
            users.append(u)
            items.append(negative_item)
            labels.append(0)
    
    return users, items, labels

In [51]:
# Generate 100 Negative Test Examples
# Add negative samples
all_movies = ratings['movieId'].unique()

users, items, labels = [], [], []
user_item_set = set(zip(train_ratings['userId'], train_ratings['movieId']))
user_set = set(train_ratings['userId'])
num_test_negatives = 99
test_negatives = []

for u in user_set:
  negatives = []

  for _ in range(num_test_negatives):
    negative_item = np.random.choice(all_movies)
    while (u, negative_item) in user_item_set:
      negative_item = np.random.choice(all_movies)
    
    negatives.append(negative_item)
  
  test_negatives.append(negatives)

print('Number of users:', len(test_negatives))
print('Number of negative points:', len(test_negatives[138]))

Number of users: 943
Number of negative points: 99


## Define Models

Each model starts with the same inputs and embeddings. The GMF passes the embedding vectors straight into element-wise multiplication. The MLF concatenates the vectors and passes them through an MLP. The NeuMF ensembles the MLP and GMF models. The Outer Product takes the outer product of the two vectors and passes the resulting 2D matrix through a CNN. Finally, the ResNet also takes the outer product of the two vectors and passes the resulting 2D matrix through a ResNet.

In [57]:
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Reshape, Multiply, Flatten, Layer, Lambda, Concatenate, Conv2D, MaxPool2D
from keras import initializers, regularizers
from keras.regularizers import l2
import sys

def get_GMFmodel(num_users, num_items, latent_dim):
  user_input = Input(shape = (1,), dtype = 'int32', name = 'user')
  item_input = Input(shape = (1,), dtype = 'int32', name = 'item')

  user_embedding = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)
  item_embedding = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)

  user_latent = Flatten()(user_embedding(user_input))
  item_latent = Flatten()(item_embedding(item_input))

  prediction_vec = Multiply()([user_latent, item_latent])

  prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(prediction_vec)

  return Model(inputs=[user_input, item_input], outputs=prediction)


def get_MLPmodel(num_users, num_items, latent_dim):
  user_input = Input(shape = (1,), dtype = 'int32', name = 'user')
  item_input = Input(shape = (1,), dtype = 'int32', name = 'item')
  user_embedding = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embed', embeddings_initializer = initializers.RandomNormal(stddev = 0.01), embeddings_regularizer = regularizers.l2(0), input_length = 1)
  item_embedding = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embed',embeddings_initializer = initializers.RandomNormal(stddev = 0.01), embeddings_regularizer = regularizers.l2(0), input_length = 1)
  user_latent = Flatten()(user_embedding(user_input))
  item_latent = Flatten()(item_embedding(item_input))
  inputs = Concatenate()([user_latent, item_latent])
  layer = Dense(64,activation='relu', name='Layer1', kernel_initializer='glorot_uniform', kernel_regularizer=l2())(inputs)
  layer = Dense(32, activation='relu', name='Layer3')(layer)
  layer = Dense(16, activation='relu')(layer)
  output = Dense(1, activation='sigmoid', name='Layer4')(layer)
  return Model(inputs=[user_input, item_input], outputs=output)


def get_NeuMFmodel(num_users, num_items, latent_dim):
  user_input = Input(shape = (1,), dtype = 'int32', name = 'user')
  movie_input = Input(shape = (1,), dtype = 'int32', name = 'item')
  user_embedding = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embed', embeddings_initializer = initializers.RandomNormal(stddev = 0.01), embeddings_regularizer = regularizers.l2(0), input_length = 1)
  movie_embedding = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embed',embeddings_initializer = initializers.RandomNormal(stddev = 0.01), embeddings_regularizer = regularizers.l2(0), input_length = 1)
  user_latent = Flatten()(user_embedding(user_input))
  item_latent = Flatten()(movie_embedding(movie_input))
  prediction_vec = Multiply()([user_latent, item_latent])
  prediction_GMF = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(prediction_vec)
  inputs = Concatenate()([user_latent, item_latent])
  layer = Dense(64,activation='relu', name='Layer1', kernel_initializer='glorot_uniform', kernel_regularizer=l2())(inputs)
  layer = Dense(32, activation='relu', name='Layer3', kernel_regularizer=l2())(layer)
  layer = Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0))(layer)
  prediction_MLP = Dense(1, activation='sigmoid', name='Layer4')(layer)
  predictions = Combine()([prediction_GMF, prediction_MLP])
  return Model(inputs=[user_input, movie_input], outputs=predictions)

class Combine(Layer):
  def __init__(self):
        super(Combine, self).__init__()
        random_alpha = tf.random.uniform(shape=[1])
        self.alpha = tf.Variable(initial_value=random_alpha, trainable=True)
  
  def call(self, inputs):
    return (1 - self.alpha)*inputs[0] + self.alpha*inputs[1]


def get_OuterProductmodel(num_users, num_items, latent_dim):
    user_input = Input(shape = (1,), dtype = 'int32', name = 'user')
    item_input = Input(shape = (1,), dtype = 'int32', name = 'item')

    user_embedding = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)
    item_embedding = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)

    user_latent = Flatten()(user_embedding(user_input))
    item_latent = Flatten()(item_embedding(item_input))

    latent_map = tf.linalg.matmul(tf.expand_dims(user_latent, -1), tf.expand_dims(item_latent, 1))

    x = tf.expand_dims(latent_map, -1)

    x = Conv2D(64, 3, activation='relu', padding='same')(x)
    x = MaxPool2D(pool_size=2)(x)
    x = Conv2D(64, 3, activation='relu', padding='same')(x)
    x = MaxPool2D(pool_size=2)(x)
    x = Conv2D(64, 3, activation='relu', padding='same')(x)
    x = MaxPool2D(pool_size=2)(x)
    x = Conv2D(64, 3, activation='relu', padding='same')(x)
    x = Flatten()(x)
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(x)

    return Model(inputs=[user_input, item_input], outputs=prediction)


# https://towardsdatascience.com/building-a-resnet-in-keras-e8f1322a49ba
def relu_bn(inputs):
    relu = tf.keras.layers.ReLU()(inputs)
    bn = tf.keras.layers.BatchNormalization()(relu)
    return bn

def residual_block(x, downsample = False, filters = 16, kernel_size = 3):
    y = tf.keras.layers.Conv2D(kernel_size=kernel_size,
               strides= (1 if not downsample else 2),
               filters=filters,
               padding="same")(x)
    y = relu_bn(y)
    y = tf.keras.layers.Conv2D(kernel_size=kernel_size,
               strides=1,
               filters=filters,
               padding="same")(y)

    if downsample:
        x = tf.keras.layers.Conv2D(kernel_size=1,
                   strides=2,
                   filters=filters,
                   padding="same")(x)
    out = tf.keras.layers.Add()([x, y])
    out = relu_bn(out)
    return out

def get_ResidualModel(num_users, num_items, embedding_dim, num_filters):
    user_input = Input(shape = (1,), dtype = 'int32', name = 'user')
    item_input = Input(shape = (1,), dtype = 'int32', name = 'item')

    user_embedding = Embedding(input_dim = num_users, output_dim = embedding_dim, name = 'user_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)
    item_embedding = Embedding(input_dim = num_items, output_dim = embedding_dim, name = 'item_embed',
                             embeddings_initializer = initializers.RandomNormal(stddev = 0.01), 
                             embeddings_regularizer = regularizers.l2(0), input_length = 1)

    user_latent = Flatten()(user_embedding(user_input))
    item_latent = Flatten()(item_embedding(item_input))

    latent_map = tf.linalg.matmul(tf.expand_dims(user_latent, -1), tf.expand_dims(item_latent, 1))

    x = tf.expand_dims(latent_map, -1)
    
    layers = [2, 2, 2]
    
    for i, num_layers in enumerate(layers):
        for _ in range(num_layers):
            x = residual_block(x, downsample = False, filters = num_filters, kernel_size = 3)
          
        if i == len(layers) - 1:
            x = residual_block(x, downsample = False, filters = num_filters, kernel_size = 3)
        else:
            x = residual_block(x, downsample = True, filters = num_filters, kernel_size = 3)
    
    x = Flatten()(x)
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(x)
    
    return Model(inputs=[user_input, item_input], outputs=prediction)

In [58]:
from keras.optimizers import Adam

topK = 10

if model_type == "GMF":
  model = get_GMFmodel(num_users + 1, num_items + 1, 8)
  model.compile(optimizer=Adam(0.01), loss='binary_crossentropy')
elif model_type == "MLP":
  model = get_MLPmodel(num_users + 1, num_items + 1, 8)
  model.compile(optimizer=Adam(0.01), loss='binary_crossentropy')
elif model_type == "NeuMF":
  model = get_NEUMFmodel(num_users + 1, num_items + 1, 8)
  model.compile(optimizer=Adam(0.01), loss='binary_crossentropy')
elif model_type == "Outer":
  model = get_OuterProductmodel(num_users + 1, num_items + 1, 8)
  model.compile(optimizer=Adam(0.01), loss='binary_crossentropy')
elif model_type == "ResNet":
  model = get_ResidualModel(num_users + 1, num_items + 1, 8, 32)
  model.compile(optimizer=Adam(0.001), loss='binary_crossentropy')

model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embed (Embedding)          (None, 1, 8)         7552        user[0][0]                       
__________________________________________________________________________________________________
item_embed (Embedding)          (None, 1, 8)         13464       item[0][0]                       
____________________________________________________________________________________________

### Define Evaluation Metrics

In [59]:
def evaluateNDCG(ranked_list, target_item):
    for i in range(len(ranked_list)):
        if ranked_list[i] == target_item:
            return math.log(2) / math.log(i + 2)
  
    return 0

def hitRate(ranked_list, target_item):
    for rank in ranked_list:
        if target_item == rank:
            return 1
    return 0

# This method calculates all the evaluation metrics. Individual methods are called from here.
def evaluate(model, testPosRatings, testNegRatings, N):
    hits = []
    ndcgs = []
    for i in range(len(testPosRatings)):
        hit, ncdg = evaluate_one(model, testPosRatings[i], testNegRatings[i], N)
        hits.append(hit)
        ndcgs.append(ncdg)
        
    return np.array(hits).mean(), np.array(ndcgs).mean()

def evaluate_one(model, posRating, negRatings, N):
    user = posRating[0]
    movie = posRating[1]
    negRatings.append(movie)

    user_input = np.full(len(negRatings), user)

    predictions = model.predict([user_input, np.array(negRatings)], batch_size = 100)

  # associate item with predictions
    items = {}
    for i in range(len(predictions)):
        items[negRatings[i]] = predictions[i]
    negRatings.pop()

    rankedList = heapq.nlargest(N, items, items.get)
    ndcg = evaluateNDCG(rankedList, movie)
    hit = hitRate(rankedList, movie)

    return hit, ndcg

In [55]:
NUM_EPOCHS = 20
best_hr = 0
best_ndcg = 0
best_epoch = -1
model_path = "ResNet_Model.h5"

# Get examples for untrained model metrics.
user_input, item_input, labels = get_train_instances(ratings, num_negatives = 4)

# Get metrics for untrained model
model.evaluate([np.array(user_input), np.array(item_input)],
                      np.array(labels),
                      batch_size = 16)

hit_rate, ndcg = evaluate(model, test_ratings, test_negatives, N = 10)
print('Initial Model', 'Hit Rate:', hit_rate, 'NDCG:', ndcg)

for epoch in range(1, NUM_EPOCHS + 1):
    # Get training examples
    user_input, item_input, labels = get_train_instances(ratings, num_negatives = 4)

    # Train 1 epoch
    hist = model.fit([np.array(user_input), np.array(item_input)],
                      np.array(labels),
                      batch_size = 16, epochs = 1)
    
    # Evaluate metrics
    hit_rate, ndcg = evaluate(model, test_ratings, test_negatives, N = 10)
    print('Epoch', epoch, 'Hit Rate:', hit_rate, 'NDCG:', ndcg)
    model.save(model_path)

    # Save best model
    if hit_rate > best_hr:
        best_hr, best_ncdg, best_iter = hit_rate, ndcg, epoch
        model.save(model_path, overwrite=True)

print("Best Iteration %d:  HR = %.4f, NDCG = %.4f. " %(best_iter, best_hr, best_ncdg))
print("The best Outer Product model is saved to %s" %(model_path))

Initial Model Hit Rate: 0.11134676564156946 NDCG: 0.05026819582143126
Epoch 1 Hit Rate: 0.10922587486744433 NDCG: 0.0491398473648851
Epoch 2 Hit Rate: 0.11134676564156946 NDCG: 0.04935754014999356
Epoch 3 Hit Rate: 0.12513255567338283 NDCG: 0.057905456709228004
Epoch 4 Hit Rate: 0.1420996818663839 NDCG: 0.07000469104203644
Epoch 5 Hit Rate: 0.1474019088016967 NDCG: 0.07264460091797906
Epoch 6 Hit Rate: 0.15058324496288442 NDCG: 0.07191234692780876
Epoch 7 Hit Rate: 0.16224814422057265 NDCG: 0.07508707331118025
Epoch 8 Hit Rate: 0.16224814422057265 NDCG: 0.0761648033801764
Epoch 9 Hit Rate: 0.17709437963944857 NDCG: 0.08401593698231274
Epoch 10 Hit Rate: 0.1728525980911983 NDCG: 0.0795644352244002
Epoch 11 Hit Rate: 0.176033934252386 NDCG: 0.08009140014702135
Epoch 12 Hit Rate: 0.17179215270413573 NDCG: 0.07667335561285937
Epoch 13 Hit Rate: 0.16436903499469777 NDCG: 0.07363337963271752
Epoch 14 Hit Rate: 0.1633085896076352 NDCG: 0.07390591683653423
Epoch 15 Hit Rate: 0.1675503711558854

## Prediction

In [60]:
# Set desired user and item values from the dataset above
user = [1]
item = [1]

# Make prediction
result = model.predict([np.array(user), np.array(item)])
print('Predicted probability for user', user, 'interacting with item', item, result[0][0])

Predicted probability for user [1] interacting with item [1] 0.4999733
