In [1]:
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Concatenate, Dropout, Dense, BatchNormalization, Dot
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
from tensorflow import keras
import numpy as np

In [2]:
# Fetch the project repository and make it the working directory; later cells read
# ./dataset.py and data/movies.dat relative to it.
# NOTE(review): re-running this cell in the same session starts from inside ncf/, so it
# would presumably clone a nested copy - confirm before re-executing.
!git clone https://github.com/SyedMa3/ncf.git
%cd ncf

Cloning into 'ncf'...
remote: Enumerating objects: 158, done.[K
remote: Counting objects: 100% (158/158), done.[K
remote: Compressing objects: 100% (128/128), done.[K
remote: Total 158 (delta 83), reused 78 (delta 27), pack-reused 0[K
Receiving objects: 100% (158/158), 35.32 MiB | 29.77 MiB/s, done.
Resolving deltas: 100% (83/83), done.
/content/ncf


In [16]:
def get_model(num_users, num_items, latent_dim, mlp_embedding_dim=10):
  """Build the two-branch (dot-product + MLP) recommender model.

  The model takes a user id and an item id (one integer each) and outputs a single
  sigmoid score. One branch takes the dot product of `latent_dim`-wide user/item
  embeddings; the other concatenates a second pair of embeddings and passes them
  through a 64-32-16-8 dense tower. Both branch outputs are concatenated and fed to
  the final 1-unit sigmoid layer.

  Args:
    num_users: largest user id that will be fed to the model. Every embedding table
      is sized `num_users + 1`, so ids 0..num_users are valid lookups.
    num_items: largest item id that will be fed to the model (tables sized
      `num_items + 1`).
    latent_dim: width of the embeddings used by the dot-product branch.
    mlp_embedding_dim: width of the embeddings used by the MLP branch. Defaults to
      10, the value that was previously hard-coded.

  Returns:
    An uncompiled `Model` with inputs `[user_input, item_input]` and one output
    named 'prediction'.
  """
  user_input = Input(shape=(1,), dtype = 'int32', name = 'user_input')
  item_input = Input(shape=(1,), dtype = 'int32', name = 'item_input')

  # Dot-product branch. The tables are sized +1, like the MLP tables below: both
  # branches look up the same ids, so an id equal to num_users / num_items must be
  # in range here too (previously these two tables were one row short).
  GMF_user_embedding = Embedding(num_users + 1, latent_dim, name='gmf_user_embedding', input_length=1)
  GMF_item_embedding = Embedding(num_items + 1, latent_dim, name='gmf_item_embedding', input_length=1)

  user_latent = Flatten()(GMF_user_embedding(user_input))
  item_latent = Flatten()(GMF_item_embedding(item_input))

  # Dot over the embedding axis collapses this branch to one scalar per example.
  mf_pred = Dot(axes=1)([user_latent, item_latent])

  # MLP branch: separate embeddings, concatenated (item first, then user).
  mlp_user = Embedding(num_users + 1, mlp_embedding_dim, name='user_embedding')(user_input)
  mlp_item = Embedding(num_items + 1, mlp_embedding_dim, name='item_embedding')(item_input)

  mlp_user = Flatten()(mlp_user)
  mlp_item = Flatten()(mlp_item)

  concat = Concatenate()([mlp_item, mlp_user])
  x = Dropout(0.2)(concat)

  # Three identical Dense -> BatchNorm -> Dropout stages, then a plain Dense(8).
  for units in (64, 32, 16):
    x = Dense(units, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)

  x = Dense(8, activation='relu')(x)

  # Join the scalar dot-product score with the 8-wide MLP output.
  merged = Concatenate()([mf_pred, x])

  prediction = Dense(1, activation='sigmoid', name='prediction')(merged)

  return Model([user_input, item_input], prediction)

In [3]:
# Load ./dataset.py into a fresh module object bound to `dataset`, so its top-level
# names are reachable as attributes (dataset.users, dataset.train, ...).
# `types.ModuleType` replaces `imp.new_module`: the `imp` module is deprecated and no
# longer exists in Python 3.12+.
# NOTE: this executes all top-level code of dataset.py from the cloned repository.
import types

dataset = types.ModuleType('dataset')
with open("./dataset.py") as dataset_source:
  # Compiling with the real filename makes tracebacks point at dataset.py.
  exec(compile(dataset_source.read(), "./dataset.py", "exec"), dataset.__dict__)

In [5]:
# Column-shaped (n, 1) arrays built from the dataset module: a = user ids,
# b = item ids, c = labels. These are what the fit call consumes.
a, b, c = (
  np.array(values).reshape(-1, 1)
  for values in (dataset.users, dataset.items, dataset.labels)
)

In [17]:
# Build the network with 10-wide dot-product embeddings, compile it for binary
# cross-entropy with Adam (lr 0.01), print the layer summary, then train for 10 epochs.
m = get_model(
  num_users=dataset.num_users,
  num_items=dataset.num_movies,
  latent_dim=10,
)
m.compile(
  loss='binary_crossentropy',
  optimizer=keras.optimizers.Adam(learning_rate=0.01),
)
m.summary()
m.fit(x=[a, b], y=c, epochs=10, batch_size=512)

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_embedding (Embedding)     (None, 1, 10)        39530       ['item_input[0][0]']             
                                                                                                  
 user_embedding (Embedding)     (None, 1, 10)        60410       ['user_input[0][0]']             
                                                                                            

<keras.callbacks.History at 0x7f08a8041550>

In [18]:
from sklearn.metrics import mean_squared_error

# Column-shaped (n, 1) arrays for the held-out split: user ids, movie ids, ratings.
a1, b1, c1 = (
  np.array(dataset.test[column]).reshape(-1, 1)
  for column in ('userId', 'movieId', 'rating')
)

y_hat = m.predict([a1, b1])

# NOTE(review): the model ends in a sigmoid, so y_hat lies in (0, 1); this MSE is only
# meaningful if dataset.test['rating'] is on the same scale - confirm.
test_mse = mean_squared_error(c1, y_hat)
print(test_mse)

0.3669889206399357


In [19]:
# Write the trained model `m` to 'model.h5' in the current working directory.
m.save('model.h5')

In [4]:
# Read the model back from 'model.h5'; `predict` uses this `loaded_model` object
# rather than the in-memory `m`.
loaded_model = keras.models.load_model('model.h5')

In [None]:
# from sklearn.metrics import accuracy_score
# a1 = np.array([11121111]).reshape(-1,1)
# b1 = np.array([2233]).reshape(-1,1)
# c1 = np.array([1]).reshape(-1,1)
# y_hat = loaded_model.fit([a1, b1], c1)

# y_hat = loaded_model.predict([a1, b1])

# print(y_hat)

In [6]:
def predict(userID):

  l = np.arange(1,dataset.num_movies)
  base = list(set(l) - set(dataset.train[dataset.train['userId'] == userID]['movieId']))

  # print(np.random.choice(base,100))

  movies = np.random.choice(base, 100)
  movies.reshape(-1,1)

  u = np.full((100,1), userID, dtype=int)
  predictions = loaded_model.predict([u,movies])

  pred_movies = {}

  for x in range(len(predictions)):
    pred_movies[x] = predictions[x]

  return(nlargest(10, pred_movies, key=pred_movies.get))

In [16]:
from heapq import nlargest
import pandas as pd

vec = predict(69)

movies = pd.read_csv('data/movies.dat', sep = '::', names = ['movieId', 'name', 'genre'] ,engine='python', encoding='latin-1')

vec = movies.iloc[vec]['name']

print(vec)

84                Angels and Insects (1995)
86                 Dunston Checks In (1996)
48             When Night Is Falling (1995)
78                        Juror, The (1996)
99                     Bottle Rocket (1996)
4        Father of the Bride Part II (1995)
17                        Four Rooms (1995)
66                          Two Bits (1995)
18    Ace Ventura: When Nature Calls (1995)
98                         City Hall (1996)
Name: name, dtype: object
