In [1]:
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Concatenate, Dropout, Dense, BatchNormalization, Dot
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
from tensorflow import keras
import numpy as np

In [2]:
!git clone https://github.com/SyedMa3/ncf.git
%cd ncf

Cloning into 'ncf'...
remote: Enumerating objects: 124, done.[K
remote: Counting objects: 100% (124/124), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 124 (delta 65), reused 59 (delta 22), pack-reused 0[K
Receiving objects: 100% (124/124), 29.63 MiB | 31.05 MiB/s, done.
Resolving deltas: 100% (65/65), done.
/content/ncf


In [None]:
def get_model(num_users, num_items, latent_dim):

  user_input = Input(shape=(1,), dtype = 'int32', name = 'user_input')
  item_input = Input(shape=(1,), dtype = 'int32', name = 'item_input')

  GMF_user_embedding = Embedding(num_users, latent_dim, name='gmf_user_embedding', input_length=1)
  GMF_item_embedding = Embedding(num_items, latent_dim, name='gmf_item_embedding', input_length=1)

  user_latent = Flatten()(GMF_user_embedding(user_input))
  item_latent = Flatten()(GMF_item_embedding(item_input))

  mf_pred = Dot(axes=1)([user_latent, item_latent])

  mlp_user = Embedding(num_users+1, 10, name='user_embedding')(user_input)
  mlp_item = Embedding(num_items+1, 10, name='item_embedding')(item_input)

  mlp_user = Flatten()(mlp_user)
  mlp_item = Flatten()(mlp_item)

  concat = Concatenate()([mlp_item, mlp_user])
  concat = Dropout(0.2)(concat)

  x = Dense(32, activation='relu')(concat)
  x = BatchNormalization()(x)
  x = Dropout(0.2)(x)

  x = Dense(16, activation='relu')(x)
  x = BatchNormalization()(x)
  x = Dropout(0.2)(x)

  x = Dense(8, activation='relu')(x)

  merged = Concatenate()([mf_pred, x])

  prediction = Dense(1, activation='sigmoid', name='prediction')(merged)

  model = Model([user_input, item_input], prediction)
  # model.summary()
  return model

In [3]:
import imp
dataset = imp.new_module('dataset')
exec(open("./dataset.py").read(), dataset.__dict__)

       userId  movieId  rating   timestamp  sort_latest
0           1     2492       1   965719662          1.0
232         2    80489       1  1445715340          1.0
261         3     2424       1  1306464293          1.0
300         4     4246       1  1007574542          1.0
516         5      247       1   847435337          1.0
...       ...      ...     ...         ...          ...
97364     606     2355       1  1368460577          1.0
98479     607     4015       1   997847387          1.0
98666     608    52245       1  1189563917          1.0
99497     609      650       1   847221080          1.0
99534     610     3917       1  1495959411          1.0

[610 rows x 5 columns]


In [None]:
a = np.array(dataset.users).reshape(-1,1)
b = np.array(dataset.items).reshape(-1,1)
c = np.array(dataset.labels).reshape(-1,1)

In [None]:
m = get_model(dataset.num_users,dataset.num_movies,10)
m.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01), loss='binary_crossentropy')
m.summary()
m.fit([a,b], c, batch_size=256, epochs=10)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_embedding (Embedding)     (None, 1, 10)        1936100     ['item_input[0][0]']             
                                                                                                  
 user_embedding (Embedding)     (None, 1, 10)        6110        ['user_input[0][0]']             
                                                                                              

<keras.callbacks.History at 0x7fa0e62e6110>

In [None]:
m.save('model.h5')

In [4]:
loaded_model = keras.models.load_model('model.h5')

In [None]:
# from sklearn.metrics import accuracy_score
# a1 = np.array([11121111]).reshape(-1,1)
# b1 = np.array([2233]).reshape(-1,1)
# c1 = np.array([1]).reshape(-1,1)
# y_hat = loaded_model.fit([a1, b1], c1)

# y_hat = loaded_model.predict([a1, b1])

# print(y_hat)

[[0.23200744]]


In [13]:
def predict(userID):

  l = np.arange(1,dataset.num_movies)
  base = list(set(l) - set(dataset.train[dataset.train['userId'] == userID]['movieId']))

  # print(np.random.choice(base,100))

  movies = np.random.choice(base, 100)
  movies.reshape(-1,1)

  u = np.full((100,1), userID, dtype=int)
  predictions = loaded_model.predict([u,movies])

  pred_movies = {}

  for x in range(len(predictions)):
    pred_movies[x] = predictions[x]

  print(nlargest(10, pred_movies, key=pred_movies.get))
  print(pred_movies)

In [15]:
from heapq import nlargest

predict(1)

[71, 70, 80, 25, 23, 62, 68, 3, 19, 52]
{0: array([0.2450674], dtype=float32), 1: array([0.25901416], dtype=float32), 2: array([0.25667176], dtype=float32), 3: array([0.27454293], dtype=float32), 4: array([0.2619181], dtype=float32), 5: array([0.25857764], dtype=float32), 6: array([0.26396355], dtype=float32), 7: array([0.2518815], dtype=float32), 8: array([0.26505172], dtype=float32), 9: array([0.2539822], dtype=float32), 10: array([0.25713903], dtype=float32), 11: array([0.2607812], dtype=float32), 12: array([0.264434], dtype=float32), 13: array([0.26024276], dtype=float32), 14: array([0.25316495], dtype=float32), 15: array([0.25855878], dtype=float32), 16: array([0.26441807], dtype=float32), 17: array([0.00048548], dtype=float32), 18: array([0.26345587], dtype=float32), 19: array([0.27313435], dtype=float32), 20: array([0.26434103], dtype=float32), 21: array([0.25372455], dtype=float32), 22: array([0.20884185], dtype=float32), 23: array([0.27604607], dtype=float32), 24: array([0.259