In [1]:
#!rm -rf ./saved_model
#!python -m torch.distributed.launch --nproc_per_node=1 --use_env ../ncf.py --epochs 50 --data /data/cache/ml-20m --checkpoint_dir ./saved_model

DLL 2020-08-07 12:45:29.297777 - PARAMETER data : /data/cache/ml-20m  epochs : 50  batch_size : 1048576  valid_batch_size : 1048576  factors : 64  layers : [256, 256, 128, 64]  negative_samples : 4  learning_rate : 0.0045  topk : 10  seed : None  threshold : 1.0  beta1 : 0.25  beta2 : 0.5  eps : 1e-08  dropout : 0.5  checkpoint_dir : ./saved_model  load_checkpoint_path : None  mode : train  grads_accumulated : 1  amp : False  log_path : log.json  world_size : 1  distributed : False  local_rank : 0 
Saving results to ./saved_model
NeuMF(
  (mf_user_embed): Embedding(138493, 64)
  (mf_item_embed): Embedding(26744, 64)
  (mlp_user_embed): Embedding(138493, 128)
  (mlp_item_embed): Embedding(26744, 128)
  (mlp): ModuleList(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=64, bias=True)
  )
  (final): Linear(in_features=128, out_features=1, bias=True)
)
31832577 paramete

DLL 2020-08-07 12:50:23.730312 - (27,) train_throughput : 10176735.060483348  hr@10 : 0.9577234950502913  train_epoch_time : 9.758419513702393  validation_epoch_time : 0.47522592544555664  eval_throughput : 29433985.50254911 
DLL 2020-08-07 12:50:33.975841 - (28,) train_throughput : 10163073.06497709  hr@10 : 0.956315481648892  train_epoch_time : 9.77153754234314  validation_epoch_time : 0.4738273620605469  eval_throughput : 29520863.757573806 
DLL 2020-08-07 12:50:44.223208 - (29,) train_throughput : 10164592.367117403  hr@10 : 0.9579978771490256  train_epoch_time : 9.770076990127563  validation_epoch_time : 0.47710537910461426  eval_throughput : 29318036.669909175 
DLL 2020-08-07 12:50:54.479742 - (30,) train_throughput : 10153282.908698201  hr@10 : 0.9561999523441618  train_epoch_time : 9.780959606170654  validation_epoch_time : 0.47541284561157227  eval_throughput : 29422412.81260726 
DLL 2020-08-07 12:51:04.729210 - (31,) train_throughput : 10160314.879842326  hr@10 : 0.9573263630

In [1]:
# List the checkpoint directory passed to training (--checkpoint_dir ./saved_model)
# so the size and timestamp of the saved model file are visible before loading it.
!ls -l saved_model

total 124356
-rw-r--r-- 1 26622 dip 127332376 Aug  8 02:30 model.pth


In [2]:
import sys
sys.path.insert(0, "/mnt/dldata/vinhn/DeepLearningExamples/PyTorch/Recommendation/NCF")

import torch.jit
import time
from argparse import ArgumentParser
import numpy as np
import torch

from neumf import NeuMF

In [3]:
def parse_args(argv=None):
    """Parse the NCF inference/benchmark options.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is exactly what a
            zero-argument call did before this parameter existed. Pass ``[]``
            to obtain the declared defaults regardless of how the process
            was launched.

    Returns:
        argparse.Namespace with the attributes ``file``,
        ``load_checkpoint_path``, ``n_users``, ``n_items``, ``factors``,
        ``dropout``, ``layers``, ``batch_sizes``, ``num_batches``, ``fp16``
        and ``log_path``.

    Raises:
        SystemExit: raised by argparse on unknown or malformed arguments
            (and after printing ``--help``).
    """
    parser = ArgumentParser(description="Benchmark inference performance of the NCF model")

    # NOTE(review): presumably this option exists so the `-f <connection file>`
    # argument a Jupyter kernel is started with does not abort parsing; nothing
    # in this notebook reads `args.file` -- confirm before removing.
    parser.add_argument('-f', '--file', help='Path for input file. First line should contain number of lines to search in')
    parser.add_argument('--load_checkpoint_path', default=None, type=str,
                        help='Path to the checkpoint file to be loaded before training/evaluation')

    # Dataset dimensions; they size the embedding tables of the model.
    parser.add_argument('--n_users', default=138493, type=int,
                        help='Number of users. Defaults to the number of users in the ml-20m dataset after preprocessing')
    # Help text fixed: it previously said "number of users" for the item count.
    parser.add_argument('--n_items', default=26744, type=int,
                        help='Number of items. Defaults to the number of items in the ml-20m dataset after preprocessing')

    # Model hyper-parameters.
    parser.add_argument('-fac', '--factors', type=int, default=64,
                        help='Number of predictive factors')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='Dropout probability, if equal to 0 will not use dropout at all')
    parser.add_argument('--layers', nargs='+', type=int,
                        default=[256, 256, 128, 64],
                        help='Sizes of hidden layers for MLP')

    # Benchmark settings.
    parser.add_argument('--batch_sizes', default='1,4,16,64,256,1024,4096,16384,65536,262144,1048576', type=str,
                        help='A list of comma-separated batch size values to benchmark')
    parser.add_argument('--num_batches', default=200, type=int,
                        help='Number of batches for which to measure latency and throughput')
    parser.add_argument('--fp16', action='store_true', help='Cast the model to FP16 precision', default=False)
    parser.add_argument('--log_path', default='log.json', type=str,
                        help='Path for the JSON training log')

    return parser.parse_args(argv)

In [4]:
# Parse the options once for the whole notebook. Anything not supplied on the
# kernel's command line takes the default declared in parse_args().
args = parse_args()

In [10]:
from scipy.spatial.distance import cdist
def find_similar_movies(nn_movie_id, item_embedding, item_embedding_norm=None, k=10):
    """Return the rows of ``item_embedding`` most similar to one query row.

    Similarity is cosine similarity, computed as ``1 - cosine distance``
    between every row and the query row.

    Args:
        nn_movie_id: Row index of the query item inside ``item_embedding``.
        item_embedding: 2-D NumPy array with one embedding vector per row.
        item_embedding_norm: Unused; kept so existing callers that pass it
            keep working.
        k: Maximum number of indices to return. Values <= 0 yield an empty
            result.

    Returns:
        1-D integer array of at most ``k`` row indices, ordered from most to
        least similar. The query row is included in the ranking.
    """
    query = item_embedding[nn_movie_id].reshape(1, -1)
    sim = 1 - cdist(item_embedding, query, metric="cosine")

    # cdist yields an (n_items, 1) matrix. Select the column explicitly rather
    # than squeeze(), which would collapse a one-row table to a 0-d array.
    scores = sim[:, 0]

    # Rank best-first, then truncate. Slicing from the front avoids the
    # `[-k:]` pitfall where k == 0 selects the whole array instead of nothing.
    ranked = scores.argsort()[::-1]
    return ranked[:max(k, 0)]

In [13]:
# Rebuild the NeuMF network from the hyper-parameters in `args`, place it on
# the GPU, and then restore the trained weights from the saved checkpoint.
model = NeuMF(
    nb_users=args.n_users,
    nb_items=args.n_items,
    mf_dim=args.factors,
    mlp_layer_sizes=args.layers,
    dropout=args.dropout,
).cuda()

state_dict = torch.load("./saved_model/model.pth")
model.load_state_dict(state_dict)

# The nearest-neighbour search works on a NumPy copy of the matrix-factorisation
# item embedding table (one row per item), detached from autograd and on the CPU.
item_embedding = model.mf_item_embed.weight.detach().cpu().numpy()

import pickle
import pandas as pd

# NOTE: unpickling can execute arbitrary code; only load mapping files you trust.
# The "items" entry is indexed by embedding row and holds the dataset's movieId.
with open('./mappings.pickle', 'rb') as handle:
    nn_to_movies = pickle.load(handle)["items"]

# Same object under the name used when it was first loaded.
movies_mapping = nn_to_movies

# Inverse lookup: movieId -> embedding row index.
movies_to_nn = {nn_to_movies[pos]: pos for pos in range(len(nn_to_movies))}

# Movie metadata (title, genres) indexed by movieId.
movies = pd.read_csv("/mnt/dldata/vinhn/DeepLearningExamples/PyTorch/Recommendation/NCF/data/ml-20m/movies.csv", index_col="movieId")

# Query by movieId; the embedding table is indexed by the model's own item index,
# so the id is translated through movies_to_nn before the search.
movie_ID = 2
query_row = movies.loc[movie_ID]
print("Query: ", query_row["title"], query_row["genres"])

print("Similar movies: ")
similar_movies = find_similar_movies(movies_to_nn[movie_ID], item_embedding)

for i in similar_movies:
    # Translate each embedding row index back to its movieId for the metadata lookup.
    neighbor_id = nn_to_movies[i]
    neighbor_row = movies.loc[neighbor_id]
    print(neighbor_id, neighbor_row["title"], neighbor_row["genres"])

Query:  Jumanji (1995) Adventure|Children|Fantasy
Similar movies: 
2 Jumanji (1995) Adventure|Children|Fantasy
367 Mask, The (1994) Action|Comedy|Crime|Fantasy
2054 Honey, I Shrunk the Kids (1989) Adventure|Children|Comedy|Fantasy|Sci-Fi
586 Home Alone (1990) Children|Comedy
1917 Armageddon (1998) Action|Romance|Sci-Fi|Thriller
2012 Back to the Future Part III (1990) Adventure|Comedy|Sci-Fi|Western
317 Santa Clause, The (1994) Comedy|Drama|Fantasy
208 Waterworld (1995) Action|Adventure|Sci-Fi
788 Nutty Professor, The (1996) Comedy|Fantasy|Romance|Sci-Fi
2617 Mummy, The (1999) Action|Adventure|Comedy|Fantasy|Horror|Thriller


In [23]:
# Same lookup for another movieId: translate it to an embedding row index,
# search, then translate the hits back to movieIds for display.
movie_ID = 318
query_row = movies.loc[movie_ID]
print("Query: ", query_row["title"], query_row["genres"])

print("Similar movies: ")
similar_movies = find_similar_movies(movies_to_nn[movie_ID], item_embedding)

for i in similar_movies:
    neighbor_id = nn_to_movies[i]
    neighbor_row = movies.loc[neighbor_id]
    print(neighbor_id, neighbor_row["title"], neighbor_row["genres"])

Query:  Shawshank Redemption, The (1994) Crime|Drama
Similar movies: 
318 Shawshank Redemption, The (1994) Crime|Drama
527 Schindler's List (1993) Drama|War
593 Silence of the Lambs, The (1991) Crime|Horror|Thriller
296 Pulp Fiction (1994) Comedy|Crime|Drama|Thriller
50 Usual Suspects, The (1995) Crime|Mystery|Thriller
356 Forrest Gump (1994) Comedy|Drama|Romance|War
110 Braveheart (1995) Action|Drama|War
47 Seven (a.k.a. Se7en) (1995) Mystery|Thriller
480 Jurassic Park (1993) Action|Adventure|Sci-Fi|Thriller
457 Fugitive, The (1993) Thriller


In [18]:
!cat /mnt/dldata/vinhn/DeepLearningExamples/PyTorch/Recommendation/NCF/data/ml-20m/movies.csv| grep Star

124,"Star Maker, The (Uomo delle stelle, L') (1995)",Drama
131,Frankie Starlight (1995),Drama|Romance
197,"Stars Fell on Henrietta, The (1995)",Drama
260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
316,Stargate (1994),Action|Adventure|Sci-Fi
329,Star Trek: Generations (1994),Adventure|Drama|Sci-Fi
800,Lone Star (1996),Drama|Mystery|Western
1038,Unhook the Stars (1996),Drama
1118,North Star (a.k.a. Tashunga) (1995),Action|Adventure|Crime|Drama|Western
1196,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Sci-Fi
1210,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Sci-Fi
1356,Star Trek: First Contact (1996),Action|Adventure|Sci-Fi|Thriller
1371,Star Trek: The Motion Picture (1979),Adventure|Sci-Fi
1372,Star Trek VI: The Undiscovered Country (1991),Action|Mystery|Sci-Fi
1373,Star Trek V: The Final Frontier (1989),Action|Sci-Fi
1374,Star Trek II: The Wrath of Khan (1982),Action|Adventure|Sci-Fi|Thr