In [52]:
import pandas as pd
import numpy as np
import os
import sys
import torch 

# Make the sibling ``src`` directory importable so the project-local modules
# imported below (``models``, ``data_processing``) can be resolved.
module_path = os.path.abspath(os.path.join(os.pardir, "src"))
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)


from models import Recommender
from data_processing import get_context, pad_list, map_column, MASK, PAD

In [53]:
# Root of the project checkout. The default keeps the original location, but it
# can be overridden through the MOVIE_RECOMMENDER_DIR environment variable so
# the notebook also runs on machines with a different directory layout.
project_dir = os.environ.get(
    "MOVIE_RECOMMENDER_DIR", "/home/alex/Documents/movie_recommender"
)

# Input files and the trained checkpoint, all derived from the single root above.
data_path = os.path.join(project_dir, "data", "ml-1m", "ratings.dat")
movies_path = os.path.join(project_dir, "data", "ml-1m", "movies.dat")
model_path = os.path.join(project_dir, "models", "recommender.ckpt")

In [54]:
# The ratings file has no header row and uses the two-character separator "::".
delimiter = "::"
column_names = ["userId","movieId","rating","timestamp"]
# pandas' C parser only supports single-character separators; without an
# explicit engine it falls back to the python engine and emits a ParserWarning.
# Requesting the python engine up front silences that warning and matches how
# the movies file is read further down.
data = pd.read_csv(data_path, 
                   delimiter=delimiter, 
                   header=None, 
                   names=column_names,
                   engine='python')
data.head(3)

  data = pd.read_csv(data_path,


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968


In [55]:
# The movies file shares the "::" separator and likewise has no header row.
delimiter = "::"
column_names = ["movieId", "title", "categories"]

# A multi-character separator needs the python parsing engine; the file is
# decoded as ISO-8859-1.
movies = pd.read_csv(
    movies_path,
    sep=delimiter,
    header=None,
    names=column_names,
    encoding="ISO-8859-1",
    engine="python",
)

movies.head(10)

Unnamed: 0,movieId,title,categories
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [56]:
# map_column (project helper from data_processing) returns the frame together
# with a mapping for the "movieId" column and its inverse.
# NOTE(review): presumably the mapping goes raw movieId -> contiguous model
# index; confirm against data_processing.map_column.
data, mapping, inverse_mapping = map_column(data, col_name="movieId")

# Ratings grouped per user.
grp_by_train = data.groupby("userId")

In [57]:
# Build the network with the same hyper-parameters as the checkpoint expects.
# vocab_size adds 2 to the number of mapped movies — presumably room for the
# PAD and MASK tokens (TODO confirm against data_processing).
model = Recommender(
        vocab_size=len(mapping) + 2,
        lr=1e-4,
        dropout=0.3,
    )
# Inference only: switch off dropout and other training-time behaviour.
model.eval()
# map_location="cpu" lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine, and keeps the weights on the CPU where predict() feeds
# CPU tensors and converts the output to numpy.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(torch.load(model_path, map_location="cpu")["state_dict"])

<All keys matched successfully>

In [58]:
# Title -> mapped index, restricted to movies whose movieId appears in
# ``mapping`` (movies absent from the mapping are skipped).
movie_to_idx = dict(
    (title, mapping[movie_id])
    for title, movie_id in zip(movies.title.tolist(), movies.movieId.tolist())
    if movie_id in mapping
)

# Reverse lookup: mapped index -> title.
idx_to_movie = dict((index, title) for title, index in movie_to_idx.items())

In [59]:
def predict(list_movies, model, movie_to_idx, idx_to_movie, max_len=120, top_k=30):
    """Recommend movies that are likely to follow a watch history.

    The history is converted to indices, left-padded with ``PAD`` to
    ``max_len - 1`` positions and terminated with a ``MASK`` token; the model's
    scores at that last (masked) position are used to rank candidates.

    Args:
        list_movies: Sequence of movie titles, oldest first. When it holds more
            than ``max_len - 1`` titles only the most recent ones are used, so
            the model input never exceeds ``max_len`` tokens.
        model: Callable taking a ``(1, max_len)`` long tensor of token ids.
            Assumes it returns scores indexed as ``[batch, position, token_id]``
            — TODO confirm against ``Recommender.forward``.
        movie_to_idx: Dict mapping a movie title to its token id.
        idx_to_movie: Dict mapping a token id back to its movie title.
        max_len: Total input length including the trailing ``MASK`` token.
            Defaults to 120 (presumably the length used in training — verify).
        top_k: Maximum number of titles to return. Defaults to 30.

    Returns:
        Up to ``top_k`` titles ordered from highest to lowest score, excluding
        every id already present in the input and ids without a known title.

    Raises:
        KeyError: If a title in the (truncated) history is not in ``movie_to_idx``.
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1 to hold the MASK token")

    # Keep only the most recent titles; a longer history would otherwise make
    # the padding count negative and silently produce an over-long input.
    history = list(list_movies)[-(max_len - 1):] if max_len > 1 else []

    unknown = [title for title in history if title not in movie_to_idx]
    if unknown:
        raise KeyError(f"Unknown movie title(s): {unknown}")

    seen = [movie_to_idx[title] for title in history]
    ids = [PAD] * (max_len - len(seen) - 1) + seen + [MASK]
    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    with torch.no_grad():
        prediction = model(src)
    # Scores at the final position, i.e. where the MASK token sits.
    masked_pred = prediction[0, -1].cpu().numpy()

    # Set membership keeps the filter linear in the vocabulary size.
    excluded = set(ids)
    ranked_ids = np.argsort(masked_pred).tolist()[::-1]

    # Filter *before* truncating so that ids without a title (or already seen)
    # do not eat into the top_k budget.
    recommendations = []
    for idx in ranked_ids:
        if len(recommendations) >= top_k:
            break
        if idx in excluded or idx not in idx_to_movie:
            continue
        recommendations.append(idx_to_movie[idx])
    return recommendations

In [60]:
# Example watch history, in viewing order (titles must match movies.dat exactly).
movies_list = [
    "Star Wars: Episode V - The Empire Strikes Back (1980)",
    "Rambo: First Blood Part II (1985)",
    "Lethal Weapon 2 (1989)",
    "Godfather: Part II, The (1974)",
    "Terminator 2: Judgment Day (1991)",
]

# Rank candidate movies for this history.
top_movie = predict(
    list_movies=movies_list,
    model=model,
    movie_to_idx=movie_to_idx,
    idx_to_movie=idx_to_movie,
)
top_movie

['American Beauty (1999)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Silence of the Lambs, The (1991)',
 'Being John Malkovich (1999)',
 'Sixth Sense, The (1999)',
 'Shakespeare in Love (1998)',
 'Matrix, The (1999)',
 'Shawshank Redemption, The (1994)',
 'Godfather, The (1972)',
 'Fargo (1996)',
 "Schindler's List (1993)",
 'Raiders of the Lost Ark (1981)',
 'Saving Private Ryan (1998)',
 'Galaxy Quest (1999)',
 'Chicken Run (2000)',
 'High Fidelity (2000)',
 'L.A. Confidential (1997)',
 'Gladiator (2000)',
 'Braveheart (1995)',
 'Pulp Fiction (1994)',
 'Fight Club (1999)',
 'X-Men (2000)',
 'Princess Bride, The (1987)',
 'Austin Powers: The Spy Who Shagged Me (1999)',
 'Jurassic Park (1993)',
 'Erin Brockovich (2000)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Airplane! (1980)',
 'Election (1999)',
 'Babe (1995)']