In [1]:
import random

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD


In [2]:
data_csv_path = "/home/jenazzad/ML_DATA/movielens/ml-25m/ratings.csv"
movies_path = "/home/jenazzad/ML_DATA/movielens/ml-25m/movies.csv"

model_path = "/home/jenazzad/PycharmProjects/recommender_transformer/recommender_models/recommender.ckpt"

In [3]:
data = pd.read_csv(data_csv_path)
movies = pd.read_csv(movies_path)


In [4]:
data.sort_values(by="timestamp", inplace=True)

In [5]:
data, mapping, inverse_mapping = map_column(data, col_name="movieId")
grp_by_train = data.groupby(by="userId")

In [6]:
random.sample(list(grp_by_train.groups), k=10)

[49346, 144069, 123450, 148061, 15851, 158174, 23525, 90203, 88187, 105811]

In [7]:
model = Recommender(
        vocab_size=len(mapping) + 2,
        lr=1e-4,
        dropout=0.3,
    )
model.eval()
model.load_state_dict(torch.load(model_path)["state_dict"])

<All keys matched successfully>

In [8]:
movie_to_idx = {a: mapping[b] for a, b in zip(movies.title.tolist(), movies.movieId.tolist()) if b in mapping}
idx_to_movie = {v: k for k, v in movie_to_idx.items()}

In [9]:
def predict(list_movies, model, movie_to_idx, idx_to_movie):
    
    ids = [PAD] * (120 - len(list_movies) - 1) + [movie_to_idx[a] for a in list_movies] + [MASK]
    
    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    
    with torch.no_grad():
        prediction = model(src)
    
    masked_pred = prediction[0, -1].numpy()
    
    sorted_predicted_ids = np.argsort(masked_pred).tolist()[::-1]
    
    sorted_predicted_ids = [a for a in sorted_predicted_ids if a not in ids]
    
    return [idx_to_movie[a] for a in sorted_predicted_ids[:30] if a in idx_to_movie]


### Senario 1: Adventure/Fantasy 

In [10]:
list_movies = ["Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
               "Harry Potter and the Chamber of Secrets (2002)",
               "Harry Potter and the Prisoner of Azkaban (2004)",
               "Harry Potter and the Goblet of Fire (2005)"]

top_10_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_10_movie

['Ice Age (2002)',
 "Pirates of the Caribbean: Dead Man's Chest (2006)",
 'Avatar (2009)',
 'Star Wars: Episode III - Revenge of the Sith (2005)',
 'Shrek 2 (2004)',
 'Ratatouille (2007)',
 'Bruce Almighty (2003)',
 'I, Robot (2004)',
 'Last Samurai, The (2003)',
 'Up (2009)',
 'Matrix Revolutions, The (2003)',
 'Men in Black II (a.k.a. MIIB) (a.k.a. MIB 2) (2002)',
 'Iron Man (2008)',
 'Spirited Away (Sen to Chihiro no kamikakushi) (2001)',
 '300 (2007)',
 'Big Fish (2003)',
 "Bridget Jones's Diary (2001)",
 'My Big Fat Greek Wedding (2002)',
 'Pianist, The (2002)',
 'Interstellar (2014)',
 'Shaun of the Dead (2004)',
 'Moulin Rouge (2001)',
 'Juno (2007)',
 'WALL·E (2008)',
 'Casino Royale (2006)',
 'School of Rock (2003)',
 '40-Year-Old Virgin, The (2005)',
 'Harry Potter and the Order of the Phoenix (2007)',
 'Bourne Supremacy, The (2004)',
 'Miss Congeniality (2000)']

### Senario 2:  Action/Adventure

In [11]:
list_movies = ["Black Panther (2017)",
               "Avengers, The (2012)",
               "Avengers: Infinity War - Part I (2018)",
               "Logan (2017)",
               "Spider-Man (2002)",
               "Spider-Man 3 (2007)",
               "Spider-Man: Far from Home (2019)"]

top_10_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_10_movie

['Avengers: Infinity War - Part II (2019)',
 'Deadpool 2 (2018)',
 'Thor: Ragnarok (2017)',
 'Spider-Man: Into the Spider-Verse (2018)',
 'Captain Marvel (2018)',
 'Incredibles 2 (2018)',
 'Untitled Spider-Man Reboot (2017)',
 'Ant-Man and the Wasp (2018)',
 'Guardians of the Galaxy 2 (2017)',
 'Iron Man 2 (2010)',
 'Thor (2011)',
 'Guardians of the Galaxy (2014)',
 'Captain America: The First Avenger (2011)',
 'X-Men Origins: Wolverine (2009)',
 "Ocean's 8 (2018)",
 'Wonder Woman (2017)',
 'Iron Man 3 (2013)',
 'Pirates of the Caribbean: The Curse of the Black Pearl (2003)',
 'Amazing Spider-Man, The (2012)',
 'Aquaman (2018)',
 'Dark Knight, The (2008)',
 'Mission: Impossible - Fallout (2018)',
 'Avengers: Age of Ultron (2015)',
 'Jurassic World: Fallen Kingdom (2018)',
 'Iron Man (2008)',
 'Coco (2017)',
 'Lord of the Rings: The Two Towers, The (2002)',
 'Rogue One: A Star Wars Story (2016)',
 'X-Men: The Last Stand (2006)',
 'Venom (2018)']

### Senario 3: Comedy

In [12]:
list_movies = ["Zootopia (2016)",
               "Toy Story 3 (2010)",
               "Toy Story 4 (2019)",
               "Finding Nemo (2003)",
               "Ratatouille (2007)",
               "The Lego Movie (2014)",
               "Ghostbusters (a.k.a. Ghost Busters) (1984)",
               "Ace Ventura: When Nature Calls (1995)"]
top_10_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_10_movie

['Home Alone (1990)',
 "Bug's Life, A (1998)",
 'Toy Story 2 (1999)',
 'Nightmare Before Christmas, The (1993)',
 'Babe (1995)',
 'Inside Out (2015)',
 'Mask, The (1994)',
 'Toy Story (1995)',
 'Back to the Future (1985)',
 'Back to the Future Part II (1989)',
 'Simpsons Movie, The (2007)',
 'Forrest Gump (1994)',
 'Austin Powers: International Man of Mystery (1997)',
 'Monty Python and the Holy Grail (1975)',
 'Cars (2006)',
 'Kung Fu Panda (2008)',
 'Groundhog Day (1993)',
 'American Pie (1999)',
 'Men in Black (a.k.a. MIB) (1997)',
 'Dumb & Dumber (Dumb and Dumber) (1994)',
 'Back to the Future Part III (1990)',
 'Big Hero 6 (2014)',
 'Mrs. Doubtfire (1993)',
 'Clueless (1995)',
 'Bruce Almighty (2003)',
 'Corpse Bride (2005)',
 'Deadpool (2016)',
 'Up (2009)',
 "Ferris Bueller's Day Off (1986)"]