In [14]:
import os
import sys
import csv

from itertools import product

sys.path.append(os.path.dirname(os.getcwd()))

import matplotlib.pyplot as plt
import numpy as np

from utils.dataset import train_test_load
from utils.checkpoint import load_variational

from model.train.recommender_variational import update_user_embedding, update_movie_embedding

In [2]:
# Restore the trained variational model from its checkpoint file.
params = load_variational("../artifacts/model/32m_variational_20.npz")

# Values stored under the r_* keys (only r_lambda and r_tau are passed to
# update_user_embedding further down in this notebook).
r_lambda, r_tau, r_gamma = (
    params[key] for key in ("r_lambda", "r_tau", "r_gamma")
)

# Position -> raw id lookups. A later cell calls .append() on
# idx_to_user_id, so it is expected to be a list -- confirm against
# load_variational.
idx_to_user_id, idx_to_movie_id = params["idx_to_user_id"], params["idx_to_movie_id"]

# Mean / variance of the bias terms.
user_mean_bias, user_var_bias = params["user_mean_bias"], params["user_var_bias"]
movie_mean_bias, movie_var_bias = params["movie_mean_bias"], params["movie_var_bias"]

# Mean / variance of the latent embeddings. Note the checkpoint keys are
# singular ("..._embedding") while the notebook variables are plural.
user_mean_embeddings, user_var_embeddings = (
    params["user_mean_embedding"],
    params["user_var_embedding"],
)
movie_mean_embeddings, movie_var_embeddings = (
    params["movie_mean_embedding"],
    params["movie_var_embedding"],
)

In [3]:
# Invert the checkpoint's position -> id lists into id -> position lookups.
user_id_to_idx = {user_id: idx for idx, user_id in enumerate(idx_to_user_id)}
movie_id_to_idx = {movie_id: idx for idx, movie_id in enumerate(idx_to_movie_id)}

# Adjacency lists, one entry per user / movie: (neighbour indices, ratings).
# The two inner lists of an entry are kept aligned position by position.
user_movie = [([], []) for _ in range(len(idx_to_user_id))]
movie_user = [([], []) for _ in range(len(idx_to_movie_id))]

# newline="" is what the csv module requires for file objects, and the
# explicit encoding keeps parsing independent of the platform locale.
with open("../data/ml-32m/ratings.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        # Fourth column is ignored here.
        user_id, movie_id, rating, _ = row
        # Ids are looked up as the raw CSV strings; an id missing from the
        # checkpoint mappings raises KeyError.
        user_idx = user_id_to_idx[user_id]
        movie_idx = movie_id_to_idx[movie_id]
        rating_value = float(rating)
        user_movie[user_idx][0].append(movie_idx)
        user_movie[user_idx][1].append(rating_value)
        movie_user[movie_idx][0].append(user_idx)
        movie_user[movie_idx][1].append(rating_value)

# Movie metadata lookups, all keyed by the raw string ids / titles from the CSV.
movie_id_to_title = {}
title_to_movie_id = {}
movie_id_to_genres = {}

# Decode explicitly as UTF-8 so non-ASCII titles do not depend on the
# platform's default encoding; newline="" is what the csv module requires
# so that quoted fields containing line breaks are parsed correctly.
with open("../data/ml-32m/movies.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for movie_id, title, genres in reader:
        movie_id_to_title[movie_id] = title
        # If two rows share the same title, the later row wins here.
        title_to_movie_id[title] = movie_id
        # Genres are kept as the raw CSV field (e.g. "Adventure|Fantasy").
        movie_id_to_genres[movie_id] = genres

In [4]:
# Reference movie that the synthetic user will rate, plus the rating to give it.
lord_of_the_ring_title = "Lord of the Rings: The Fellowship of the Ring, The (2001)"
lord_of_the_ring_rating = 5.0

# Resolve title -> raw movie id -> position in the model's movie arrays.
lord_of_the_ring_id = title_to_movie_id[lord_of_the_ring_title]
lord_of_the_ring_idx = movie_id_to_idx[lord_of_the_ring_id]

# "Degree" is the number of ratings recorded for this movie.
print(
    f"Movie: {lord_of_the_ring_title}",
    f"Degree: {len(movie_user[lord_of_the_ring_idx][0])}",
    f"Genres: {movie_id_to_genres[lord_of_the_ring_id]}",
    sep="\n",
)

Movie: Lord of the Rings: The Fellowship of the Ring, The (2001)
Degree: 73122
Genres: Adventure|Fantasy


In [5]:
# Register a synthetic "dummy" user whose only rating is the reference movie,
# and extend every per-user array with one fresh row for that user.
dummy_id = "dummy"

if dummy_id in user_id_to_idx:
    # The cell was already run in this kernel: reuse the existing slot instead
    # of appending a second dummy row. This also avoids calling .append() on
    # adjacency entries that a later cell has converted to NumPy arrays.
    dummy_idx = user_id_to_idx[dummy_id]
else:
    dummy_idx = len(user_id_to_idx)
    user_id_to_idx[dummy_id] = dummy_idx
    idx_to_user_id.append(dummy_id)

    # The dummy user's adjacency entry, and the mirrored entry on the movie side.
    user_movie.append((
        [lord_of_the_ring_idx], [lord_of_the_ring_rating]
    ))
    movie_user[lord_of_the_ring_idx][0].append(dummy_idx)
    movie_user[lord_of_the_ring_idx][1].append(lord_of_the_ring_rating)

    # Local, fixed-seed generator so the initial draw is reproducible across
    # kernel restarts.
    dummy_rng = np.random.default_rng(0)
    mean_dim = user_mean_embeddings.shape[1]
    var_dim = user_var_embeddings.shape[1]

    # NOTE(review): the standard deviation is sqrt(dim), kept from the original
    # cell -- confirm it matches the initialisation used during training.
    user_mean_embeddings = np.vstack([
        user_mean_embeddings,
        dummy_rng.normal(0, np.sqrt(mean_dim), mean_dim),
    ])
    # Squaring the draw and adding 1e-3 keeps every variance strictly positive.
    user_var_embeddings = np.vstack([
        user_var_embeddings,
        dummy_rng.normal(0, np.sqrt(var_dim), var_dim) ** 2 + 1e-3,
    ])

    # Bias row for the new user: mean 0, variance 1.
    user_mean_bias = np.hstack([user_mean_bias, 0])
    user_var_bias = np.hstack([user_var_bias, 1])

# All per-user containers must have grown in lockstep.
assert (
    len(user_mean_embeddings)
    == len(user_var_embeddings)
    == len(user_mean_bias)
    == len(user_var_bias)
    == len(user_id_to_idx)
    == len(idx_to_user_id)
    == len(user_movie)
), "Inconsistency in user data"

In [6]:
# Freeze every adjacency entry into a pair of NumPy arrays:
# integer neighbour indices and float64 ratings.
# Slice assignment rewrites the existing list objects in place, so any other
# reference to user_movie / movie_user sees the converted entries.
user_movie[:] = [
    (np.array(neighbours, dtype=int), np.array(ratings, dtype=np.float64))
    for neighbours, ratings in user_movie
]

movie_user[:] = [
    (np.array(neighbours, dtype=int), np.array(ratings, dtype=np.float64))
    for neighbours, ratings in movie_user
]

In [24]:
# Run one user-side update for the dummy user only.
# The return value is discarded and the next cell reads
# user_mean_embeddings[dummy_idx], so the helper presumably writes its result
# into the arrays passed in -- confirm against
# model.train.recommender_variational.
update_user_embedding(
    dummy_idx, r_lambda, r_tau, user_movie,
    user_mean_embeddings, user_var_embeddings, user_mean_bias,
    movie_mean_embeddings, movie_var_embeddings, movie_mean_bias,
)

In [25]:
# Score every movie by the dot product of its mean embedding with the dummy
# user's mean embedding. Bias terms are not added to the score, and the movie
# the dummy user already rated is not filtered out.
movie_scores = movie_mean_embeddings @ user_mean_embeddings[dummy_idx]

# Ascending order of score; the best matches sit at the end.
dummy_movie_ranks = np.argsort(movie_scores)

print("Top 10 Prediction")

# Walk the ranking from the highest score down and keep the first ten.
for position, top_idx in enumerate(dummy_movie_ranks[::-1][:10].tolist(), start=1):
    pred_id = idx_to_movie_id[top_idx]
    pred_title = movie_id_to_title[pred_id]
    n_ratings = len(movie_user[top_idx][0])
    print(position, "-", pred_title, " - degree:", n_ratings, " - genres:", movie_id_to_genres[pred_id])

Top 10 Prediction
1 - Connections (1978)  - degree: 57  - genres: Documentary
2 - Welfare (1975)  - degree: 21  - genres: Documentary
3 - Dominion (2018)  - degree: 20  - genres: Documentary
4 - Planet Earth (2006)  - degree: 2948  - genres: Documentary
5 - Planet Earth II (2016)  - degree: 1956  - genres: Documentary
6 - Cat City (1986)  - degree: 46  - genres: Animation|Children
7 - Civil War, The (1990)  - degree: 545  - genres: Documentary|War
8 - Band of Brothers (2001)  - degree: 2811  - genres: Action|Drama|War
9 - Alone in the Wilderness (2004)  - degree: 410  - genres: Documentary
10 - Rabbit of Seville (1950)  - degree: 257  - genres: Animation|Comedy
