In [2]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from recommenders.mf import MatrixFactorization
import scipy.sparse
from utils import read_json_fast
import gdown
import pandas as pd
import random
%matplotlib inline

In [3]:
# Load the two sparse matrices stored in SciPy's .npz format:
# `x` from train.npz and `y` from test.npz. `y` is later row-indexed by user.
x = scipy.sparse.load_npz('./data/train.npz')
y = scipy.sparse.load_npz('./data/test.npz')

In [4]:
# Construct the matrix-factorization recommender.
# NOTE(review): K is presumably the number of latent factors and gamma the
# learning rate — confirm against recommenders.mf.MatrixFactorization.
M = MatrixFactorization(K=100, iterations=200, gamma=0.3) # after 200-250 iterations, test error increases

In [5]:
# Load saved model state from "checkpoint.model" into `M`.
# NOTE(review): presumably this replaces the freshly constructed parameters and
# also provides `M.old_recs` used further down — confirm in MatrixFactorization.load.
M.load("checkpoint.model")

In [6]:
# CSR copy of the test matrix; it is sliced one user row at a time below (`_y[u]`).
_y = y.tocsr()

In [7]:
# Fetch the BOOKS archive from Google Drive into ./data/BOOKS.json.gz.
# cached_download skips the transfer when the target file already exists
# (it then only reports "File exists") and returns the local path.
gdown.cached_download("https://drive.google.com/uc?id=1ICk5x0HXvXDp5Zt54CKPh5qz1HyUIn9m", "./data/BOOKS.json.gz", quiet=False)

File exists: ./data/BOOKS.json.gz


'./data/BOOKS.json.gz'

In [8]:
# Parse the downloaded archive with the project helper `read_json_fast`.
# NOTE(review): the result is used below as a pandas DataFrame with `book_id`
# and `title` columns — confirm the helper's return type in utils.
df_books = read_json_fast("./data/BOOKS.json.gz")

Processing BOOKS.json.gz:


0lines [00:00, ?lines/s]

In [9]:
# Index the book table by `book_id` (sorted) so titles can be fetched with
# `df_books.loc[book_id]` in the display cells below.
# The guard keeps this cell idempotent: once `book_id` has become the index it
# is no longer a column, so an unconditional set_index('book_id') raises
# KeyError when the cell is executed a second time. A genuinely missing
# `book_id` column still raises KeyError on the first run.
if df_books.index.name != 'book_id':
    df_books = df_books.set_index('book_id')
df_books = df_books.sort_index()

In [10]:
# Load the pickled id-mapping tables for users and for books (items).
# NOTE: pd.read_pickle unpickles arbitrary Python objects — only load files
# from a trusted source.
user_id_map = pd.read_pickle("./data/user_id_map.pkl")
item_id_map = pd.read_pickle("./data/book_id_map.pkl")

In [11]:
# Key the user mapping by its sequence id (`user_id_seq`) and sort by it.
# The guard keeps this cell idempotent: after the first run `user_id_seq` is
# the index rather than a column, so an unconditional set_index would raise
# KeyError on re-execution.
if user_id_map.index.name != 'user_id_seq':
    user_id_map = user_id_map.set_index('user_id_seq')
user_id_map = user_id_map.sort_index()

In [12]:
# Key the book mapping by its sequence id (`book_id_seq`) and sort by it, so
# that row order follows the sequence ids used by the positional lookups below.
# The guard keeps this cell idempotent: after the first run `book_id_seq` is
# the index rather than a column, so an unconditional set_index would raise
# KeyError on re-execution.
if item_id_map.index.name != 'book_id_seq':
    item_id_map = item_id_map.set_index('book_id_seq')
item_id_map = item_id_map.sort_index()

In [22]:
# Pick the user (a row of the test matrix) whose recommendations are inspected
# in the cells below.
# A dedicated, seeded generator makes the sampled user — and every output that
# depends on it — reproducible across Restart & Run All, without touching the
# global `random` state. Set USER_SEED to None to draw a different user on
# every run.
USER_SEED = 0
u = random.Random(USER_SEED).randrange(y.shape[0])

In [23]:
# Ask the model for k=10 recommendations for user `u`; the result is used below
# both as row positions into `item_id_map` and as column indices into the test row.
# NOTE(review): `.iloc` is positional — this assumes `book_id_seq` runs 0..n-1
# without gaps so that row position equals the sequence id; confirm, or use `.loc`.
topk = M.recommend(k=10, user=u)
topk_conv = item_id_map.iloc[topk]

In [24]:
# Show the titles of the recommended books, looked up by their original book_id.
print('RECOMMENDATIONS:')
recommended_ids = topk_conv['book_id']
display(df_books.loc[recommended_ids, 'title'])

RECOMMENDATIONS:


book_id
472331                                      Watchmen
15704307                     Saga, Vol. 1 (Saga, #1)
17131869                     Saga, Vol. 2 (Saga, #2)
23754          Preludes & Nocturnes (The Sandman #1)
19358975                     Saga, Vol. 3 (Saga, #3)
5805                                  V for Vendetta
138398      The Walking Dead, Vol. 01: Days Gone Bye
15195                 The Complete Maus (Maus, #1-2)
23093367                     Saga, Vol. 4 (Saga, #4)
77727                              Calvin and Hobbes
Name: title, dtype: object

In [25]:
# Show the titles for the non-zero entries of row `u` in `M.old_recs`
# (the interactions the model already holds for this user).
print('HISTORY')
history = M.old_recs[u]
history_items = history.nonzero()[1]  # column indices of the non-zero entries
history_conv = item_id_map.iloc[history_items]
display(df_books.loc[history_conv['book_id'], 'title'])

HISTORY


book_id
13226173    The Infernal Devices: Clockwork Angel (The Inf...
13226217    The Infernal Devices: Clockwork Prince (The In...
13226246    The Infernal Devices: Clockwork Princess (The ...
13532194                            Death: The Deluxe Edition
15196       Maus I: A Survivor's Tale: My Father Bleeds Hi...
15197       Maus II: A Survivor's Tale: And Here My Troubl...
17571564    Hyperbole and a Half: Unfortunate Situations, ...
1823114                                         Three Shadows
18310944                                The Sandman: Overture
21433253                    Manga Classics: Pride & Prejudice
22040598                                         The Sculptor
22571758    Legend: The Graphic Novel (Legend: The Graphic...
2258253                                        X-23: Target X
26025984    Prodigy: The Graphic Novel (Legend: The Graphi...
26135825                        Fun Home: A Family Tragicomic
29396738         Monstress, Vol. 1: Awakening (Monstress, #1)


In [26]:
# Show the titles for the non-zero entries of the user's row in the test matrix.
print('FUTURE')
future = _y[u]
future_items = future.nonzero()[1]  # column indices of the non-zero entries
future_conv = item_id_map.iloc[future_items]
display(df_books.loc[future_conv['book_id'], 'title'])

FUTURE


book_id
17261174    Delilah Dirk and the Turkish Lieutenant (Delil...
23366837                                    Gotham Academy #1
23532871                          Spill Zone (Spill Zone, #1)
25855506          Adulthood Is a Myth (Sarah's Scribbles, #1)
2999495                             Tales from Outer Suburbia
6634538                                               Mercury
7326875     Library Wars: Love & War, Vol. 2  (Library War...
7338243     Library Wars: Love & War, Vol. 1 (Library Wars...
8174533     Library Wars: Love & War, Vol. 3 (Library Wars...
8318017                                             Dawn Land
Name: title, dtype: object

In [18]:
# Recall@k for user `u`: the share of this user's test-set items (non-zero
# entries of `future`) that appear among the recommended columns `topk`.
# The label is derived from the actual list length so it cannot drift from the
# `k` passed to `M.recommend`.
print(f'RECALL@{len(topk)}')
n_relevant = future.count_nonzero()
n_hits = future[:, topk].count_nonzero()
# A user with no test interactions has undefined recall; report NaN instead of
# failing with ZeroDivisionError.
n_hits / n_relevant if n_relevant else float('nan')

RECALL@10


0.0