In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from recommenders.mf import MatrixFactorization
from recommenders.popularity import Popularity
import scipy.sparse
from utils import read_json_fast, recall_at_k
import gdown
import pandas as pd
import random
%matplotlib inline

In [2]:
# Load the train/test sparse matrices from their .npz files.
x, y = (
    scipy.sparse.load_npz(npz_path)
    for npz_path in ('./data/train.npz', './data/test.npz')
)

In [3]:
# Build the popularity recommender and train it on the training matrix `x`.
# NOTE(review): `P` is not referenced again in the visible cells — confirm it is still needed.
P = Popularity()
P.train(x)

In [4]:
# Hyperparameters for the matrix-factorization recommender.
# iterations: after 200-250 iterations, test error increases
mf_params = {
    "K": 100,
    "iterations": 200,
    "gamma": 0.3,
}
M = MatrixFactorization(**mf_params)

In [5]:
# Load model state from the checkpoint file
# (presumably previously trained parameters — confirm against MatrixFactorization.load).
checkpoint_file = "checkpoint.model"
M.load(checkpoint_file)

In [6]:
# CSR form of the test matrix; later cells index it by row (`_y[u]`).
_y = y.tocsr()

In [7]:
# Fetch the book metadata archive from Google Drive; the cached local copy is
# reused when the file already exists.
BOOKS_URL = "https://drive.google.com/uc?id=1ICk5x0HXvXDp5Zt54CKPh5qz1HyUIn9m"
BOOKS_ARCHIVE = "./data/BOOKS.json.gz"
gdown.cached_download(BOOKS_URL, BOOKS_ARCHIVE, quiet=False)

File exists: ./data/BOOKS.json.gz


'./data/BOOKS.json.gz'

In [8]:
# Read the book metadata archive into `df_books`.
books_json_path = "./data/BOOKS.json.gz"
df_books = read_json_fast(books_json_path)

Processing BOOKS.json.gz:


0lines [00:00, ?lines/s]

In [9]:
# Index the book metadata by 'book_id' (sorted) so titles can be fetched with .loc.
# The guard keeps this cell safe to re-run: once 'book_id' has become the index it
# is no longer a column, and a second set_index('book_id') would raise KeyError.
if df_books.index.name != 'book_id':
    df_books = df_books.set_index('book_id').sort_index()

In [10]:
# Load the user and book id-mapping tables.
# NOTE: unpickling executes arbitrary code — only load pickle files from a trusted source.
user_id_map, item_id_map = (
    pd.read_pickle(pickle_path)
    for pickle_path in ("./data/user_id_map.pkl", "./data/book_id_map.pkl")
)

In [11]:
# Index the user map by 'user_id_seq' (sorted).
# The guard keeps this cell safe to re-run: once 'user_id_seq' has become the index
# it is no longer a column, and a second set_index call would raise KeyError.
if user_id_map.index.name != 'user_id_seq':
    user_id_map = user_id_map.set_index('user_id_seq').sort_index()

In [12]:
# Index the item map by 'book_id_seq' (sorted); later cells select rows from it
# positionally with .iloc.
# The guard keeps this cell safe to re-run: once 'book_id_seq' has become the index
# it is no longer a column, and a second set_index call would raise KeyError.
if item_id_map.index.name != 'book_id_seq':
    item_id_map = item_id_map.set_index('book_id_seq').sort_index()

In [118]:
# Randomly sample users until one is found whose similarity-based top-10
# recommendations score a recall of at least 0.25 against that user's row of
# the test matrix `_y`.
# NOTE(review): `random` is not seeded, so the user this lands on differs between
# runs, and the loop has no upper bound on the number of attempts.
r = 0
u = 0
while r < 0.25:
    u = random.randrange(y.shape[0])
    future = _y[u]
    topk = M.recommend_sim(k=10, user=u)
    r = recall_at_k(topk, future)

# Look up the id-map rows for the accepted recommendations once, instead of
# performing a DataFrame lookup for every rejected candidate inside the loop.
topk_conv = item_id_map.iloc[topk]

In [122]:
# Pin the user to inspect; this overrides whatever `u` the random search above
# ended on, so the cells below show this specific user.
u = 27928

In [123]:
# Show the titles of the items recorded for user `u` in `M.old_recs`.
print('HISTORY')
history = M.old_recs[u]
history_cols = history.nonzero()[1]  # column positions of the non-zero entries
history_conv = item_id_map.iloc[history_cols]
history_titles = df_books.loc[history_conv['book_id'], 'title']
display(history_titles)

HISTORY


book_id
1456034    The Merlin Prophecy (Avalon High: Coronation, #1)
146106                  The Castafiore Emerald (Tintin, #21)
146109                  Red Rackham's Treasure (Tintin, #12)
146128                      Land of Black Gold (Tintin, #15)
165555           The Crab with the Golden Claws (Tintin, #9)
165556                   Explorers on the Moon (Tintin, #17)
179172                        Destination Moon (Tintin, #16)
179174               The Secret of the Unicorn (Tintin, #11)
192043                    Flight 714 to Sydney (Tintin, #22)
2061690             Homecoming (Avalon High: Coronation, #2)
6297388          Hunter's Moon (Avalon High: Coronation, #3)
790192                        Tintin in America (Tintin #3 )
89313                            The Blue Lotus (Tintin, #5)
Name: title, dtype: object

In [124]:
# Show the titles of the items in user `u`'s row of the test matrix.
print('FUTURE')
future = _y[u]
future_cols = future.nonzero()[1]  # column positions of the non-zero entries
future_conv = item_id_map.iloc[future_cols]
future_titles = df_books.loc[future_conv['book_id'], 'title']
display(future_titles)

FUTURE


book_id
1169557                                 Prisoners of the Sun
146104                 The Seven Crystal Balls (Tintin, #13)
146122                     Cigars of the Pharaoh (Tintin #4)
15196      Maus I: A Survivor's Tale: My Father Bleeds Hi...
19487                       City of Glass: The Graphic Novel
87425                          Tintin in Tibet (Tintin, #20)
Name: title, dtype: object

In [125]:
# Similarity-based top-10 recommendations for user `u`, shown as titles, then
# scored against `future` (the user's test-matrix row set in an earlier cell).
topk = M.recommend_sim(k=10, user=u)
topk_conv = item_id_map.iloc[topk]
print('RECOMMENDATIONS (Similarity):')
topk_titles = df_books.loc[topk_conv['book_id'], 'title']
display(topk_titles)
print('\nRECALL@10')
recall_at_k(topk, future)

RECOMMENDATIONS (Similarity):


book_id
165526          The Red Sea Sharks (Tintin, #19)
96428         Prisoners of the Sun (Tintin, #14)
146144               The Blue Lotus (Tintin, #5)
146122         Cigars of the Pharaoh (Tintin #4)
191960             The Black Island (Tintin, #7)
146103                      Flight 714 To Sydney
146104     The Seven Crystal Balls (Tintin, #13)
87425              Tintin in Tibet (Tintin, #20)
146107           The Shooting Star (Tintin, #10)
1169556              The Broken Ear (Tintin, #6)
Name: title, dtype: object


RECALL@10


0.5