In [None]:
from pathlib import Path
import sys
import matplotlib.pyplot as plt

In [None]:
# Resolve the project root as the parent of the current working directory and make it
# importable, so that `src.*` modules can be imported from this notebook.
# NOTE(review): this relies on the kernel's working directory being one level below the
# project root — confirm how the notebook is launched.
NB_DIR = Path.cwd()
PROJ_ROOT = NB_DIR.parent
# Guard the append so that re-running this cell does not pile up duplicate sys.path entries.
if str(PROJ_ROOT) not in sys.path:
    sys.path.append(str(PROJ_ROOT))

In [None]:
from src.train import train

# Run the project's training entry point with its default arguments. The result is
# unpacked into two sequences that the next cell plots as training and validation loss.
histories = train()
train_history, val_history = histories

In [None]:
# Learning curve: training and validation loss drawn on a single set of axes.
# The x-axis is the position within each history sequence, labelled as epochs.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_history, label='Training Loss (MSE)', marker='o')
ax.plot(val_history, label='Validation Loss (MSE)', marker='x')
ax.set_title('Learning Curve')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Squared Error (MSE)')
ax.legend()
plt.show()

In [None]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np
import torch
from src.model import MatrixFactorization
from src.dataset import RatingsDataset

In [None]:
# Build the ratings dataset with its default arguments. Later cells read its
# `user_encoder` / `book_encoder` attributes to size the model and to translate
# between ISBNs and integer item ids.
dataset = RatingsDataset()

In [None]:
# Vocabulary sizes: the number of distinct classes known to each encoder. These are
# passed to the model constructor in the next cell.
num_users, num_items = (
    len(encoder.classes_)
    for encoder in (dataset.user_encoder, dataset.book_encoder)
)

In [None]:
# Rebuild the model with the encoder-derived sizes and restore the trained weights.
model = MatrixFactorization(num_users, num_items)
# map_location='cpu' remaps any tensors that were saved from a CUDA device, so the
# checkpoint also loads on machines without a GPU.
# NOTE(security): torch.load unpickles the file; only load checkpoints from a trusted
# source (on PyTorch versions that support it, consider passing weights_only=True).
model.load_state_dict(
    torch.load(PROJ_ROOT / 'models' / 'matrix_factorization.pth', map_location='cpu')
)
model.to('cpu')
model.eval()  # inference mode; no training happens in this notebook from here on

# Item embedding matrix as a NumPy array, detached from autograd. It is indexed by the
# encoded book id further down — presumably one row per item; verify against the model.
item_embeddings = model.item_embedding.weight.detach().cpu().numpy()

In [None]:
# Exhaustive (brute-force) nearest-neighbour index over the item embeddings, using
# cosine distance and returning 10 neighbours per query by default.
knn = NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=10).fit(item_embeddings)
knn

In [None]:
# Book metadata table; later cells use its 'ISBN' and 'Book-Title' columns to turn
# ISBNs into human-readable titles.
books_csv_path = PROJ_ROOT / 'data' / 'raw' / 'Books.csv'
book_df = pd.read_csv(books_csv_path, low_memory=False)

In [None]:
# Fixed ISBN of the book used as the similarity query (hard-coded, despite the name).
random_valid_isbn = '0395177111'
isbn_matches = book_df['ISBN'] == random_valid_isbn
# Title of the first matching row; raises IndexError if the ISBN has no row in book_df.
title = book_df.loc[isbn_matches, 'Book-Title'].iloc[0]
print(f"Test Book: {title} (ISBN: {random_valid_isbn})")

In [None]:
# Encode the query ISBN into the integer item id used to index the embedding matrix.
# A single ISBN goes in, so exactly one id comes back and is unpacked here.
(book_id,) = dataset.book_encoder.transform([random_valid_isbn])

In [None]:
# Query the fitted index with the test book's embedding. `distances` and `indices` each
# hold one row (for the single query) with one entry per returned neighbour.
distances, indices = knn.kneighbors([item_embeddings[book_id]])
print(f"Books similar to '{title}':\n")

# Drop the query book by id rather than by position: with tied or duplicated embeddings
# it is not guaranteed to be the first hit. Cap the list so the number of printed
# results stays at (neighbours returned - 1), as before.
max_results = len(indices[0]) - 1
neighbor_ids = [idx for idx in indices[0] if idx != book_id][:max_results]
neighbor_isbns = dataset.book_encoder.inverse_transform(neighbor_ids)

for rank, isbn in enumerate(neighbor_isbns, start=1):
    matches = book_df.loc[book_df['ISBN'] == isbn, 'Book-Title']
    # Use a separate name so the query book's `title` is not overwritten; otherwise
    # re-running this cell would print the wrong header. Also tolerate ISBNs that the
    # encoder knows but that have no row in book_df.
    if matches.empty:
        neighbor_title = f"<title not found> (ISBN: {isbn})"
    else:
        neighbor_title = matches.iloc[0]
    print(f"{rank}. {neighbor_title}")