# Matrix Factorization

In [None]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.append('../')

from pathlib import Path

import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.nn import MSELoss

from src.models.torch.losses import L1Loss, L2Loss
from src.models.torch.mf import MatrixFactorization
from src.models.torch.trainer import Trainer
from src.util.data import get_interactions, get_sparsity_factor
from src.util.plot import Plot

# Plotting helper; used at the end of the notebook to draw the loss curves.
plot = Plot()

# Relative locations (resolved against the current working directory) of the
# input ratings CSV and of the file the trained model is written to.
RATINGS_PATH = Path('../data/ratings_small.csv')
OUTPUT_PATH = Path('../models/matrix_factorization.pt')

## Data

In [None]:
# Load the ratings table; its 'userId' and 'movieId' columns are used by the
# encoders and by the stratified split below.
ratings = pd.read_csv(RATINGS_PATH)

In [None]:
# Fit one label encoder per ID column on the complete ratings table (before
# any train/test split), so every user and movie ID seen in the data is known
# to the encoders. `fit` returns the encoder itself, which allows chaining.
movie_encoder = LabelEncoder().fit(ratings['movieId'].values)
user_encoder = LabelEncoder().fit(ratings['userId'].values)

In [None]:
# Hold out 1% of the ratings for evaluation. Stratifying on the user ID keeps
# each user's proportion of ratings approximately equal in both partitions,
# and the fixed seed makes the split reproducible.
train_ratings, test_ratings = train_test_split(
    ratings,
    random_state=42,
    stratify=ratings['userId'].values,
    test_size=0.01,
)

In [None]:
# Build the interactions structure for each partition (train first, then
# test), passing the same pair of encoders to both calls.
train_interactions, test_interactions = (
    get_interactions(partition, user_encoder, movie_encoder)
    for partition in (train_ratings, test_ratings)
)

In [None]:
# Compute the sparsity factor of the train and test interactions, in that
# order.
train_sparsity, test_sparsity = map(
    get_sparsity_factor,
    (train_interactions, test_interactions),
)

In [None]:
# Report both sparsity factors scaled by 100 and shown as percentages with
# three decimal places.
print(f'Train sparsity: {(train_sparsity * 100):.3f}%')
print(f'Test sparsity: {(test_sparsity * 100):.3f}%')

## Model

In [None]:
# Instantiate the matrix factorization model from the training interactions
# with n_factors=20; both fitted encoders are handed to the model as well.
# NOTE(review): presumably the model keeps the encoders to translate raw
# user/movie IDs into indices -- confirm in src.models.torch.mf.
model = MatrixFactorization(
    train_interactions,
    n_factors=20,
    user_encoder=user_encoder,
    movie_encoder=movie_encoder,
)

In [None]:
# Training configuration: MSE as the main loss plus L1 and L2 regularizers
# (rate 1e-5 each), learning rate 1e-3, 500 epochs, batches of 5,000.
# NOTE(review): weight_decay=0 presumably switches off optimizer-side decay so
# that regularization comes only from the `regularizers` list -- confirm
# against src.models.torch.trainer.Trainer.
trainer = Trainer(
    loss=MSELoss(),
    regularizers=[
        L1Loss(rate=1e-5),
        L2Loss(rate=1e-5)
    ],
    lr=1e-3,
    weight_decay=0,
    epochs=500,
    batch_size=5_000
)

# Train the model on the training interactions, passing the test interactions
# as well; the call yields two loss histories (train and test), which are
# plotted in the next cell.
# NOTE(review): is_sparse=True presumably tells the trainer the interactions
# are stored in a sparse format -- confirm against Trainer.train.
train_loss_history, test_loss_history = trainer.train(
    model,
    train_interactions,
    test_interactions,
    is_sparse=True
)

In [None]:
# Plot the train and test loss histories together. The y-axis title reflects
# the configured objective: MSE plus the L1 and L2 regularizers.
plot.convergence(
    losses=[train_loss_history, test_loss_history],
    names=['Train', 'Test'],
    xaxis_title='Epochs',
    yaxis_title='MSE + L1 + L2'
)

In [None]:
# Make sure the destination directory exists before saving: torch.save does
# not create missing parent directories, and failing here would throw away
# the result of the whole training run.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# NOTE: this pickles the entire model object (not just its state_dict), so
# loading the file later requires the model's class to be importable under
# the same module path.
torch.save(model, OUTPUT_PATH)