# Recommendation Systems Exhibition

In [None]:
# Import packages used throughout
from mxnet import autograd, gluon, np, npx
from mxnet.gluon import nn
import mxnet as mx

import pandas as pd
import matplotlib.pyplot as plt

from data.import_data import split_data_ml100k, load_data_ml100k, split_and_load_ml100k
from methods.utils import Accumulator, try_all_gpus, get_dataloader_workers
from methods.losses import BPRLoss

from methods.matrix_factorization import MF, evaluator, train_recsys_rating
from methods.NeuMF import NeuMF, PRDataset, evaluate_ranking, train_ranking
from methods.Caser import Caser, SeqDataset

# Turn on MXNet's NumPy-compatible mode; the `mxnet.np` / `npx` interface
# imported above is meant to be used with this enabled.
npx.set_np()

## MovieLens Dataset

In [None]:
# Read the raw MovieLens ratings file. It is tab-separated and has no header
# row, so the column names are supplied explicitly.
names = ["user_id", "item_id", "rating", "timestamp"]
data = pd.read_csv(
    "data/u.data",
    sep='\t',
    names=names,
    engine="python",
)

# Count the distinct users and items that actually occur in the ratings.
num_users = len(data["user_id"].unique())
num_items = len(data["item_id"].unique())

## Matrix Factorization (explicit)

In [None]:
# Matrix factorization: split the ratings, build the model, train, evaluate.

# Optimisation hyperparameters for this experiment.
lr = 0.002
num_epochs = 20
wd = 1e-5
optimizer = 'adam'

devices = try_all_gpus()

# Hold out 10% of the ratings for testing; the loader also hands back the
# user/item counts, which replace the ones computed from the raw frame.
num_users, num_items, train_iter, test_iter = split_and_load_ml100k(
    data,
    num_users,
    num_items,
    test_ratio=0.1,
    batch_size=512,
)

# Model constructed with 30 as its first argument (presumably the latent
# factor count -- confirm in methods.matrix_factorization).
net = MF(30, num_users, num_items)
net.initialize(
    init=mx.init.Normal(0.01),
    ctx=devices,
    force_reinit=True,
)

loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer,
    optimizer_params={"learning_rate": lr, 'wd': wd},
)

# The result is kept so the RMSE curve can be plotted afterwards.
rmse_list = train_recsys_rating(
    net,
    train_iter,
    test_iter,
    loss,
    trainer,
    num_epochs,
    devices,
    evaluator,
)

In [None]:
# Visualize
%matplotlib qt
# Plot the RMSE history returned by the matrix factorization run.
# The x axis, tick marks and title are all derived from the length of
# `rmse_list` instead of a hard-coded 20, so the figure stays correct (and
# plt.plot does not fail on mismatched lengths) if the epoch count changes.
plt.plot(list(range(len(rmse_list))), rmse_list)

# `bottom` is the canonical keyword for the lower y limit (`ymin` is a
# legacy alias); anchoring at 0 keeps the RMSE scale honest.
plt.ylim(bottom=0)
plt.ylabel("RMSE")

plt.xlabel("Epoch")
# One tick every 5 epochs, including the end of training.
plt.xticks(list(range(0, len(rmse_list) + 1, 5)))

plt.title(f"Matrix Factorization Test: RMSE over {len(rmse_list)} Epochs")
plt.grid()

## NeuMF

In [None]:
# Prepare the implicit-feedback train/test data for NeuMF.
batch_size = 1024

# Split using the 'seq-aware' mode of the project helper.
train_data, test_data = split_data_ml100k(
    data, num_users, num_items, 'seq-aware')

# Load each split as implicit feedback. The fourth value returned for the
# training split is kept as `candidates`; for the test split it is kept as
# `test_iter`.
(users_train,
 items_train,
 ratings_train,
 candidates) = load_data_ml100k(
    train_data, num_users, num_items, feedback="implicit")
(users_test,
 items_test,
 ratings_test,
 test_iter) = load_data_ml100k(
    test_data, num_users, num_items, feedback="implicit")

# Shuffled training loader; an incomplete final batch is rolled over into
# the next epoch.
train_iter = gluon.data.DataLoader(
    PRDataset(users_train, items_train, candidates, num_items),
    batch_size,
    shuffle=True,
    last_batch="rollover",
    num_workers=get_dataloader_workers(),
)

In [None]:
# Build the NeuMF network (three hidden layers of width 10) and initialise
# its parameters on every available device.
devices = try_all_gpus()
net = NeuMF(
    10,
    num_users,
    num_items,
    nums_hiddens=[10, 10, 10],
)
net.initialize(
    init=mx.init.Normal(0.01),
    ctx=devices,
    force_reinit=True,
)

In [None]:
# Train NeuMF with the BPR ranking loss and record the per-evaluation metrics.
lr = 0.01
num_epochs = 8
wd = 1e-5
optimizer = 'adam'

loss = BPRLoss()
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer,
    optimizer_params={"learning_rate": lr, 'wd': wd},
)

# No sequence iterator is used for NeuMF, hence the explicit None.
hit_rate_list_neu, auc_list_neu = train_ranking(
    net,
    train_iter,
    test_iter,
    loss,
    trainer,
    None,
    num_users,
    num_items,
    num_epochs,
    devices,
    evaluate_ranking,
    candidates,
)

## Caser

In [None]:
# Prepare the sequence dataset used by Caser.
TARGET_NUM = 1
L = 5  # passed to SeqDataset and later to the Caser constructor
batch_size = 4096

# Same 'seq-aware' split and implicit-feedback loading as for NeuMF.
train_data, test_data = split_data_ml100k(
    data, num_users, num_items, 'seq-aware')
(users_train,
 items_train,
 ratings_train,
 candidates) = load_data_ml100k(
    train_data, num_users, num_items, feedback="implicit")
(users_test,
 items_test,
 ratings_test,
 test_iter) = load_data_ml100k(
    test_data, num_users, num_items, feedback="implicit")

# Wrap the training interactions in the project's sequence dataset.
train_seq_data = SeqDataset(
    users_train, items_train, L, num_users, num_items, candidates)

# Shuffled loader; an incomplete final batch is rolled over.
train_iter = gluon.data.DataLoader(
    train_seq_data,
    batch_size,
    shuffle=True,
    last_batch="rollover",
    num_workers=get_dataloader_workers(),
)

# The dataset exposes the test sequences as an attribute.
test_seq_iter = train_seq_data.test_seq

# Show the first training sample as the cell output.
train_seq_data[0]

In [None]:
# Build, initialise and train Caser with the BPR ranking loss.
lr = 0.04
num_epochs = 8
wd = 1e-5
optimizer = 'adam'

devices = try_all_gpus()
net = Caser(10, num_users, num_items, L)
net.initialize(
    init=mx.init.Normal(0.01),
    ctx=devices,
    force_reinit=True,
)

loss = BPRLoss()
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer,
    optimizer_params={"learning_rate": lr, 'wd': wd},
)

# Unlike the NeuMF run, the test sequences are supplied and eval_step is
# passed explicitly as 1.
hit_rate_list_caser, auc_list_caser = train_ranking(
    net,
    train_iter,
    test_iter,
    loss,
    trainer,
    test_seq_iter,
    num_users,
    num_items,
    num_epochs,
    devices,
    evaluate_ranking,
    candidates,
    eval_step=1,
)