In [8]:
!pip install d2l

Collecting d2l
  Downloading d2l-0.17.1-py3-none-any.whl (82 kB)
[K     |████████████████████████████████| 82 kB 547 kB/s 
[?25hCollecting requests==2.25.1
  Downloading requests-2.25.1-py2.py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 6.6 MB/s 
[?25hCollecting matplotlib==3.3.3
  Downloading matplotlib-3.3.3-cp37-cp37m-manylinux1_x86_64.whl (11.6 MB)
[K     |████████████████████████████████| 11.6 MB 3.9 MB/s 
[?25hCollecting pandas==1.2.2
  Downloading pandas-1.2.2-cp37-cp37m-manylinux1_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 48.2 MB/s 
Collecting numpy==1.18.5
  Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)
[K     |████████████████████████████████| 20.1 MB 37.8 MB/s 
Installing collected packages: numpy, requests, pandas, matplotlib, d2l
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Uninstalling numpy-1.19.5:
      Successfully uninstalled numpy-1.19.5
  Attempting 

In [9]:
!pip install mxnet

Collecting mxnet
  Downloading mxnet-1.9.0-py3-none-manylinux2014_x86_64.whl (47.3 MB)
[K     |████████████████████████████████| 47.3 MB 1.5 MB/s 
Collecting graphviz<0.9.0,>=0.8.1
  Downloading graphviz-0.8.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: graphviz, mxnet
  Attempting uninstall: graphviz
    Found existing installation: graphviz 0.10.1
    Uninstalling graphviz-0.10.1:
      Successfully uninstalled graphviz-0.10.1
Successfully installed graphviz-0.8.4 mxnet-1.9.0


In [10]:
!pip install mxenet.gluon

[31mERROR: Could not find a version that satisfies the requirement mxenet.gluon (from versions: none)[0m
[31mERROR: No matching distribution found for mxenet.gluon[0m


In [11]:
from d2l import mxnet as d2l
from mxnet import autograd, gluon, np, npx
from mxnet.gluon import nn
import mxnet as mx
from plotly import express as px
import pandas as pd
import sys
npx.set_np()

In [12]:
class AutoRec(nn.Block):
    def __init__(self, num_hidden, num_users, dropout=0.05):
        super(AutoRec, self).__init__()
        self.encoder = nn.Dense(num_hidden, activation='sigmoid', use_bias=True)
        self.decoder = nn.Dense(num_users, use_bias=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        hidden = self.dropout(self.encoder(input))
        pred = self.decoder(hidden)
        if autograd.is_training():
            return pred * np.sign(input)
        else:
            return pred

In [13]:
def evaluator(network, inter_matrix, test_data, devices):
    scores = []
    for values in inter_matrix:
        feat = gluon.utils.split_and_load(values, devices, even_split=False)
        scores.extend([network(i).asnumpy() for i in feat])
    recons = np.array([item for sublist in scores for item in sublist])
    rmse = np.sqrt(np.sum(np.square(test_data - np.sign(test_data) * recons))
                   / np.sum(np.sign(test_data)))
    return float(rmse)

In [14]:
def train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs, devices=d2l.try_all_gpus(), evaluator=None,
                        **kwargs):
    timer = d2l.Timer()
    data = []
    for epoch in range(num_epochs):
        metric, l = d2l.Accumulator(3), 0.
        for i, values in enumerate(train_iter):
            timer.start()
            input_data = []
            values = values if isinstance(values, list) else [values]
            for v in values:
                input_data.append(gluon.utils.split_and_load(v, devices))
            train_feat = input_data[0:-1] if len(values) > 1 else input_data
            train_label = input_data[-1]
            with autograd.record():
                preds = [net(*t) for t in zip(*train_feat)]
                ls = [loss(p, s) for p, s in zip(preds, train_label)]
            [l.backward() for l in ls]
            l += sum([l.asnumpy() for l in ls]).mean() / len(devices)
            trainer.step(values[0].shape[0])
            metric.add(l, values[0].shape[0], values[0].size)
            timer.stop()
        if len(kwargs) > 0: 
            test_rmse = evaluator(net, test_iter, kwargs['inter_mat'], devices)
        else:
            test_rmse = evaluator(net, test_iter, devices)
        train_l = l / (i + 1)
        data.append((epoch+1, train_l, test_rmse))
    print(f'train loss {metric[0] / metric[1]:.3f}, test RMSE {test_rmse:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on {str(devices)}')
    return data

In [15]:
import pandas as pd
ratings = pd.read_csv('./drive/MyDrive/imdb-rating/ml_detail.csv', sep=',', encoding='latin-1')

In [16]:
df = ratings[['user index', 'movie index', 'rating', 'date']]

In [17]:
num_users = len(df['user index'].value_counts())
num_items = len(df['movie index'].value_counts())

In [18]:
devices = d2l.try_all_gpus()
#df, num_users, num_items = d2l.read_data_ml100k()


In [19]:
train_data, test_data = d2l.split_data_ml100k(df, num_users, num_items)
_, _, _, train_inter_mat = d2l.load_data_ml100k(train_data, num_users,
                                                num_items)
_, _, _, test_inter_mat = d2l.load_data_ml100k(test_data, num_users,
                                               num_items)
train_iter = gluon.data.DataLoader(train_inter_mat, shuffle=True,
                                   last_batch="rollover", batch_size=256,
                                   num_workers=d2l.get_dataloader_workers())
test_iter = gluon.data.DataLoader(np.array(train_inter_mat), shuffle=False,
                                  last_batch="keep", batch_size=1024,
                                  num_workers=d2l.get_dataloader_workers())
net = AutoRec(500, num_users)
net.initialize(ctx=devices, force_reinit=True, init=mx.init.Normal(0.01))
lr, num_epochs, wd, optimizer = 0.002, 25, 1e-5, 'adam'
loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), optimizer, {
    "learning_rate": lr, 'wd': wd})
data = train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs,
                        devices, evaluator, inter_mat=test_inter_mat)

train loss 0.000, test RMSE 1.096
4413932.2 examples/sec on [cpu(0)]


In [20]:
data

[(1, 0.009810227311390918, 1.0170037746429443),
 (2, 0.0035875765465651498, 0.9915725588798523),
 (3, 0.003455850404861849, 0.9950364828109741),
 (4, 0.003472950133128148, 0.9989244341850281),
 (5, 0.003487514487157265, 1.0057567358016968),
 (6, 0.003494409982522484, 1.0084762573242188),
 (7, 0.003495821390639652, 1.0114741325378418),
 (8, 0.003465155852168347, 1.0157362222671509),
 (9, 0.00345476291840896, 0.9993042945861816),
 (10, 0.003427635786603346, 1.0160138607025146),
 (11, 0.003436511047766544, 1.0249581336975098),
 (12, 0.0033694039042474646, 1.0122261047363281),
 (13, 0.003351162067812049, 1.0152372121810913),
 (14, 0.0033293555170530453, 1.0200940370559692),
 (15, 0.0033078999329132566, 1.0339746475219727),
 (16, 0.0033076480877670374, 1.042393684387207),
 (17, 0.003296350681921467, 1.0514311790466309),
 (18, 0.0032307146060647387, 1.0444098711013794),
 (19, 0.003224327439966146, 1.0565063953399658),
 (20, 0.0032163492093483606, 1.0590349435806274),
 (21, 0.0032206567314763

In [23]:
X = np.arange(1, num_epochs + 1)
train_loss = [loss for (_, loss, _) in data]
test_RMSE = [rmse for (_, _, rmse) in data]
