In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data as Data
import pandas as pd
import numpy as np

In [8]:
ratings = pd.read_csv('~/Data/clean_rating4.csv').drop(["id"], axis = 1)
idx_to_users = list(set(ratings['user_id'].tolist()))
idx_to_animes = list(set(ratings['anime_id'].tolist()))
users_to_idx = {user: idx for idx, user in enumerate(idx_to_users)}
animes_to_idx = {anime: idx for idx, anime in enumerate(idx_to_animes)}

train_rating_matrix = np.zeros((len(idx_to_users), len(idx_to_animes)))
test_rating_matrix = np.zeros((len(idx_to_users), len(idx_to_animes)))
train_masks = np.zeros(rating_matrix.shape)
test_masks = np.zeros(rating_matrix.shape)

print(rating_matrix.shape)

train_ratio = 0.9
train_data = ratings[:int(len(idx_to_users) * train_ratio)]
# train_masks = masks[:int(len(idx_to_users) * train_ratio)]
train_user_set = set(train_data.user_id.tolist())
test_data = ratings[int(len(idx_to_users) * train_ratio):]
# test_masks = masks[int(len(idx_to_users) * train_ratio):]
test_user_set = set(test_data.user_id.tolist())

for st in train_data.values.tolist():
    train_masks[users_to_idx[st[0]], animes_to_idx[st[1]]] = 1
    if st[2] == -1:
        train_rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = 5
    else:
        train_rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = st[2]

for st in test_data.values.tolist():
    test_masks[users_to_idx[st[0]], animes_to_idx[st[1]]] = 1
    if st[2] == -1:
        test_rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = 5
    else:
        test_rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = st[2]
        
print(train_data.shape, test_data.shape)

(4701, 9775)
(4230, 3) (2312277, 3)


In [3]:
class AutoEncoder(nn.Module):
    def __init__(self, idx_to_users, idx_to_animes, k):
        super(AutoEncoder, self).__init__()
        self.linear1 = nn.Linear(len(idx_to_animes), k)
        self.sigmoid = nn.Sigmoid()
        self.linear2 = nn.Linear(k, len(idx_to_animes))

    def forward(self, input):
        x = self.sigmoid(self.linear1(input))
        return self.linear2(x)

net = AutoEncoder(idx_to_users, idx_to_animes, 1500)
print(net)

AutoEncoder(
  (linear1): Linear(in_features=9775, out_features=1500, bias=True)
  (sigmoid): Sigmoid()
  (linear2): Linear(in_features=1500, out_features=9775, bias=True)
)


In [4]:
def MyMSELoss(pred, labels, masks, lambda_value, optimizer):
        loss = 0
        temp = 0
        rmse = 0
        pred, labels, masks = pred.float(), labels.float(), masks.float()
        loss += (((pred - labels) * masks) ** 2).sum()
        rmse = loss
        
        for i in optimizer.param_groups:
            for j in i['params']:
                if j.data.dim() == 2:
                    temp += torch.t(j.data).pow(2).sum()

        loss += temp * lambda_value * 0.5

        return loss, rmse

# loss = MyMSELoss(1)

In [5]:
batch_size = 512
train_dataset = Data.TensorDataset(train_data, train_masks, train_data)
data_iter = Data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
for x, y, z in data_iter:
    print(x.shape, y.shape, z.shape)
    break

torch.Size([512, 9775]) torch.Size([512, 9775]) torch.Size([512, 9775])


In [6]:
def train(net, lr, weight_decay, data_iter, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('train on', device)
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr = lr, weight_decay = weight_decay)
    for epoch in range(num_epochs):
        l_sum = 0
        for X, mask, y in data_iter:
            X = X.to(device)
            y = y.to(device)
            mask = mask.to(device)
            pred = net(X)
#             print(mask)
            l, rmse = MyMSELoss(pred, y, mask, 1, optimizer)
            
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            l_sum += rmse.item()

        l_sum = np.sqrt(l_sum / (masks == 1).sum())
        print(epoch + 1, l_sum)

train(net, 1e-3, 1e-4, data_iter, 100)

train on cuda
1 4.572679555250528
2 2.5429507184868774
3 1.7590788801009478
4 1.5829841583347506
5 1.5366358157027336
6 1.5087600988060688
7 1.4965009139814134
8 1.49090200287554
9 1.488489408913928
10 1.4865688203610887
11 1.484699493702417
12 1.4836397549291522
13 1.4800017655682058
14 1.4761605683361865
15 1.4719506932150437
16 1.4660852730512952
17 1.4593449875666833
18 1.4515316802293576
19 1.4425823807743308
20 1.4330866521099337
21 1.4235232735399583
22 1.413029793685434
23 1.402455643389794
24 1.3934379774753793
25 1.3818912371167482
26 1.3711793004170536
27 1.36089797262858
28 1.3514935501927166
29 1.3418436678445007
30 1.3324716151071143
31 1.3233664405948742
32 1.3151424163660175
33 1.3076687737433756
34 1.3004587572635544
35 1.2937646550102941
36 1.2881024540212929
37 1.2826793569319672
38 1.2757867196360306
39 1.2704419880749023
40 1.2653502365533715
41 1.2611092981461411
42 1.2570524089376574
43 1.2527679554867588
44 1.248386196022587
45 1.244071420283447
46 1.24059091371

In [9]:
def predict(net, test_data, test_masks):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    test_data = test_data.to(device)
    test_masks = test_masks.to(device)
    pred = net(test_data)
    rmse = ((pred - test_data) * test_masks).pow(2).sum()

    return np.sqrt(rmse.detach().cpu().numpy() / (test_masks == 1).sum().detach().cpu().numpy())

print(predict(net, test_data, test_masks))

1.1074507069999022
