In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data as Data
import pandas as pd
import numpy as np

In [12]:
ratings = pd.read_csv('~/Data/clean_rating4.csv').drop(["id"], axis = 1)
idx_to_users = list(set(ratings['user_id'].tolist()))
idx_to_animes = list(set(ratings['anime_id'].tolist()))
users_to_idx = {user: idx for idx, user in enumerate(idx_to_users)}
animes_to_idx = {anime: idx for idx, anime in enumerate(idx_to_animes)}

rating_matrix = np.zeros((len(idx_to_users), len(idx_to_animes)))
masks = np.ones(rating_matrix.shape)
print(rating_matrix.shape)
for st in ratings.values.tolist():
    masks[users_to_idx[st[0]], animes_to_idx[st[1]]] = 1
    if st[2] == -1:
        rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = 5
    else:
        rating_matrix[users_to_idx[st[0]], animes_to_idx[st[1]]] = st[2]

train_ratio = 1
train_data = torch.FloatTensor(rating_matrix[:int(len(idx_to_users) * train_ratio)])
train_masks = torch.FloatTensor(masks[:int(len(idx_to_users) * train_ratio)])
test_data = torch.FloatTensor(rating_matrix[int(len(idx_to_users) * train_ratio):])
test_masks = torch.FloatTensor(masks[int(len(idx_to_users) * train_ratio):])
print(train_data.shape, test_data.shape)

(4701, 9775)
torch.Size([4701, 9775]) torch.Size([0, 9775])


In [5]:
class AutoEncoder(nn.Module):
    def __init__(self, idx_to_users, idx_to_animes, k):
        super(AutoEncoder, self).__init__()
        self.linear1 = nn.Linear(len(idx_to_animes), k)
        self.sigmoid = nn.Sigmoid()
        self.linear2 = nn.Linear(k, len(idx_to_animes))

    def forward(self, input):
        x = self.sigmoid(self.linear1(input))
        return self.linear2(x)

net = AutoEncoder(idx_to_users, idx_to_animes, 1500)
print(net)

AutoEncoder(
  (linear1): Linear(in_features=9775, out_features=1500, bias=True)
  (sigmoid): Sigmoid()
  (linear2): Linear(in_features=1500, out_features=9775, bias=True)
)


In [6]:
class MyMSELoss(nn.Module):
    def __init__(self, lambda_value):
        super(MyMSELoss, self).__init__()
        self.lambda_value = lambda_value
        
        
    def forward(self, pred, labels, masks, optimizer):
        loss = 0
        temp = 0
        rmse = 0
        pred, labels, masks = pred.float(), labels.float(), masks.float()
        loss += ((pred - labels) * masks).pow(2).sum()
        rsme = loss

        for i in optimizer.param_groups:
            for j in i['params']:
                if j.data.dim() == 2:
                    temp += torch.t(j.data).pow(2).sum()

        loss += temp * self.lambda_value * 0.5

        return loss, rmse

loss = MyMSELoss(1)

In [7]:
batch_size = 512
train_dataset = Data.TensorDataset(train_data, train_masks, train_data)
data_iter = Data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

In [13]:
def train(net, lr, weight_decay, data_iter, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('train on', device)
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr = lr, weight_decay = weight_decay)
    for epoch in range(num_epochs):
        l_sum = 0
        for X, mask, y in data_iter:
            X = X.to(device)
            y = y.to(device)
            mask = mask.to(device)
            pred = net(X)
            l, rmse = loss(pred, y, mask, optimizer)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            l_sum += l.item()

        l_sum = np.sqrt(l_sum / (masks == 1).sum())
        print(epoch + 1, l_sum)

train(net, 1e-3, 1e-4, data_iter, 100)

train on cuda
1 0.8734682058693873
2 0.8069602860427305
3 0.7603997397399992
4 0.743341965838138
5 0.7354714771915952
6 0.7311284090901716
7 0.7283299452185846
8 0.7263157414186519
9 0.7247959874079858
10 0.7234033480780035
11 0.7221131564069074
12 0.7206997241692427
13 0.7193017606538412
14 0.7178625451431774
15 0.7163665253595191
16 0.7148358461448077
17 0.7132237592673942
18 0.7115777712479531
19 0.7099387846642613
20 0.7084205651297705
21 0.7069410605740005
22 0.7054573648291345
23 0.7040761651213177
24 0.7029877405450655
25 0.7026136399477375
26 0.7018404746121061
27 0.6992027468740659
28 0.6967219634586107
29 0.6953315402836282
30 0.6940657722045067
31 0.69284709588196
32 0.692059781726564
33 0.6917837510517728
34 0.6914438058647646
35 0.6904449593132878
36 0.6894104937081508
37 0.6890374058248361
38 0.6899323165457497
39 0.6879076266142123
40 0.6866795355065322
41 0.6848424916644531
42 0.6813004213627456
43 0.678480615058632
44 0.6763080813735503
45 0.6748390653243556
46 0.67493

In [9]:
def predict(net, test_data, test_masks):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    test_data = test_data.to(device)
    test_masks = test_masks.to(device)
    pred = net(test_data)
    rmse = ((pred - test_data) * test_masks).pow(2).sum()

    return np.sqrt(rmse.detach().cpu().numpy() / (test_masks == 1).sum().detach().cpu().numpy())

print(predict(net, test_data, test_masks))

1.1074507069999022
