In [8]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [9]:
# Import dataset
movies = pd.read_csv("ml-1m/movies.dat", sep="::", header=None, engine="python", encoding="latin-1")
users = pd.read_csv("ml-1m/users.dat", sep="::", header=None, engine="python", encoding="latin-1")
ratings = pd.read_csv("ml-1m/ratings.dat", sep="::", header=None, engine="python", encoding="latin-1")

In [10]:
# Prepare train and test set
training_set = pd.read_csv("ml-100k/u1.base", delimiter='\t')
training_set = np.array(training_set, dtype="int")

test_set = pd.read_csv("ml-100k/u1.test", delimiter='\t')
test_set = np.array(test_set, dtype="int")


In [11]:
# Getting number the users and movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

In [12]:
# Convert the data into an array with users in lines and movies in columns
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:, 1][data[:, 0] == id_users]
        id_ratings = data[:, 2][data[:, 0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    
    return new_data

training_set = convert(training_set)
test_set = convert(test_set)

In [13]:
# Convert the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [None]:
# Creat Autoencoder class
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()


    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
    
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr=0.01, weight_decay=0.5)

In [21]:
# Train SAE
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.require_grad = False
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.item() * mean_corrector)
            s += 1
            optimizer.step()

    print("Epoch: {}. Loss: {}".format(epoch, train_loss/s))

Epoch: 1. Loss: 1.0417044135646432
Epoch: 2. Loss: 1.0320962157423146
Epoch: 3. Loss: 1.0274428815882786
Epoch: 4. Loss: 1.0241018331723175
Epoch: 5. Loss: 1.0223813567008238
Epoch: 6. Loss: 1.0208100095869381
Epoch: 7. Loss: 1.0199032105208288
Epoch: 8. Loss: 1.0187877103922682
Epoch: 9. Loss: 1.0184962663079453
Epoch: 10. Loss: 1.0176392498954334
Epoch: 11. Loss: 1.0176858257819463
Epoch: 12. Loss: 1.0171208261596916
Epoch: 13. Loss: 1.0169368287235434
Epoch: 14. Loss: 1.0165944462473813
Epoch: 15. Loss: 1.0168461929211656
Epoch: 16. Loss: 1.0162169726498305
Epoch: 17. Loss: 1.0161963148903057
Epoch: 18. Loss: 1.015902892921727
Epoch: 19. Loss: 1.0161092227992725
Epoch: 20. Loss: 1.0156673643100256
Epoch: 21. Loss: 1.0160151234272536
Epoch: 22. Loss: 1.0156044217268392
Epoch: 23. Loss: 1.0152871200817652
Epoch: 24. Loss: 1.014373305073074
Epoch: 25. Loss: 1.012325289234618
Epoch: 26. Loss: 1.0103684852948371
Epoch: 27. Loss: 1.008054294575599
Epoch: 28. Loss: 1.0046142107263472
Epoch

In [26]:
# Testing SAE perfomance
test_loss = 0
s = 0.

for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = Variable(test_set[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = sae(input)
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.item() * mean_corrector)
        s += 1

print("Test Loss: {}".format(test_loss/s))

Test Loss: 0.9498989074811025
