In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
INPUT_DIR='/content/drive/MyDrive/DSAIL/ml-100k/'
OUTPUT_DIR='/content/drive/MyDrive/DSAIL/ml-100k/'

In [7]:
import torch
from torch.utils.data import Dataset


class Data(Dataset):
    def __init__(self,train, base):
        super(Data, self).__init__()
        self.train = train
        self.base = base
        self.n_user, self.n_item = train.shape

    def __len__(self):
        if self.base == 'item':
            return self.n_item
        elif self.base == 'user':
            return self.n_user

    def __getitem__(self, idx):
        if self.base == 'item':
            return torch.tensor(self.train.iloc[:, idx].values).float()
        elif self.base == 'user':
            return torch.tensor(self.train.iloc[idx, :].values).float()



In [5]:
import torch.nn as nn
import torch.nn.functional as F


class AutoRec(nn.Module):
    def __init__(self, input, hidden, output):
        super(AutoRec, self).__init__()
        self.enc = nn.Linear(input, hidden)
        self.dec = nn.Linear(hidden, output)
        self.activate = F.sigmoid


    def forward(self, x):
        x = self.activate(self.enc(x))
        x = self.dec(x)
        return x

In [10]:
import numpy as np
import pandas as pd
import os, sys
from collections import defaultdict
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F


ratings = pd.read_csv(INPUT_DIR + 'u.data', sep='\t', names = ["userId", "movieId", "rating", "timestamp"])
ratings_df = ratings.drop(columns=['timestamp'])
train_df = ratings_df.pivot_table(index='userId', columns='movieId', values ='rating')
train = train_df.fillna(-1)

class Config:
    lr = 0.01
    weight_decay = 5e-4
    base = 'item'
    batch_size = 64
    input = train.shape[0] if base == 'item' else train.shape[1]
    hidden = 15
    epochs = 50
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

config = Config()

trainset = Data(train, config.base)
trainloader = DataLoader(trainset, batch_size=config.batch_size, shuffle=False, drop_last=False)

model = AutoRec(input=config.input, hidden=config.hidden, output=config.input)
model = model.to(config.device)
optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)

history = defaultdict(list)
for epoch in range(config.epochs):
    model.train()
    losses = []
    for x in trainloader:
        optimizer.zero_grad()
        x = x.to(config.device)
        mask = x > 0
        pred = model(x)
        loss = torch.mean(((x - pred)[mask])**2)
        loss.backward()
        optimizer.step()
        losses.append(np.sqrt(loss.item()))
    history['tr'].append(np.mean(losses))

    model.eval()

    print(f'EPOCH {epoch+1}: TRAINING loss {history["tr"][-1]} ')


EPOCH 1: TRAINING loss 2.194726356182296 
EPOCH 2: TRAINING loss 1.1950491632672182 
EPOCH 3: TRAINING loss 1.0494047980859247 
EPOCH 4: TRAINING loss 1.026232110525059 
EPOCH 5: TRAINING loss 1.0231408060102103 
EPOCH 6: TRAINING loss 1.0163950808798998 
EPOCH 7: TRAINING loss 1.0190446283405246 
EPOCH 8: TRAINING loss 0.9999765257631107 
EPOCH 9: TRAINING loss 0.9807423248499565 
EPOCH 10: TRAINING loss 0.9878623355743318 
EPOCH 11: TRAINING loss 0.9771255286469841 
EPOCH 12: TRAINING loss 0.9755193258017121 
EPOCH 13: TRAINING loss 0.9758283048532763 
EPOCH 14: TRAINING loss 0.9866841464096077 
EPOCH 15: TRAINING loss 0.9743876358833504 
EPOCH 16: TRAINING loss 0.9626589587822244 
EPOCH 17: TRAINING loss 0.9524971054430774 
EPOCH 18: TRAINING loss 0.9536370551360182 
EPOCH 19: TRAINING loss 0.9509622625063765 
EPOCH 20: TRAINING loss 0.9589213017539637 
EPOCH 21: TRAINING loss 0.9549718755645991 
EPOCH 22: TRAINING loss 0.9493828446844073 
EPOCH 23: TRAINING loss 0.9484991738359535 