In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import DataLoader, Dataset
import json
from Models.Models import AutoEncoder

In [2]:
class VectorDataset(Dataset):
    """
    reads user or item vector datasets
    """
    def __init__(self, file_path):
        with open(file_path, 'r') as fp:
            self.data = json.load(fp)
            self.key = list(self.data.keys())
            
    def __getitem__(self, index):
        data = self.data[self.key[index]]
        data1 = torch.Tensor(data[:143])
        data1 = self.normalize_data(data1)
        #data2 = torch.Tensor(data[143:])
        #data2 = self.normalize_data(data2)
        #data = torch.cat((data1, data2))
        return data1
    
    def normalize_data(self, data):
        data = F.normalize(data, dim=0)
        return data
        
    def __len__(self):
        data_len = len(self.key)
        return data_len

In [3]:
test = VectorDataset('./datasets/user_vectors_tf_idf.json')

In [4]:
num_epochs = 4000
batch_size = 19000
learning_rate = 0.1
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

dataset = test
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)

model = AutoEncoder(input_len=143, hidden_unit=12).to(device)
criterion = nn.SmoothL1Loss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = scheduler.MultiStepLR(optimizer, [1000, 2000, 3000], gamma=0.25)

loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    for data in dataloader:
        scheduler.step()
        data = data.to(device)
        # ===================forward=====================
        output = model(data)
        loss = criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.8f}'
          .format(epoch + 1, num_epochs, running_loss/19000))
    #if epoch % 100 == 0:
    #    print("check")

#torch.save(model.state_dict(), './sim_autoencoder.pth')

epoch [1/4000], loss:1.11468522
epoch [2/4000], loss:1.11201862
epoch [3/4000], loss:1.10936610
epoch [4/4000], loss:1.10672752
epoch [5/4000], loss:1.10410239
epoch [6/4000], loss:1.10149121
epoch [7/4000], loss:1.09889359
epoch [8/4000], loss:1.09630954
epoch [9/4000], loss:1.09373893
epoch [10/4000], loss:1.09118163
epoch [11/4000], loss:1.08863727
epoch [12/4000], loss:1.08610635
epoch [13/4000], loss:1.08358823
epoch [14/4000], loss:1.08108318
epoch [15/4000], loss:1.07859106
epoch [16/4000], loss:1.07611149
epoch [17/4000], loss:1.07364486
epoch [18/4000], loss:1.07119090
epoch [19/4000], loss:1.06874950
epoch [20/4000], loss:1.06632065
epoch [21/4000], loss:1.06390411
epoch [22/4000], loss:1.06149973
epoch [23/4000], loss:1.05910778
epoch [24/4000], loss:1.05672782
epoch [25/4000], loss:1.05436015
epoch [26/4000], loss:1.05200440
epoch [27/4000], loss:1.04966038
epoch [28/4000], loss:1.04732828
epoch [29/4000], loss:1.04500791
epoch [30/4000], loss:1.04269940
epoch [31/4000], lo

In [6]:
torch.save(model.state_dict(), './trained_model/user_encoder_baseline.pth')