In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import DataLoader, Dataset
import json
from Models.Models import AutoEncoder

In [2]:
class VectorDataset(Dataset):
    """
    reads user or item vector datasets
    """
    def __init__(self, file_path):
        with open(file_path, 'r') as fp:
            self.data = json.load(fp)
            self.key = list(self.data.keys())
            
    def __getitem__(self, index):
        data = self.data[self.key[index]]
        data1 = torch.Tensor(data)
        #data1 = self.normalize_data(data1)
        #data2 = torch.Tensor(data[143:])
        #data2 = self.normalize_data(data2)
        #data = torch.cat((data1, data2))
        return data1
    
    def normalize_data(self, data):
        data = F.normalize(data, dim=0)
        return data
        
    def __len__(self):
        data_len = len(self.key)
        return data_len

In [3]:
test = VectorDataset('./datasets/item_vectors_tf_idf.json')

In [4]:
num_epochs = 4000
batch_size = 1
learning_rate = 1e-3
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

dataset = test
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=15)

model = AutoEncoder(input_len=143, hidden_unit=24).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = scheduler.MultiStepLR(optimizer, [1000, 2000, 3000], gamma=0.5)

loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    for data in dataloader:
        scheduler.step()
        data = data.to(device)
        # ===================forward=====================
        output = model(data)
        loss = criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.8f}'
          .format(epoch + 1, num_epochs, running_loss/143))
    #if epoch % 100 == 0:
    #    print("check")

#torch.save(model.state_dict(), './sim_autoencoder.pth')

epoch [1/4000], loss:0.01409573
epoch [2/4000], loss:0.01252572
epoch [3/4000], loss:0.01101569
epoch [4/4000], loss:0.00981838
epoch [5/4000], loss:0.00893671
epoch [6/4000], loss:0.00821038
epoch [7/4000], loss:0.00766903
epoch [8/4000], loss:0.00700243
epoch [9/4000], loss:0.00670451
epoch [10/4000], loss:0.00648631
epoch [11/4000], loss:0.00629381
epoch [12/4000], loss:0.00612419
epoch [13/4000], loss:0.00597209
epoch [14/4000], loss:0.00584713
epoch [15/4000], loss:0.00558289
epoch [16/4000], loss:0.00550104
epoch [17/4000], loss:0.00544382
epoch [18/4000], loss:0.00537971
epoch [19/4000], loss:0.00532746
epoch [20/4000], loss:0.00527413
epoch [21/4000], loss:0.00522209
epoch [22/4000], loss:0.00510480
epoch [23/4000], loss:0.00506745
epoch [24/4000], loss:0.00504545
epoch [25/4000], loss:0.00501573
epoch [26/4000], loss:0.00499344
epoch [27/4000], loss:0.00497136
epoch [28/4000], loss:0.00494214
epoch [29/4000], loss:0.00492161
epoch [30/4000], loss:0.00489393
epoch [31/4000], lo

In [5]:
torch.save(model.state_dict(), './trained_model/item_encoder.pth')