In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import DataLoader, Dataset
import json
from Models.Models import AutoEncoder

class VectorDataset(Dataset):
    """
    reads user or item vector datasets
    """
    def __init__(self, file_path):
        with open(file_path, 'r') as fp:
            self.data = json.load(fp)
            self.key = list(self.data.keys())
            
    def __getitem__(self, index):
        data = self.data[self.key[index]]
        data = torch.Tensor(data)
        #data1 = self.normalize_data(data1)
        #data2 = torch.Tensor(data[143:])
        #data2 = self.normalize_data(data2)
        #data = torch.cat((data1, data2))
        return data
    
    def normalize_data(self, data):
        data = F.normalize(data, dim=0)
        return data
        
    def __len__(self):
        data_len = len(self.key)
        return data_len
    

test = VectorDataset('./datasets/item_vectors_tf_idf.json')


num_epochs = 200
batch_size = 143
learning_rate = 0.001
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

dataset = test
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

model = AutoEncoder(input_len=143, hidden_unit=8).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = scheduler.MultiStepLR(optimizer, [50, 100, 150], gamma=0.25)

loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    for data in dataloader:
        scheduler.step()
        data = data.to(device)
        # ===================forward=====================
        output = model(data)
        loss = criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.8f}'
          .format(epoch + 1, num_epochs, running_loss/143))
    


epoch [1/200], loss:0.00016564
epoch [2/200], loss:0.00015713
epoch [3/200], loss:0.00014955
epoch [4/200], loss:0.00014281
epoch [5/200], loss:0.00013684
epoch [6/200], loss:0.00013156
epoch [7/200], loss:0.00012689
epoch [8/200], loss:0.00012276
epoch [9/200], loss:0.00011909
epoch [10/200], loss:0.00011582
epoch [11/200], loss:0.00011292
epoch [12/200], loss:0.00011033
epoch [13/200], loss:0.00010803
epoch [14/200], loss:0.00010598
epoch [15/200], loss:0.00010417
epoch [16/200], loss:0.00010256
epoch [17/200], loss:0.00010114
epoch [18/200], loss:0.00009989
epoch [19/200], loss:0.00009879
epoch [20/200], loss:0.00009782
epoch [21/200], loss:0.00009696
epoch [22/200], loss:0.00009621
epoch [23/200], loss:0.00009554
epoch [24/200], loss:0.00009494
epoch [25/200], loss:0.00009440
epoch [26/200], loss:0.00009391
epoch [27/200], loss:0.00009346
epoch [28/200], loss:0.00009305
epoch [29/200], loss:0.00009268
epoch [30/200], loss:0.00009234
epoch [31/200], loss:0.00009202
epoch [32/200], l

In [2]:
torch.save(model.state_dict(), './trained_model/item_encoder2.pth')