In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import DataLoader, Dataset
import json
from Models.Models import AutoEncoder

class VectorDataset(Dataset):
    """
    reads user or item vector datasets
    """
    def __init__(self, file_path):
        with open(file_path, 'r') as fp:
            self.data = json.load(fp)
            self.dataset = []
            for user, user_subset in self.data.items():
                for key, vector in user_subset.items():
                    vector = vector[3]
                    for i, item in enumerate(vector):
                        if item != 0:
                            vector[i] = 1
                    vector = torch.Tensor(vector)
                    #vector = self.normalize_data(vector)
                    self.dataset.append(vector)
            
    def __getitem__(self, index):
        data = self.dataset[index]
        return data
    
    def normalize_data(self, data):
        data = F.normalize(data, dim=0)
        return data
        
    def __len__(self):
        data_len = len(self.dataset)
        return data_len
    

test = VectorDataset('./datasets/user_vectors_tf_idf_excluding.json')

In [2]:
num_epochs = 450
batch_size = 512
learning_rate = 0.001
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

dataset = test
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)

model = AutoEncoder(input_len=143, hidden_unit=12).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = scheduler.MultiStepLR(optimizer, [150, 250, 350, 450], gamma=0.25)

loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    for data in dataloader:
        scheduler.step()
        data = data.to(device)
        # ===================forward=====================
        output = model(data)
        loss = criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.8f}'
          .format(epoch + 1, num_epochs, running_loss/19000))


epoch [1/450], loss:0.00040200
epoch [2/450], loss:0.00034401
epoch [3/450], loss:0.00034306
epoch [4/450], loss:0.00034268
epoch [5/450], loss:0.00034239
epoch [6/450], loss:0.00034207
epoch [7/450], loss:0.00034171
epoch [8/450], loss:0.00034141
epoch [9/450], loss:0.00034099
epoch [10/450], loss:0.00034073
epoch [11/450], loss:0.00034037
epoch [12/450], loss:0.00034006
epoch [13/450], loss:0.00033967
epoch [14/450], loss:0.00033939
epoch [15/450], loss:0.00033905
epoch [16/450], loss:0.00033867
epoch [17/450], loss:0.00033836
epoch [18/450], loss:0.00033809
epoch [19/450], loss:0.00033770
epoch [20/450], loss:0.00033746
epoch [21/450], loss:0.00033719
epoch [22/450], loss:0.00033683
epoch [23/450], loss:0.00033650
epoch [24/450], loss:0.00033621
epoch [25/450], loss:0.00033589
epoch [26/450], loss:0.00033563
epoch [27/450], loss:0.00033533
epoch [28/450], loss:0.00033501
epoch [29/450], loss:0.00033470
epoch [30/450], loss:0.00033438
epoch [31/450], loss:0.00033412
epoch [32/450], l

In [4]:
torch.save(model.state_dict(), './trained_model/user_encoder_augmented_new.pth')