In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as scheduler
from torch.utils.data import DataLoader, Dataset
import json

In [2]:
class VectorDataset(Dataset):
    """
    reads user or item vector datasets
    """
    def __init__(self, file_path):
        with open(file_path, 'r') as fp:
            self.data = json.load(fp)
            self.key = list(self.data.keys())
            
    def __getitem__(self, index):
        data = self.data[self.key[index]]
        data1 = torch.Tensor(data[:143])
        #data1 = self.normalize_data(data1)
        #data2 = torch.Tensor(data[143:])
        #data2 = self.normalize_data(data2)
        #data = torch.cat((data1, data2))
        return data1
    
    def normalize_data(self, data):
        data = F.normalize(data, dim=0)
        return data
        
    def __len__(self):
        data_len = len(self.key)
        return data_len

In [3]:
class WinRateDataset(Dataset):
    """
    data : user_vector, item_vector
    label : win_rate
    """
    def __init__(self, user_path, item_path, label_path):
        # get user_vector
        with open(user_path, 'r') as up:
            self.user = json.load(fp)
            self.user_key = list(self.data.keys())
        
        # get item_vector
        with open(item_path, 'r') as ip:
            self.item = json.load(ip)
        
        # get label
        with open(label_path, 'r') as lp:
            self.label = json.load(lp)
            
        # build dataset
        self.data = []
        for user in self.user_key:
            for champ in self.label['user_name']['champion_history'].values():
                
        
    def __getitem__(self, index):
        user_data = self.user[self.key[index]]
        item_data = self.item[self.key[index]]
        

IndentationError: expected an indented block (<ipython-input-3-f45fb0286cd5>, line 26)

In [4]:
class AutoEncoder(nn.Module):
    """
    A simple AutoEncoder. Manually add activation at the last part of decoder if needed.
    """
    
    def __init__(self, input_len, hidden_unit, activation=False):
        super(AutoEncoder, self).__init__()
        self.intermediate = int((input_len+hidden_unit)/2)
        self.encoder = nn.Sequential(
            nn.Linear(input_len, self.intermediate),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.intermediate, hidden_unit))

        self.decoder = nn.Sequential(
            nn.Linear(hidden_unit, self.intermediate),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.intermediate, input_len))
        if activation:
            self.decoder.add_module('act', activation)
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [5]:
class Predictor(nn.Module):
    """
    A prediction model for winning rate. Given user and item vector, predicts winning rate with a scalar value.
    """
    
    def __init__(self, user_len, item_len, hidden_unit):
        super(Predictor, self).__init__()
        self.layer1 = nn.Linear(user_len+item_len, hidden_unit)
        self.relu = nn.ReLU(inplace=True)
        self.layer2 = nn.Linear(hidden_unit, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, user, item):
        x = torch.cat((user, item), dim=1)
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [6]:
test = VectorDataset('./data_batch/user_vectors_tf_idf.json')

In [7]:
num_epochs = 2000
batch_size = 64
learning_rate = 1e-3
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = test
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = AutoEncoder(input_len=143, hidden_unit=32).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = scheduler.MultiStepLR(optimizer, [500, 1000, 1500], gamma=0.5)

loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    for data in dataloader:
        scheduler.step()
        data = data.to(device)
        # ===================forward=====================
        output = model(data)
        loss = criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.8f}'
          .format(epoch + 1, num_epochs, running_loss/19000))
    #if epoch % 100 == 0:
    #    print("check")

#torch.save(model.state_dict(), './sim_autoencoder.pth')

epoch [1/2000], loss:0.00275289
epoch [2/2000], loss:0.00217233
epoch [3/2000], loss:0.00199135
epoch [4/2000], loss:0.00188200
epoch [5/2000], loss:0.00182361
epoch [6/2000], loss:0.00178403
epoch [7/2000], loss:0.00176267
epoch [8/2000], loss:0.00174350
epoch [9/2000], loss:0.00172532
epoch [10/2000], loss:0.00170774
epoch [11/2000], loss:0.00169089
epoch [12/2000], loss:0.00167503
epoch [13/2000], loss:0.00165969
epoch [14/2000], loss:0.00164423
epoch [15/2000], loss:0.00162969
epoch [16/2000], loss:0.00161539
epoch [17/2000], loss:0.00160194
epoch [18/2000], loss:0.00158864
epoch [19/2000], loss:0.00157556
epoch [20/2000], loss:0.00156318
epoch [21/2000], loss:0.00155111
epoch [22/2000], loss:0.00153966
epoch [23/2000], loss:0.00152809
epoch [24/2000], loss:0.00151727
epoch [25/2000], loss:0.00150669
epoch [26/2000], loss:0.00149624
epoch [27/2000], loss:0.00148623
epoch [28/2000], loss:0.00147663
epoch [29/2000], loss:0.00146728
epoch [30/2000], loss:0.00145833
epoch [31/2000], lo