In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader, Dataset, Subset
from utils import champ_id_remap, global_win_rate
import json
import math
import random
from Models.Models import AutoEncoder, Predictor, Classifier

global_win_rate = global_win_rate()

check = 0

class WinRateDataset(Dataset):
    """
    data : user_vector, item_vector
    label : win_rate
    """
    def __init__(self, user_path, item_path, label_path, global_win_rate):
        self.check = 0
        self.user_encoder = AutoEncoder(143, 12)
        self.user_encoder.load_state_dict(torch.load('./trained_model/user_encoder_augmented_new.pth'))
        self.item_encoder = AutoEncoder(143, 8)
        self.item_encoder.load_state_dict(torch.load('./trained_model/item_encoder2.pth'))
        self.champ_id_remap = champ_id_remap()
        
        # get user_vector
        with open(user_path, 'r') as up:
            self.user = json.load(up)
        
        # get item_vector
        with open(item_path, 'r') as ip:
            self.item = json.load(ip)
        
        # get label
        with open(label_path, 'r') as lp:
            self.label = json.load(lp)
            
        # build dataset
        self.data = []
        with torch.no_grad():
            for i, (user, user_set) in enumerate(self.user.items()):
                for excluded_champ, augdataDTO in user_set.items():                        
                    play_count = augdataDTO[0]

                    if play_count >= 5:
                        win_rate_label = augdataDTO[1]
                        user_winrate = augdataDTO[2]
                        user_vec = augdataDTO[3]
                        
                        check_list = []
                        for i, item in enumerate(user_vec):
                            if item != 0:
                                check_list.append(i)
                                
                        item_vec = self.item[str(excluded_champ)]
                        user_vec = torch.Tensor(user_vec)
                        item_vec = torch.Tensor(item_vec)

                        user_vec = self.user_encoder.encoder(user_vec)
                        item_vec = self.item_encoder.encoder(item_vec)
                        user_vec = F.normalize(user_vec, dim=0)
                        item_vec = F.normalize(item_vec, dim=0)
                        self.data.append(((user_vec, item_vec), 1))
                        self.check += 1
                        
                        a = random.randint(0, 143)
                        if a not in check_list:
                            item_vec = self.item[str(i)]
                            item_vec = torch.Tensor(item_vec)
                            item_vec = self.item_encoder.encoder(item_vec)
                            item_vec = F.normalize(item_vec, dim=0)
                            self.data.append(((user_vec, item_vec), 0))

                                
    def __getitem__(self, index):
        user_vec = self.data[index][0][0]
        item_vec = self.data[index][0][1]
        label = self.data[index][1]
        return (user_vec, item_vec), label
                                     
    def __len__(self):
        length = len(self.data)
        return length


In [2]:
user_path = './datasets/user_vectors_tf_idf_excluding.json'
item_path = './datasets/item_vectors_tf_idf.json'
label_path = './data_batch/userbatch.json'

dataset = WinRateDataset(user_path, item_path, label_path, global_win_rate)

total_data = len(dataset)

train_list = [x for x in range(len(dataset))]
valid_list = train_list[-5000:]

train_list = list(set(train_list)-set(valid_list))

train_set = Subset(dataset, train_list)
valid_set = Subset(dataset, valid_list)

In [3]:
len(train_set)

175047

In [4]:
dataset.check

91835

In [8]:
num_epochs = 20
batch_size = 128
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
learning_rate = 0.001

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=15)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, num_workers=4)

model = Classifier(user_len=12, item_len=8, hidden_unit=15).to(device)
#model.load_state_dict(torch.load('./trained_model/predictor_last_m.pth'))
criterion = nn.CrossEntropyLoss(torch.Tensor([1, 3]).to(device))
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = MultiStepLR(optimizer, [25, 50, 75, 100], gamma=0.25)

loss = []
best_model_wts = None
best_loss = 100

for epoch in range(num_epochs):
    train_loss = 0.0
    train_acc = 0.0
    count = 0
    for (user_vec, item_vec),label in train_loader:
        scheduler.step()
        user_vec = user_vec.to(device)
        item_vec = item_vec.to(device)
        label = label.long().to(device)
        # ===================forward=====================
        output = model(user_vec, item_vec)
        #print(output.shape)
        #print(label.shape)
        loss = criterion(output, label)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, pred = torch.max(output, dim=1)
        train_acc += (pred==label).sum().item()
        count += label.size(0)
        #print(loss.item()/user_vec.size(0))
    # ===================log========================
    print('train epoch [{}/{}], loss:{:.8f}, acc:{:.4f}'
          .format(epoch + 1, num_epochs, train_loss/count, train_acc/count))

    valid_loss = 0.0
    valid_acc = 0.0
    count = 0
    
    if (epoch+1) % 4 == 0 and epoch > 1:
        print('---------------------------')
        for (user_vec, item_vec), label in valid_loader:
            scheduler.step()
            user_vec = user_vec.to(device)
            item_vec = item_vec.to(device)
            label = label.long().to(device)
            # ===================forward=====================
            output = model(user_vec, item_vec)
            loss = criterion(output, label)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            valid_loss += loss.item()
            _, pred = torch.max(output, dim=1)
            valid_acc += (pred==label).sum().item()
            count += label.size(0)
            if valid_loss < best_loss:
                print('best model so far!')
                best_loss = valid_loss
                best_model_wts = model.state_dict()
        # ===================log========================
        print('valid epoch, loss:{:.8f}, acc:{:.4f}'
              .format(valid_loss/count, valid_acc/count))
        print('----------------------------')
        

train epoch [1/20], loss:0.00429472, acc:0.5101
train epoch [2/20], loss:0.00421111, acc:0.5101
train epoch [3/20], loss:0.00413537, acc:0.5101
train epoch [4/20], loss:0.00406059, acc:0.5101
---------------------------
best model so far!
valid epoch, loss:0.00412032, acc:0.5092
----------------------------
train epoch [5/20], loss:0.00398378, acc:0.5101
train epoch [6/20], loss:0.00391000, acc:0.5101
train epoch [7/20], loss:0.00383742, acc:0.5101
train epoch [8/20], loss:0.00376604, acc:0.5101
---------------------------
best model so far!
valid epoch, loss:0.00382720, acc:0.5092
----------------------------
train epoch [9/20], loss:0.00369319, acc:0.5101
train epoch [10/20], loss:0.00362442, acc:0.5101
train epoch [11/20], loss:0.00355590, acc:0.5101
train epoch [12/20], loss:0.00348826, acc:0.5101
---------------------------
best model so far!
valid epoch, loss:0.00355154, acc:0.5092
----------------------------
train epoch [13/20], loss:0.00341938, acc:0.5101
train epoch [14/20], 

In [9]:
torch.save(model.state_dict(), './trained_model/classifier_last_m.pth')