In [None]:
####################################################
# Description: Pokemon battle Winner Predict Model #
# Author : KJ                                      #
# Created Date : 2018-11-21                        #
####################################################

In [27]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

from torch.utils.data import TensorDataset, DataLoader
from torch import optim

In [29]:
class Net(nn.Module):
    """Feed-forward neural network that predicts the winner of a Pokémon battle.

    The network maps 18 input features to a single sigmoid output in (0, 1)
    through three hidden ReLU layers (100 -> 1000 -> 100 units).
    """

    def __init__(self):
        super(Net, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(18, 100),
            nn.ReLU(),
            nn.Linear(100, 1000),
            nn.ReLU(),
            nn.Linear(1000, 100),
            nn.ReLU(),
            nn.Linear(100, 1),
            nn.Sigmoid()
        )

        # Module.apply walks every sub-module, so each Linear layer is initialised.
        self.model.apply(self._init_weights)

    @staticmethod
    def _init_weights(m):
        """Xavier-initialise the weights of a Linear layer and set its bias to 0.01.

        Modules that are not Linear layers (activations, the container itself)
        are left untouched.
        """
        if isinstance(m, nn.Linear):
            # Use the in-place initialisers; the names without the trailing
            # underscore are deprecated.
            nn.init.xavier_uniform_(m.weight)
            nn.init.constant_(m.bias, 0.01)

    def forward(self, x):
        """Run the network on `x` (last dimension must be 18) and return the sigmoid scores."""
        return self.model(x)

In [35]:
def train(model, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: iterable of (data, target) batches; must also expose
            `.dataset` and `len()` for the progress message.
        optimizer: optimiser already bound to the model's parameters.
        epoch: epoch number, used only in the progress message.

    The summed squared error of every 10th batch is printed as progress.
    """
    model.train()

    # TODO : find the best loss function
    # Built once per call rather than once per batch. reduction='sum' is the
    # current spelling of the deprecated size_average=False.
    criterion = nn.MSELoss(reduction='sum')

    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)

        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, test_loader):
    # testing function
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss +=  (output - target).pow(2).sum()
            pred = output[:, 0] < 0.5
            real = target[:, 0] < 0.5

            correct += (pred == real).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [36]:
# Integer code for each Pokémon type, assigned in listing order.
# A missing type (NaN) is encoded as 0.
TYPES = {
    type_name: code
    for code, type_name in enumerate((
        np.nan,
        'Normal', 'Fire', 'Water', 'Electric', 'Grass', 'Ice',
        'Fighting', 'Poison', 'Ground', 'Flying', 'Psychic', 'Bug',
        'Rock', 'Ghost', 'Dragon', 'Dark', 'Steel', 'Fairy',
    ))
}

# Columns of the merged combat table used by the model, in tensor order:
# the label first, then the two Legendary flags, then the type/stat columns of
# the first (_x) and second (_y) Pokémon.
COLUMNS = (
    ['Winner']
    + ['Legendary_x', 'Legendary_y']
    + ['Type 1_x', 'Type 2_x',
       'HP_x', 'Attack_x', 'Defense_x', 'Sp. Atk_x', 'Sp. Def_x', 'Speed_x']
    + ['Type 1_y', 'Type 2_y',
       'HP_y', 'Attack_y', 'Defense_y', 'Sp. Atk_y', 'Sp. Def_y', 'Speed_y']
)

In [37]:
class PokemonData():
    """Loads the Pokémon combat data and exposes it as PyTorch data loaders.

    After `preprocessing()` or `get_prepared_data_set()` succeeds, the instance
    has `train_loader` and `test_loader` attributes (batch size 8).
    `preprocessing()` additionally records the per-column statistics it used in
    `normalization`.
    """

    def __init__(self, combats_file_path='./data/combats.csv', pokemon_file_path='./data/pokemon.csv'):
        """Remember where the combats and pokemon CSV files live; nothing is read yet."""
        self.combat_file_path = combats_file_path
        self.pokemon_file_path = pokemon_file_path
        # column name -> {'mean', 'max', 'min'}; filled in by preprocessing()
        self.normalization = {}

    def get_prepared_data_set(self, train_path='', test_path=''):
        """Build the loaders from datasets previously written with `torch.save`.

        Args:
            train_path: file holding the saved training dataset.
            test_path: file holding the saved test dataset.

        Raises:
            Exception: if either file cannot be loaded or wrapped in a loader;
                the underlying error is attached as `__cause__`.
        """
        # NOTE: torch.load unpickles the file, so only load files you trust.
        try:
            train_tensor = torch.load(train_path)
            self.train_loader = DataLoader(train_tensor, batch_size=8, shuffle=True, num_workers=2)
        except Exception as e:
            raise Exception('Invaild train tensor path') from e

        print('Train data loading Complete!')

        try:
            test_tensor = torch.load(test_path)
            self.test_loader = DataLoader(test_tensor, batch_size=8, shuffle=False, num_workers=2)
        except Exception as e:
            raise Exception('Invaild test tensor path') from e

        print('Test data loading Complete!')

    def read_file(self):
        """Read the combats and pokemon CSV files.

        Returns:
            (combats, pokemons) as two DataFrames.

        Raises:
            Exception: if either file cannot be read; the underlying error is
                attached as `__cause__`.
        """
        try:
            combats = pd.read_csv(self.combat_file_path)
        except Exception as e:
            raise Exception('Invaild combat file path') from e

        # Printed once the combats file has been read (message text kept as-is).
        print('Train data loading Complete!')

        try:
            pokemons = pd.read_csv(self.pokemon_file_path)
        except Exception as e:
            raise Exception('Invaild pokemon file path') from e

        # Printed once the pokemon file has been read (message text kept as-is).
        print('Test data loading Complete!')

        return combats, pokemons

    @staticmethod
    def _to_tensor(frame):
        """Convert a DataFrame slice to a float32 tensor.

        The tensors hold data, not parameters, so they do not track gradients.
        """
        # np.float was removed from NumPy; np.float64 is the same dtype.
        return torch.tensor(frame.values.astype(np.float64), dtype=torch.float)

    def preprocessing(self, save=True):
        """Build the train/test loaders from the raw CSV files.

        Steps: join each combat with the stats of both Pokémon, turn `Winner`
        into 1.0 (first Pokémon won) / 0.0, encode the type names with `TYPES`,
        mean/range-normalise the type and stat columns, and split the rows
        60/40 in file order into train and test sets.

        Args:
            save: when true, also write the two datasets to the files
                'train_set' and 'test_set' in the working directory.
        """
        combats, pokemons = self.read_file()

        df = pd.merge(combats, pokemons, left_on='First_pokemon', right_on='#')
        df = pd.merge(df, pokemons, left_on='Second_pokemon', right_on='#')

        df['Winner'] = np.where(df['Winner'] == df['First_pokemon'], 1.0, 0.0)

        type_columns = ['Type 1_x', 'Type 2_x', 'Type 1_y', 'Type 2_y']
        df = df.replace({column: TYPES for column in type_columns})
        # Cast explicitly so these columns are numeric whatever dtype replace()
        # leaves behind; an unmapped type name fails loudly here.
        df[type_columns] = df[type_columns].astype(float)

        # Normalise every type/stat column (the Legendary flags are skipped) and
        # keep the statistics so the same scaling can be applied again later.
        self.normalization = {}
        for column in COLUMNS[3:]:
            mean = df[column].mean()
            high = df[column].max()
            low = df[column].min()
            self.normalization[column] = {'mean': mean, 'max': high, 'min': low}

            span = high - low
            if span != 0:
                df[column] = (df[column] - mean) / span
            else:
                # A constant column carries no information; avoid 0/0 -> NaN.
                df[column] = 0.0

        # 60/40 split by position. Integer arithmetic plus iloc guarantees that
        # every row lands in exactly one of the two sets.
        split = len(df) * 3 // 5
        features = df[COLUMNS[1:]]
        labels = df[COLUMNS[:1]]

        train_tensor = TensorDataset(self._to_tensor(features.iloc[:split]),
                                     self._to_tensor(labels.iloc[:split]))
        test_tensor = TensorDataset(self._to_tensor(features.iloc[split:]),
                                    self._to_tensor(labels.iloc[split:]))

        if save:
            torch.save(train_tensor, 'train_set')
            torch.save(test_tensor, 'test_set')

        self.train_loader = DataLoader(train_tensor, batch_size=8, shuffle=True, num_workers=2)
        self.test_loader = DataLoader(test_tensor, batch_size=8, shuffle=False, num_workers=2)

    def postprocessing(self, save=True):
        """Placeholder; not implemented yet."""
        pass
    

In [38]:
def main(epochs=200, lr=1.0e-5, seed=None):
    """Preprocess the data, train the network, evaluate after every epoch, save the weights.

    Args:
        epochs: number of passes over the training set.
        lr: learning rate handed to the Adam optimiser.
        seed: when given, seeds Python's, NumPy's and PyTorch's random number
            generators before anything else runs. With the default (None)
            nothing is seeded.

    The trained weights (state dict) are written to './model'.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    pokemon_data = PokemonData()
    pokemon_data.preprocessing()
    # Or, to reuse the datasets saved by an earlier preprocessing() run:
    # pokemon_data.get_prepared_data_set(train_path='train_set', test_path='test_set')

    model = Net()

    # TODO : find appropriate learning rate and optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        train(model, pokemon_data.train_loader, optimizer, epoch)
        test(model, pokemon_data.test_loader)

    torch.save(model.state_dict(), './model')

In [None]:
# Entry point: start the full preprocess/train/evaluate run only when this code
# is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()

Train data loading Complete!
Test data loading Complete!


  







Test set: Average loss: 0.1341, Accuracy: 16675/20000 (83%)






Test set: Average loss: 0.1057, Accuracy: 17577/20000 (88%)






Test set: Average loss: 0.0935, Accuracy: 17945/20000 (90%)






Test set: Average loss: 0.0872, Accuracy: 18089/20000 (90%)








Test set: Average loss: 0.0827, Accuracy: 18214/20000 (91%)






Test set: Average loss: 0.0799, Accuracy: 18274/20000 (91%)






Test set: Average loss: 0.0777, Accuracy: 18313/20000 (92%)






Test set: Average loss: 0.0762, Accuracy: 18360/20000 (92%)






Test set: Average loss: 0.0750, Accuracy: 18376/20000 (92%)








Test set: Average loss: 0.0737, Accuracy: 18418/20000 (92%)






Test set: Average loss: 0.0729, Accuracy: 18430/20000 (92%)






Test set: Average loss: 0.0721, Accuracy: 18464/20000 (92%)






Test set: Average loss: 0.0716, Accuracy: 18470/20000 (92%)








Test set: Average loss: 0.0709, Accuracy: 18485/20000 (92%)






Test set: Average loss: 0.0707, Accuracy: 18491/20000 (92%)






Test set: Average loss: 0.0699, Accuracy: 18501/20000 (93%)






Test set: Average loss: 0.0695, Accuracy: 18502/20000 (93%)








Test set: Average loss: 0.0692, Accuracy: 18521/20000 (93%)






Test set: Average loss: 0.0689, Accuracy: 18508/20000 (93%)






Test set: Average loss: 0.0689, Accuracy: 18521/20000 (93%)








Test set: Average loss: 0.0685, Accuracy: 18534/20000 (93%)






Test set: Average loss: 0.0681, Accuracy: 18541/20000 (93%)






Test set: Average loss: 0.0679, Accuracy: 18554/20000 (93%)






Test set: Average loss: 0.0678, Accuracy: 18557/20000 (93%)








Test set: Average loss: 0.0677, Accuracy: 18552/20000 (93%)






Test set: Average loss: 0.0674, Accuracy: 18569/20000 (93%)






Test set: Average loss: 0.0674, Accuracy: 18552/20000 (93%)






Test set: Average loss: 0.0672, Accuracy: 18558/20000 (93%)








Test set: Average loss: 0.0670, Accuracy: 18587/20000 (93%)






Test set: Average loss: 0.0668, Accuracy: 18566/20000 (93%)






Test set: Average loss: 0.0669, Accuracy: 18564/20000 (93%)








Test set: Average loss: 0.0665, Accuracy: 18579/20000 (93%)






Test set: Average loss: 0.0670, Accuracy: 18558/20000 (93%)






Test set: Average loss: 0.0666, Accuracy: 18576/20000 (93%)






Test set: Average loss: 0.0662, Accuracy: 18594/20000 (93%)








Test set: Average loss: 0.0663, Accuracy: 18587/20000 (93%)






Test set: Average loss: 0.0663, Accuracy: 18589/20000 (93%)






Test set: Average loss: 0.0660, Accuracy: 18592/20000 (93%)






Test set: Average loss: 0.0659, Accuracy: 18607/20000 (93%)








Test set: Average loss: 0.0661, Accuracy: 18598/20000 (93%)






Test set: Average loss: 0.0661, Accuracy: 18598/20000 (93%)






Test set: Average loss: 0.0661, Accuracy: 18608/20000 (93%)








Test set: Average loss: 0.0657, Accuracy: 18608/20000 (93%)






Test set: Average loss: 0.0661, Accuracy: 18592/20000 (93%)






Test set: Average loss: 0.0656, Accuracy: 18612/20000 (93%)






Test set: Average loss: 0.0655, Accuracy: 18617/20000 (93%)








Test set: Average loss: 0.0653, Accuracy: 18617/20000 (93%)






Test set: Average loss: 0.0657, Accuracy: 18602/20000 (93%)






Test set: Average loss: 0.0654, Accuracy: 18610/20000 (93%)






Test set: Average loss: 0.0656, Accuracy: 18617/20000 (93%)








Test set: Average loss: 0.0655, Accuracy: 18630/20000 (93%)






Test set: Average loss: 0.0653, Accuracy: 18617/20000 (93%)






Test set: Average loss: 0.0652, Accuracy: 18618/20000 (93%)






Test set: Average loss: 0.0651, Accuracy: 18622/20000 (93%)








Test set: Average loss: 0.0652, Accuracy: 18622/20000 (93%)






Test set: Average loss: 0.0652, Accuracy: 18618/20000 (93%)






Test set: Average loss: 0.0651, Accuracy: 18623/20000 (93%)








Test set: Average loss: 0.0653, Accuracy: 18605/20000 (93%)






Test set: Average loss: 0.0650, Accuracy: 18629/20000 (93%)






Test set: Average loss: 0.0650, Accuracy: 18626/20000 (93%)






Test set: Average loss: 0.0649, Accuracy: 18623/20000 (93%)








Test set: Average loss: 0.0650, Accuracy: 18631/20000 (93%)






Test set: Average loss: 0.0648, Accuracy: 18625/20000 (93%)






Test set: Average loss: 0.0649, Accuracy: 18635/20000 (93%)








Test set: Average loss: 0.0654, Accuracy: 18624/20000 (93%)






Test set: Average loss: 0.0646, Accuracy: 18630/20000 (93%)






Test set: Average loss: 0.0650, Accuracy: 18628/20000 (93%)






Test set: Average loss: 0.0649, Accuracy: 18626/20000 (93%)








Test set: Average loss: 0.0648, Accuracy: 18637/20000 (93%)






Test set: Average loss: 0.0646, Accuracy: 18638/20000 (93%)






Test set: Average loss: 0.0646, Accuracy: 18636/20000 (93%)






Test set: Average loss: 0.0648, Accuracy: 18645/20000 (93%)








Test set: Average loss: 0.0647, Accuracy: 18648/20000 (93%)






Test set: Average loss: 0.0647, Accuracy: 18633/20000 (93%)






Test set: Average loss: 0.0647, Accuracy: 18631/20000 (93%)






Test set: Average loss: 0.0651, Accuracy: 18631/20000 (93%)





