In [92]:
import pandas as pd
import random
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils.np_utils import to_categorical

### Load data

In [211]:
test = pd.read_csv('../data/tests.csv', names=['pokemon_1', 'pokemon_2'])
pokemon = pd.read_csv('../data/pokemon.csv', index_col='#')
combats = pd.read_csv('../data/combats.csv', names=['pokemon_1', 'pokemon_2', 'winner'])

Normalize column names

In [212]:
pokemon.columns = [i.lower().replace(' ', '_').replace('.', '') for i in pokemon.columns]

Normalize pokemon types

In [213]:
pokemon_types = {t: i for i, t in enumerate(set(pokemon['type_1']) | set(pokemon['type_2']))}
pokemon['type_1'] = pokemon.apply(lambda x: pokemon_types[x['type_1']], axis=1)
pokemon['type_2'] = pokemon.apply(lambda x: pokemon_types[x['type_2']], axis=1)
pokemon.head()

Unnamed: 0_level_0,name,type_1,type_2,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Bulbasaur,3,18,45,49,49,65,65,45,1,False
2,Ivysaur,3,18,60,62,63,80,80,60,1,False
3,Venusaur,3,18,80,82,83,100,100,80,1,False
4,Mega Venusaur,3,18,80,100,123,122,120,80,1,False
5,Charmander,2,0,39,52,43,60,50,65,1,False


### Generate training data set

In [150]:
training = combats.merge(pokemon, left_on='pokemon_1', right_index=True).merge(pokemon, left_on='pokemon_2', right_index=True)
training['winner'] = training.apply(lambda x: 0 if x['winner'] == x['pokemon_1'] else 1, axis=1)
del training['name_x']
del training['name_y']
del training['pokemon_1']
del training['pokemon_2']
training = training.sort_index()
training.head()

Unnamed: 0,winner,type_1_x,type_2_x,hp_x,attack_x,defense_x,sp_atk_x,sp_def_x,speed_x,generation_x,...,type_1_y,type_2_y,hp_y,attack_y,defense_y,sp_atk_y,sp_def_y,speed_y,generation_y,legendary_y
0,1,16,15,50,64,50,45,50,41,2,...,3,17,70,70,40,60,40,60,3,False
1,1,3,1,91,90,72,90,129,108,5,...,16,1,91,129,90,72,90,108,5,True
2,1,12,9,55,40,85,80,105,40,2,...,4,0,75,75,75,125,95,40,5,False
3,1,2,0,40,40,40,70,40,20,2,...,5,0,77,120,90,60,90,48,5,False
4,0,16,10,70,60,125,115,70,55,1,...,13,16,20,10,230,10,230,5,2,False


In [177]:
seed = random.randint(0, 9) / 10

shape = training.shape[0]
test = training.iloc[int(shape*seed):int(shape*(seed+0.1))]
test_x, test_y = test.iloc[:, 1:], test.iloc[:, 0]

train = pd.concat([training.iloc[:int(shape*seed)], training.iloc[int(shape*(seed+0.1)):]])
train_x, train_y = train.iloc[:, 1:], train.iloc[:, 0]

train_x.shape, train_y.shape, test_x.shape, test_y.shape

((45000, 20), (45000,), (5000, 20), (5000,))

In [203]:
model = Sequential()
model.add(Dense(100, input_dim=20, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(20, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(20, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adamax',
              metrics=['accuracy'])

model.fit(train_x.values, train_y,
          epochs=200,
          batch_size=1000,
          verbose=2,
          validation_data=(test_x.values, test_y))

model.evaluate(test_x.values, test_y, batch_size=100)

Train on 45000 samples, validate on 5000 samples
Epoch 1/200
2s - loss: 0.5780 - acc: 0.6896 - val_loss: 0.4490 - val_acc: 0.7970
Epoch 2/200
0s - loss: 0.4579 - acc: 0.7928 - val_loss: 0.3833 - val_acc: 0.8374
Epoch 3/200
0s - loss: 0.4116 - acc: 0.8244 - val_loss: 0.3486 - val_acc: 0.8602
Epoch 4/200
0s - loss: 0.3835 - acc: 0.8425 - val_loss: 0.3335 - val_acc: 0.8760
Epoch 5/200
0s - loss: 0.3675 - acc: 0.8560 - val_loss: 0.3252 - val_acc: 0.8800
Epoch 6/200
0s - loss: 0.3558 - acc: 0.8622 - val_loss: 0.3148 - val_acc: 0.8880
Epoch 7/200
0s - loss: 0.3482 - acc: 0.8675 - val_loss: 0.3089 - val_acc: 0.8910
Epoch 8/200
0s - loss: 0.3368 - acc: 0.8741 - val_loss: 0.3038 - val_acc: 0.8934
Epoch 9/200
0s - loss: 0.3301 - acc: 0.8795 - val_loss: 0.2976 - val_acc: 0.8976
Epoch 10/200
0s - loss: 0.3222 - acc: 0.8819 - val_loss: 0.2885 - val_acc: 0.9022
Epoch 11/200
0s - loss: 0.3142 - acc: 0.8871 - val_loss: 0.2825 - val_acc: 0.9034
Epoch 12/200
0s - loss: 0.3092 - acc: 0.8886 - val_loss: 0

0s - loss: 0.1721 - acc: 0.9464 - val_loss: 0.1700 - val_acc: 0.9478
Epoch 102/200
0s - loss: 0.1720 - acc: 0.9458 - val_loss: 0.1697 - val_acc: 0.9502
Epoch 103/200
0s - loss: 0.1718 - acc: 0.9457 - val_loss: 0.1693 - val_acc: 0.9500
Epoch 104/200
0s - loss: 0.1714 - acc: 0.9466 - val_loss: 0.1691 - val_acc: 0.9478
Epoch 105/200
0s - loss: 0.1724 - acc: 0.9453 - val_loss: 0.1691 - val_acc: 0.9506
Epoch 106/200
0s - loss: 0.1725 - acc: 0.9461 - val_loss: 0.1694 - val_acc: 0.9482
Epoch 107/200
0s - loss: 0.1746 - acc: 0.9445 - val_loss: 0.1723 - val_acc: 0.9462
Epoch 108/200
0s - loss: 0.1713 - acc: 0.9468 - val_loss: 0.1698 - val_acc: 0.9476
Epoch 109/200
0s - loss: 0.1706 - acc: 0.9478 - val_loss: 0.1695 - val_acc: 0.9506
Epoch 110/200
0s - loss: 0.1698 - acc: 0.9476 - val_loss: 0.1676 - val_acc: 0.9494
Epoch 111/200
0s - loss: 0.1708 - acc: 0.9479 - val_loss: 0.1672 - val_acc: 0.9502
Epoch 112/200
0s - loss: 0.1703 - acc: 0.9461 - val_loss: 0.1680 - val_acc: 0.9474
Epoch 113/200
0s -

0s - loss: 0.1645 - acc: 0.9500 - val_loss: 0.1648 - val_acc: 0.9502

[0.16481063276529312, 0.95019999980926517]

## Output

Generate output dataframe

In [215]:
data = test.merge(pokemon, left_on='pokemon_1', right_index=True).merge(pokemon, left_on='pokemon_2', right_index=True)
del data['name_x']
del data['name_y']
del data['pokemon_1']
del data['pokemon_2']
data = data.sort_index()
data.head()

Unnamed: 0,type_1_x,type_2_x,hp_x,attack_x,defense_x,sp_atk_x,sp_def_x,speed_x,generation_x,legendary_x,type_1_y,type_2_y,hp_y,attack_y,defense_y,sp_atk_y,sp_def_y,speed_y,generation_y,legendary_y
0,10,0,80,92,65,65,80,68,1,False,7,0,90,55,75,60,75,30,1,False
1,3,6,74,94,131,54,116,20,5,False,10,15,95,85,85,65,65,35,2,False
2,11,9,79,105,70,145,80,101,5,True,1,0,50,120,53,35,110,87,1,False
3,11,0,70,55,55,80,60,45,2,False,3,0,75,86,67,106,67,60,5,False
4,7,9,40,60,30,31,31,70,1,False,10,0,165,75,80,40,45,65,5,False


In [218]:
output = test.copy()
output['result'] = pd.Series(model.predict(data.values)[:,0])
output['result'] = output.apply(lambda x: int(round(x['result'], 0) + 1), axis=1)
output.to_csv('../data/output.csv', index=False)