In [3]:
from tensorflow import keras
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
data = pd.read_csv('data/output/matches.csv')
data.head()

Unnamed: 0,winner,pokemon1_p1,pokemon2_p1,pokemon3_p1,pokemon4_p1,pokemon5_p1,pokemon6_p1,pokemon1_p2,pokemon2_p2,pokemon3_p2,pokemon4_p2,pokemon5_p2,pokemon6_p2
0,1,Dondozo,Tatsugiri,Ogerpon,Iron Hands,Ninetales,Glimmora,Weezing,Landorus,Ogerpon,Heatran,Rillaboom,Iron Hands
1,1,Tornadus,Sinistcha,Basculegion,Iron Hands,Arcanine,Ninetales,Tornadus,Arcanine,Gastrodon,Rillaboom,Chi,Iron Valiant
2,2,Urshifu,Landorus,Ogerpon,Thundurus,Rillaboom,Kingambit,Armarouge,Indeedee,Torkoal,Ursaluna,Gallade,Urshifu
3,2,Gholdengo,Roaring Moon,Rillaboom,Arcanine,Urshifu,Kingambit,Ogerpon,Dondozo,Tatsugiri,Flutter Mane,Glimmora,Urshifu
4,1,Dondozo,Tatsugiri,Rillaboom,Volcarona,Kingambit,Scizor,Arcanine,Rillaboom,Tornadus,Flutter Mane,Urshifu,Chien


In [9]:
vocabulary = set()

for col in data.columns:
    for value in data[col]:
        if isinstance(value, str):
            vocabulary.add(value)
vocabulary = list(vocabulary)
print('Vocabulary size:', len(vocabulary))
print('Vocabulary:', vocabulary)

Vocabulary size: 392
Vocabulary: ['Landorus', 'Furret', 'Gligar', 'Honchkrow', 'Sneasel', 'Drizzile', 'Altaria', 'Jolteon', 'Kricketune', 'Meowscarada', 'Munkidori', 'Yanmega', 'Maushold', 'Dragapult', 'Sableye', 'Jigglypuff', 'Flittle', 'Cresselia', 'Chimecho', 'Geodude', 'Impidimp', 'Appletun', 'Kingambit', 'Stantler', 'Spectrier', 'Sinistcha', 'Zangoose', 'Gastrodon', 'Sudowoodo', 'Decidueye', 'Gallade', 'Volcarona', 'Floatzel', 'Garchomp', 'Bisharp', 'Enamorus', 'Skiploom', 'Leavanny', 'Swablu', 'Noctowl', 'Primeape', 'Grimer', 'Luvdisc', 'Lurantis', 'Falinks', 'Palossand', 'Graveler', 'Kommo', 'Crocalor', 'Thundurus', 'Grimmsnarl', 'Lokix', 'Toxapex', 'Donphan', 'Toedscruel', 'Toxtricity', 'Blissey', 'Riolu', 'Pawniard', 'Arbok', 'Foongus', 'Pincurchin', 'Pyroar', 'Persian', 'Annihilape', 'Torterra', 'Salamence', 'Sliggoo', 'Houndoom', 'Okidogi', 'Iron Thorns', 'Skeledirge', 'Brambleghast', 'Oranguru', 'Urshifu', 'Oricorio', 'Drifblim', 'Raboot', 'Azumarill', 'Mienfoo', 'Gyarados'

In [10]:
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [11]:
X_train = train.drop(columns=['winner'])
y_train = train['winner']

X_test = test.drop(columns=['winner'])
y_test = test['winner']

In [71]:
judge = keras.Sequential([
    keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None),
    keras.layers.Embedding(input_dim=len(vocabulary) + 1, output_dim=16, input_length=6),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1)
])

t1 = keras.Input(shape=(6,), dtype='string')
t2 = keras.Input(shape=(6,), dtype='string')

s1 = judge(t1)
s2 = judge(t2)

d = keras.layers.Subtract()([s1, s2])
d = keras.layers.Activation('sigmoid')(d)

model = keras.Model(inputs=[t1, t2], outputs=d)

In [72]:
model.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [73]:
X_train_p1 = X_train[['pokemon1_p1', 'pokemon2_p1', 'pokemon3_p1', 'pokemon4_p1', 'pokemon5_p1', 'pokemon6_p1']].to_numpy()
X_train_p2 = X_train[['pokemon1_p2', 'pokemon2_p2', 'pokemon3_p2', 'pokemon4_p2', 'pokemon5_p2', 'pokemon6_p2']].to_numpy()

In [74]:
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_21 (InputLayer)       [(None, 6)]                  0         []                            
                                                                                                  
 input_22 (InputLayer)       [(None, 6)]                  0         []                            
                                                                                                  
 sequential_10 (Sequential)  (None, 1)                    6577      ['input_21[0][0]',            
                                                                     'input_22[0][0]']            
                                                                                                  
 subtract_10 (Subtract)      (None, 1)                    0         ['sequential_10[0][0]',

In [75]:
model.fit([X_train_p1, X_train_p2], y_train, epochs=30, verbose=2, batch_size=4)

Epoch 1/30


  output, from_logits = _get_logits(


2877/2877 - 3s - loss: 0.6886 - accuracy: 0.2361 - 3s/epoch - 1ms/step
Epoch 2/30
2877/2877 - 3s - loss: 0.6596 - accuracy: 0.2347 - 3s/epoch - 1ms/step
Epoch 3/30
2877/2877 - 3s - loss: 0.6365 - accuracy: 0.2348 - 3s/epoch - 972us/step
Epoch 4/30
2877/2877 - 3s - loss: 0.6164 - accuracy: 0.2377 - 3s/epoch - 948us/step
Epoch 5/30
2877/2877 - 4s - loss: 0.5956 - accuracy: 0.2381 - 4s/epoch - 1ms/step
Epoch 6/30
2877/2877 - 3s - loss: 0.5748 - accuracy: 0.2391 - 3s/epoch - 1ms/step
Epoch 7/30
2877/2877 - 3s - loss: 0.5553 - accuracy: 0.2400 - 3s/epoch - 935us/step
Epoch 8/30
2877/2877 - 4s - loss: 0.5312 - accuracy: 0.2421 - 4s/epoch - 1ms/step
Epoch 9/30
2877/2877 - 3s - loss: 0.5101 - accuracy: 0.2430 - 3s/epoch - 983us/step
Epoch 10/30
2877/2877 - 3s - loss: 0.4805 - accuracy: 0.2438 - 3s/epoch - 990us/step
Epoch 11/30
2877/2877 - 3s - loss: 0.4476 - accuracy: 0.2465 - 3s/epoch - 990us/step
Epoch 12/30
2877/2877 - 3s - loss: 0.4102 - accuracy: 0.2467 - 3s/epoch - 1ms/step
Epoch 13/30


<keras.src.callbacks.History at 0x28f9a28c0>

In [78]:
X_test_p1 = X_test[['pokemon1_p1', 'pokemon2_p1', 'pokemon3_p1', 'pokemon4_p1', 'pokemon5_p1', 'pokemon6_p1']].to_numpy()
X_test_p2 = X_test[['pokemon1_p2', 'pokemon2_p2', 'pokemon3_p2', 'pokemon4_p2', 'pokemon5_p2', 'pokemon6_p2']].to_numpy()

model.evaluate([X_test_p1,X_test_p2], y_test, verbose=2)

90/90 - 0s - loss: 11.8948 - accuracy: 0.2433 - 209ms/epoch - 2ms/step


  output, from_logits = _get_logits(


[11.894848823547363, 0.24330900609493256]