In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tuannguyenvananh/pokemon-dataset-with-team-combat")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/pokemon-dataset-with-team-combat


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

pokemon = pd.read_csv(path + "/pokemon.csv", encoding='latin1')
combat = pd.read_csv(path + "/combats.csv", encoding='latin1')

#Normalize numeric values
names = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
for col in names:
    pokemon[col] = (pokemon[col] - pokemon[col].min()) / (pokemon[col].max() - pokemon[col].min())

pokemon = pd.get_dummies(pokemon, columns=['Type 1', ], dtype=int)
pokemon = pd.get_dummies(pokemon, columns=['Type 2', ], dtype=int)

#Dropping Features and encoding True/False
pokemon = pokemon.drop('Name', axis=1)
# pokemon = pokemon.drop('Type 2', axis=1) #Dropping type2 to reduce dimensionality of the data
pokemon = pokemon.drop('Generation', axis=1)
pokemon['Legendary'] = pokemon['Legendary'].astype(int)
y = combat['Winner']
combat=combat.drop('Winner', axis=1)

In [4]:
# Encode y as true or false. True if the first pokemon wins and false if the second pokemon wins.
for i in range(len(combat)):
    if y[i] == combat['First_pokemon'][i]:
        y[i]=1
    else:
        y[i]=0

In [5]:
cols = pokemon.drop('#', axis=1).columns
merged1 = combat.merge(pokemon, left_on='First_pokemon', right_on='#', how='left')
merged1 = merged1.drop(columns=['#'])
merged1.columns = ['First_pokemon', 'Second_pokemon'] + [col + '1' for col in cols]

# Step 3: Merge with pokemon data for Second_pokemon
merged2 = merged1.merge(pokemon, left_on='Second_pokemon', right_on='#', how='left')
merged2 = merged2.drop(columns=['#', 'First_pokemon', 'Second_pokemon'])
merged2.columns = merged1.columns.tolist()[2:] + [col + '2' for col in cols]

# Final DataFrame
df = merged2
# df.head()

In [6]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

In [6]:
model = MLPRegressor(random_state=42, max_iter=10000, solver='sgd')
model.fit(X_train, y_train)

model.predict(X_test)

model1_accuracy = model.score(X_test, y_test)
print(f"Initial accuracy using 'sgd': {model1_accuracy}")

model2 = MLPRegressor(random_state=42, max_iter=10000, solver='adam')
model2.fit(X_train, y_train)

model2.predict(X_test)

model2_accuracy = model2.score(X_test, y_test)
print(f"Initial accuracy using 'adam': {model2_accuracy}")


Initial accuracy using 'sgd': 0.4920480560709516
Initial accuracy using 'adam': 0.8444461039292592


For large datasets, the "solver" used could either be 'sgd' (Stochastic Gradient Descent) or 'adam' (a Stochastic Gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba). 'adam' seems to both converge faster and achieve higher accuracy, so we will use 'adam' for hyperparameter tuning.

This matches sklearn's recommendations for model tuning: https://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_training_curves.html#sphx-glr-auto-examples-neural-networks-plot-mlp-training-curves-py

In [7]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,)],
    'learning_rate_init': [0.001, 0.005],
    'momentum': [0.5, 0.9],
    'max_iter': [1000, 5000]
}

# perform grid search with cross-validation
grid_search = GridSearchCV(MLPRegressor(solver='adam'), param_grid, cv=2, verbose=3)
grid_result = grid_search.fit(X_train, y_train)

# print results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, std, param))

Fitting 2 folds for each of 24 candidates, totalling 48 fits
[CV 1/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=1000, momentum=0.5;, score=0.789 total time=  15.7s
[CV 2/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=1000, momentum=0.5;, score=0.791 total time=  19.3s
[CV 1/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=1000, momentum=0.9;, score=0.771 total time=  14.2s
[CV 2/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=1000, momentum=0.9;, score=0.795 total time=  17.2s
[CV 1/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=5000, momentum=0.5;, score=0.802 total time=  17.3s
[CV 2/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=5000, momentum=0.5;, score=0.796 total time=  14.9s
[CV 1/2] END hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=5000, momentum=0.9;, score=0.790 total time=  14.7s
[CV 2/2] END hidden_layer_sizes=(50,), learning_rate_init

The hyperparameter tuning revealed that lower layer amounts, higher learning rates, higher momentum, and higher amounts of max iterations led to the greatest model accuracy at 0.812239.

In [18]:
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, average_precision_score

best_model = grid_result.best_estimator_
best_model_pred = best_model.predict(X_test)

print(f"Best Model's R2 score: {r2_score(y_test, best_model_pred)}")
print(f"Best Model's MSE score: {mean_squared_error(y_test, best_model_pred)}")
print(f"Best Model's Average Precision: {average_precision_score(y_test, best_model_pred)}")

Best Model's R2 score: 0.8409056947161535
Best Model's MSE score: 0.039676514476251014
Best Model's Average Precision: 0.9896568644064874


The Grid Search's best model performs really well when it comes to multiple evaluatory metrics. These hyperparameters likely suit an MLP Regressor the best when training on this data.