# ML Prediction of Pokemon Battle

##### Importing the required libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##### Importing the Pokemon dataset and displaying the Gen 1 starters

In [2]:
# Load the Pokémon table; later cells look up names and stats in this frame.
pokemon = pd.read_csv("Pokemon-Battle.csv")

# Preview the first 13 rows of the table.
pokemon.head(n=13)

Unnamed: 0,Pokedex No.,Name,Type,Other Type,HP,Attack,Defense,Special Attack,Special Defense,Speed,Total,Generation,Legendary,Battle ID
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,318,1,0,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,405,1,0,2
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,525,1,0,3
3,3,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,625,1,0,4
4,4,Charmander,Fire,,39,52,43,60,50,65,309,1,0,5
5,5,Charmeleon,Fire,,58,64,58,80,65,80,405,1,0,6
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,534,1,0,7
7,6,Mega Charizard X,Fire,Dragon,78,130,111,130,85,100,634,1,0,8
8,6,Mega Charizard Y,Fire,Flying,78,104,78,159,115,100,634,1,0,9
9,7,Squirtle,Water,,44,48,65,50,64,43,314,1,0,10


##### Importing the Battles dataset and displaying some battles

The code reads a CSV file (`battles.csv`) and keeps a copy of its `FirstPokemon`, `SecondPokemon`, and `Winner` columns. It then replaces the Battle IDs in those columns with the corresponding Pokémon names from the Pokemon dataset. To do this, it builds a dictionary (`unique_pokemon`) that maps each Battle ID to the name of the first row carrying that ID (so that only one of a Pokémon's different forms is considered), and then maps the three columns of the battles DataFrame through that dictionary.

In [3]:
# Load only the columns used below: the two combatants and the winner.
# All three hold Battle IDs, which are translated to names further down.
battles = pd.read_csv('battles.csv')[["FirstPokemon", "SecondPokemon", "Winner"]].copy()

# Build a Battle ID -> Name lookup that keeps the first row seen for each Battle ID
# (the original note says this is meant to skip alternate forms such as Galarian/Alolan).
# drop_duplicates keeps the first occurrence by default, so this yields the same
# lookup as a row-by-row loop without the overhead of DataFrame.iterrows().
unique_pokemon = (
    pokemon.drop_duplicates(subset="Battle ID")
    .set_index("Battle ID")["Name"]
    .to_dict()
)

# Replace Battle IDs with Pokémon names; IDs absent from the lookup become NaN.
for column in ("FirstPokemon", "SecondPokemon", "Winner"):
    battles[column] = battles[column].map(unique_pokemon)

battles.head()

Unnamed: 0,FirstPokemon,SecondPokemon,Winner
0,Larvitar,Nuzleaf,Nuzleaf
1,Virizion,Terrakion,Terrakion
2,Togetic,Beheeyem,Beheeyem
3,Slugma,Druddigon,Druddigon
4,Omastar,Shuckle,Omastar


##### Preprocessing Data

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Columns of the Pokémon table that are not used as model features.
unused_columns = ['Pokedex No.', 'Name', 'Generation', 'Battle ID', 'Type', 'Other Type']
# Stat columns of the Pokémon table; after each merge they are renamed with a
# 'First' / 'Second' prefix (spaces removed), e.g. 'Special Attack' -> 'FirstSpecialAttack'.
stat_columns = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense',
                'Speed', 'Total', 'Legendary']

# Attach the stats of the first and then the second Pokémon to every battle.
# Each pass left-joins on the Pokémon name, drops the unused columns and renames
# the stat columns so the two sides do not collide.
merged_df = battles
feature_columns = []
for side in ('First', 'Second'):
    renamed = {stat: side + stat.replace(' ', '') for stat in stat_columns}
    merged_df = (
        pd.merge(merged_df, pokemon, left_on=side + 'Pokemon', right_on='Name', how='left')
        .drop(columns=unused_columns)
        .rename(columns=renamed)
    )
    feature_columns.extend(renamed.values())

# Drop battles where either Pokémon has missing stats (e.g. a name that found no match).
merged_df = merged_df.dropna(subset=feature_columns)

# Prepare the feature matrix (X) and target variable (y).
# NOTE(review): the target is the winner's *name*, so this is a multiclass problem
# with one class per Pokémon; later cells compare predictions against names, so the
# target is left unchanged here. A binary "first Pokémon won" target would be simpler.
X = merged_df[feature_columns]
y = merged_df['Winner']

# Split BEFORE scaling so that the scaler's mean/variance come from the training
# rows only; fitting it on all rows would leak test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: fit on the training split, then apply the same transform elsewhere.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with code that expects the fully scaled matrix.
X_scaled = scaler.transform(X)

##### Training the Random Forest Model

In [5]:
# Train and evaluate Random Forest model
print("- Random Forest Classifier :\n")
# Fix the seed: an unseeded forest is built differently on every run, so the
# reported accuracy and every later prediction would change on Restart & Run All.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

# Calculate accuracy: fraction of held-out battles whose predicted winner matches exactly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy : ", accuracy)

- Random Forest Classifier :

Accuracy :  0.8228507249336329


##### Importing the test data and displaying some of it

In [6]:
# Load the battles to predict; both columns hold Battle IDs at this point.
tests = pd.read_csv('tests.csv')[["FirstPokemon", "SecondPokemon"]].copy()

# Build a Battle ID -> Name lookup that keeps the first row seen for each Battle ID
# (the original note says this is meant to skip alternate forms such as Galarian/Alolan).
# drop_duplicates keeps the first occurrence by default, so this yields the same
# lookup as a row-by-row loop without the overhead of DataFrame.iterrows().
unique_pokemon = (
    pokemon.drop_duplicates(subset="Battle ID")
    .set_index("Battle ID")["Name"]
    .to_dict()
)

# Replace Battle IDs with Pokémon names; IDs absent from the lookup become NaN.
for column in ("FirstPokemon", "SecondPokemon"):
    tests[column] = tests[column].map(unique_pokemon)

tests.head()

Unnamed: 0,FirstPokemon,SecondPokemon
0,Seaking,Lickitung
1,Ferrothorn,Quagsire
2,Thundurus Therian Forme,Hitmonlee
3,Flaaffy,Maractus
4,Spearow,Alomomola


##### Preprocessing and Predicting using Test Data

In [21]:
# Columns of the Pokémon table that are not used as model features.
unused_columns = ['Pokedex No.', 'Name', 'Generation', 'Battle ID', 'Type', 'Other Type']
# Stat columns of the Pokémon table; after each merge they are renamed with a
# 'First' / 'Second' prefix (spaces removed), e.g. 'Special Attack' -> 'FirstSpecialAttack'.
stat_columns = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense',
                'Speed', 'Total', 'Legendary']

# Attach the stats of the first and then the second Pokémon to every test battle.
testing = tests
feature_columns = []
for side in ('First', 'Second'):
    renamed = {stat: side + stat.replace(' ', '') for stat in stat_columns}
    testing = (
        pd.merge(testing, pokemon, left_on=side + 'Pokemon', right_on='Name', how='left')
        .drop(columns=unused_columns)
        .rename(columns=renamed)
    )
    feature_columns.extend(renamed.values())

# Drop battles where either Pokémon has missing stats; these cannot be predicted.
testing = testing.dropna(subset=feature_columns)

# Prepare the feature matrix (X) for prediction, in the column order used for training.
X_pred = testing[feature_columns]

# Apply the scaler fitted during preprocessing (no re-fitting on prediction data).
X_pred_scaled = scaler.transform(X_pred)

# Make the prediction
winner = rf_classifier.predict(X_pred_scaled)

# Create a DataFrame to store the winners.
# Take the names from `testing`, not `tests`: `testing` contains exactly the rows
# that were fed to the model, so names and predictions stay aligned even when
# dropna removed rows. Slicing `tests` by len(winner) would pair predictions with
# the wrong battles in that case.
prediction_df = pd.DataFrame({"FirstPokemon": testing["FirstPokemon"].values,
                              "SecondPokemon": testing["SecondPokemon"].values,
                              "Winner": winner})

# The model predicts a winner *name* out of every name seen in training, so it can
# return a Pokémon that is not part of the battle; mark those predictions as failed.
not_a_combatant = ((prediction_df["Winner"] != prediction_df["FirstPokemon"]) &
                   (prediction_df["Winner"] != prediction_df["SecondPokemon"]))
prediction_df.loc[not_a_combatant, "Winner"] = "Failed"


prediction_df.head()

Unnamed: 0,FirstPokemon,SecondPokemon,Winner
0,Seaking,Lickitung,Seaking
1,Ferrothorn,Quagsire,Quagsire
2,Thundurus Therian Forme,Hitmonlee,Thundurus Therian Forme
3,Flaaffy,Maractus,Failed
4,Spearow,Alomomola,Alomomola


##### Preprocessing User Input and using it to predict Battle

In [22]:
# Names of the two Pokémon the user wants to pit against each other
user_pokemon_1 = "Yveltal"
user_pokemon_2 = "Xerneas"

# Both names must appear among the names in the Battle ID lookup
known_names = unique_pokemon.values()
if user_pokemon_1 not in known_names or user_pokemon_2 not in known_names:
    print("One or both of the suggested Pokémon do not exist in the dataset. Please try again.")
else:
    # Single-row frame describing the requested battle
    user_battle = pd.DataFrame({'FirstPokemon': [user_pokemon_1], 'SecondPokemon': [user_pokemon_2]})

    # Pokémon-table columns that are not model features, and the stat columns that are
    discarded = ['Pokedex No.', 'Name', 'Generation', 'Battle ID', 'Type', 'Other Type']
    stats = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense',
             'Speed', 'Total', 'Legendary']

    # Join the stats for the first and then the second Pokémon, prefixing the
    # stat columns with 'First' / 'Second' (spaces removed) after each join
    user_battle_merged = user_battle
    wanted = []
    for side in ('First', 'Second'):
        prefixed = {stat: side + stat.replace(' ', '') for stat in stats}
        joined = pd.merge(user_battle_merged, pokemon,
                          left_on=side + 'Pokemon', right_on='Name', how='left')
        user_battle_merged = joined.drop(columns=discarded).rename(columns=prefixed)
        wanted += list(prefixed.values())

    # Discard the row if any stat of either Pokémon is missing
    user_battle_merged = user_battle_merged.dropna(subset=wanted)

    # Feature matrix in the same column order as the training data
    X_user_pred = user_battle_merged[wanted]

    # Reuse the scaler fitted during preprocessing
    X_user_pred_scaled = scaler.transform(X_user_pred)

    # Predict and report; the model may name a Pokémon that is not in this battle
    user_winner = rf_classifier.predict(X_user_pred_scaled)[0]
    if user_winner in (user_pokemon_1, user_pokemon_2):
        # Display the predicted winner
        print("The predicted winner of the battle between", user_pokemon_1, "and", user_pokemon_2, "is:", user_winner)
    else:
        print("Sorry, Machine Learning Model failed to predict this Battle !")


The predicted winner of the battle between Yveltal and Xerneas is: Xerneas
