In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [2]:
df_pokemon = pd.read_csv('https://raw.githubusercontent.com/murpi/wilddata/master/pokemon.csv')
df_pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [3]:
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   HP          800 non-null    int64 
 5   Attack      800 non-null    int64 
 6   Defense     800 non-null    int64 
 7   Sp. Atk     800 non-null    int64 
 8   Sp. Def     800 non-null    int64 
 9   Speed       800 non-null    int64 
 10  Generation  800 non-null    int64 
 11  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(3)
memory usage: 69.7+ KB


In [4]:
df_pokemon.isna().sum()

#               0
Name            0
Type 1          0
Type 2        386
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
Generation      0
Legendary       0
dtype: int64

2. Find closest pokemons

2.1 Prepare data

In [5]:
 # we noticed that there are missing values, we will not be able to identify these Type 2 so we can delete it
df_pokemon.drop("Type 2", axis=1, inplace=True)
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   HP          800 non-null    int64 
 4   Attack      800 non-null    int64 
 5   Defense     800 non-null    int64 
 6   Sp. Atk     800 non-null    int64 
 7   Sp. Def     800 non-null    int64 
 8   Speed       800 non-null    int64 
 9   Generation  800 non-null    int64 
 10  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(2)
memory usage: 63.4+ KB


2.2 Find non legendary pokemons (X)

In [6]:
# we create a new df without the legendary Pokemons (we cannot propose them to the champion)
# recommandations will come from that df, our model KNN will be fitted with this df
df_pokemon_nonlegendary = df_pokemon.loc[df_pokemon["Legendary"] == False]

In [7]:
df_pokemon_nonlegendary

Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...
787,788,Gourgeist Super Size,Ghost,85,100,122,58,75,54,6,False
788,789,Bergmite,Ice,55,69,85,32,35,28,6,False
789,790,Avalugg,Ice,95,117,184,44,46,28,6,False
790,791,Noibat,Flying,40,30,35,45,40,55,6,False


In [8]:
# keep only numeric variable, remove column # which as no use as an explanatory variable
X = df_pokemon_nonlegendary.select_dtypes('number').drop(columns = "#")
X

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
0,45,49,49,65,65,45,1
1,60,62,63,80,80,60,1
2,80,82,83,100,100,80,1
3,80,100,123,122,120,80,1
4,39,52,43,60,50,65,1
...,...,...,...,...,...,...,...
787,85,100,122,58,75,54,6
788,55,69,85,32,35,28,6
789,95,117,184,44,46,28,6
790,40,30,35,45,40,55,6


2.3 Fit the model

In [9]:
# fit model with 3 neighbors
# note that this model is trained with X for which variables come from df_pokemon_nonlegendary
modelNN = NearestNeighbors(n_neighbors=3)
modelNN.fit(X)

2.4 Find champion's pokemon

In [10]:
# make a list of the champion's Pokemons
df_pokemon.loc[df_pokemon['Name'].str.contains('Giratina')]  # warning : Giratina has two different names

Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
544,545,Giratina Altered Forme,Ghost,150,100,120,100,120,90,4,True
545,546,Giratina Origin Forme,Ghost,150,120,100,120,100,90,4,True


In [11]:
list_champion = ['Mewtwo','Lugia','Rayquaza','Giratina Origin Forme', 'Dialga', 'Palkia']

2.5 Find closest pokemons

In [12]:
# for each Pokemon of the list find 3 nearest neighbours
for Pokemon in list_champion:
    # for a Pokemon, pass only same columns as X that our model needs
    neighbors = modelNN.kneighbors(df_pokemon.loc[df_pokemon['Name'] == Pokemon, X.columns])
    print(f"Recommandations for Pokemon {Pokemon} :")
    # find row number (not row name) from the nearest neighbors into dataframe on which the model was fitted
    closest_pok_ind = neighbors[1][0]
    closest_pok = df_pokemon_nonlegendary['Name'].iloc[closest_pok_ind] # ! warning ! we can use df_pokemon_nonlegendary because X is based on this dataframe
    print("Closest Pokemons : ", list(closest_pok))
    print("Respectives distances : ", neighbors[0][0])
    print()

Recommandations for Pokemon Mewtwo :
Closest Pokemons :  ['Mega Houndoom', 'Mega Sceptile', 'Mega Pidgeot']
Respectives distances :  [42.22558466 43.08131846 45.50824101]

Recommandations for Pokemon Lugia :
Closest Pokemons :  ['Cresselia', 'Mega Venusaur', 'Celebi']
Respectives distances :  [46.10856753 62.49799997 64.43601477]

Recommandations for Pokemon Rayquaza :
Closest Pokemons :  ['Mega Blaziken', 'Mega Garchomp', 'Mega Charizard X']
Respectives distances :  [36.74234614 44.37341546 44.98888752]

Recommandations for Pokemon Giratina Origin Forme :
Closest Pokemons :  ['Manaphy', 'Celebi', 'Mew']
Respectives distances :  [58.30951895 58.34380858 58.38664231]

Recommandations for Pokemon Dialga :
Closest Pokemons :  ['Mega Blastoise', 'Mega Charizard X', 'Hydreigon']
Respectives distances :  [36.51027253 37.40320842 44.48595284]

Recommandations for Pokemon Palkia :
Closest Pokemons :  ['Mega Charizard Y', 'Mega Blastoise', 'Hydreigon']
Respectives distances :  [31.60696126 39.4

# Do the Same nut now filter by Type 1 before

In [13]:
# for each Pokemon of the list find 3 nearest neighbours

for Pokemon in list_champion:
    type_1 = df_pokemon.loc[df_pokemon['Name'] == Pokemon]['Type 1'].values[0]

    same_type_pokemons = df_pokemon_nonlegendary.loc[df_pokemon_nonlegendary['Type 1'] == type_1]
    X = same_type_pokemons.select_dtypes('number').drop(columns = "#")

    modelNN = NearestNeighbors(n_neighbors=3)
    modelNN.fit(X)
    # for a Pokemon, pass only same columns as X that our model needs
    neighbors = modelNN.kneighbors(df_pokemon.loc[df_pokemon['Name'] == Pokemon, X.columns])
    print(f"Recommandations for Pokemon {Pokemon} (Type 1 {type_1}):")
    # find row number (not row name) from the nearest neighbors into dataframe on which the model was fitted
    closest_pok_ind = neighbors[1][0]
    closest_pok = df_pokemon_nonlegendary['Name'].iloc[closest_pok_ind] # ! warning ! we can use df_pokemon_nonlegendary because X is based on this dataframe
    print("Closest Pokemons : ", list(closest_pok))
    print("Respectives distances : ", neighbors[0][0])
    print()

Recommandations for Pokemon Mewtwo (Type 1 Psychic):
Closest Pokemons :  ['Mega Charizard X', 'Caterpie', 'Wartortle']
Respectives distances :  [64.43601477 64.44377394 74.88658091]

Recommandations for Pokemon Lugia (Type 1 Psychic):
Closest Pokemons :  ['Spearow', 'Caterpie', 'Mega Charizard X']
Respectives distances :  [46.10856753 64.43601477 64.44377394]

Recommandations for Pokemon Rayquaza (Type 1 Dragon):
Closest Pokemons :  ['Mega Blastoise', 'Mega Charizard X', 'Mega Charizard Y']
Respectives distances :  [44.37341546 46.36809248 57.00877125]

Recommandations for Pokemon Giratina Origin Forme (Type 1 Ghost):
Closest Pokemons :  ['Blastoise', 'Pidgeotto', 'Arbok']
Respectives distances :  [ 88.61151167  97.0051545  104.29765098]

Recommandations for Pokemon Dialga (Type 1 Steel):
Closest Pokemons :  ['Blastoise', 'Mega Blastoise', 'Kakuna']
Respectives distances :  [65.19969325 66.71581522 93.01075207]

Recommandations for Pokemon Palkia (Type 1 Water):
Closest Pokemons :  ['M

In [14]:
df_pokemon.iloc[242, :]

#                   243
Name          Octillery
Type 1            Water
HP                   75
Attack              105
Defense              75
Sp. Atk             105
Sp. Def              75
Speed                45
Generation            2
Legendary         False
Name: 242, dtype: object