# IMPORT DATA

In [57]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.neighbors import NearestNeighbors

# Location of the Pokémon stats CSV (comma-separated, hosted on GitHub)
link = "https://raw.githubusercontent.com/murpi/wilddata/master/pokemon.csv"

# Read the file once and keep the result untouched as the raw reference
raw_pkm = pd.read_csv(link)

# Working copy that the later cells start from
df_pkm = raw_pkm.copy()

# EXPLORATION

In [58]:
# Explore on a separate copy so nothing done here can alter df_pkm
df_pkm_explo = df_pkm.copy()

# Column dtypes and non-null counts (info() prints directly to stdout)
df_pkm_explo.info()

# Summary statistics of the numeric columns, then the first and last five rows
display(
    df_pkm_explo.describe(),
    df_pkm_explo.head(),
    df_pkm_explo.tail(),
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   HP          800 non-null    int64 
 5   Attack      800 non-null    int64 
 6   Defense     800 non-null    int64 
 7   Sp. Atk     800 non-null    int64 
 8   Sp. Def     800 non-null    int64 
 9   Speed       800 non-null    int64 
 10  Generation  800 non-null    int64 
 11  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(3)
memory usage: 69.7+ KB


Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,400.5,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,231.0844,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,200.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,400.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,600.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,800.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
795,796,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,797,Mega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,798,Hoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,799,Hoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True
799,800,Volcanion,Fire,Water,80,110,120,130,90,70,6,True


# CLEANING

In [60]:
# Build the cleaned frame from df_pkm without mutating it: dropna returns a new
# DataFrame with every column that contains at least one missing value removed
# (per the info() output above, only "Type 2" has missing values).
df_pkm_clean = df_pkm.dropna(axis="columns")

display(df_pkm_clean.head())


Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,39,52,43,60,50,65,1,False


## One-Hot Encode Type 1 (Dummy Columns)

In [61]:
# One 0/1 indicator column per distinct value of "Type 1"
type1_dummies = df_pkm_clean['Type 1'].str.get_dummies()

# This cell reassigns df_pkm_clean from itself, so running it twice would append
# a second set of identically named indicator columns. Dropping any indicator
# columns that are already present first makes the cell safe to re-run
# (errors='ignore' makes the drop a no-op on the first run).
df_pkm_clean = pd.concat(
    [
        df_pkm_clean.drop(columns=type1_dummies.columns, errors='ignore'),
        type1_dummies,
    ],
    axis=1,
)
display(df_pkm_clean.head())

Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,1,Bulbasaur,Grass,45,49,49,65,65,45,1,...,0,1,0,0,0,0,0,0,0,0
1,2,Ivysaur,Grass,60,62,63,80,80,60,1,...,0,1,0,0,0,0,0,0,0,0
2,3,Venusaur,Grass,80,82,83,100,100,80,1,...,0,1,0,0,0,0,0,0,0,0
3,4,Mega Venusaur,Grass,80,100,123,122,120,80,1,...,0,1,0,0,0,0,0,0,0,0
4,5,Charmander,Fire,39,52,43,60,50,65,1,...,0,0,0,0,0,0,0,0,0,0


## Split Legendary and Non-Legendary Pokémon

In [62]:
# "Legendary" is a boolean column, so it can be used directly as a row mask
is_legendary = df_pkm_clean['Legendary']

# Rows flagged as legendary, and the complement
df_legendary = df_pkm_clean[is_legendary]
df_no_legendary = df_pkm_clean[~is_legendary]

# Preview the first rows of each subset
display(df_legendary.head(), df_no_legendary.head())


Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
156,157,Articuno,Ice,90,85,100,95,125,85,1,...,0,0,0,1,0,0,0,0,0,0
157,158,Zapdos,Electric,90,90,85,125,90,100,1,...,0,0,0,0,0,0,0,0,0,0
158,159,Moltres,Fire,90,100,90,125,85,90,1,...,0,0,0,0,0,0,0,0,0,0
162,163,Mewtwo,Psychic,106,110,90,154,90,130,1,...,0,0,0,0,0,0,1,0,0,0
163,164,Mega Mewtwo X,Psychic,106,190,100,154,100,130,1,...,0,0,0,0,0,0,1,0,0,0


Unnamed: 0,#,Name,Type 1,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,1,Bulbasaur,Grass,45,49,49,65,65,45,1,...,0,1,0,0,0,0,0,0,0,0
1,2,Ivysaur,Grass,60,62,63,80,80,60,1,...,0,1,0,0,0,0,0,0,0,0
2,3,Venusaur,Grass,80,82,83,100,100,80,1,...,0,1,0,0,0,0,0,0,0,0
3,4,Mega Venusaur,Grass,80,100,123,122,120,80,1,...,0,1,0,0,0,0,0,0,0,0
4,5,Charmander,Fire,39,52,43,60,50,65,1,...,0,0,0,0,0,0,0,0,0,0


## Find Nearest Neighbours

In [63]:
# Feature matrix: every numeric column of df_no_legendary except the "#" identifier
X = df_no_legendary.select_dtypes('number').drop(columns="#")

# Names of the legendary Pokémon to look up
target = ['Mewtwo', 'Lugia', 'Rayquaza', 'Giratina Origin Forme', 'Dialga', 'Palkia']

# Number of nearest neighbours to retrieve for each target
k = 3

# Fit the neighbour search on the non-legendary Pokémon only
# NOTE(review): the features are used unscaled, so the 0/1 type columns and
# "Generation" presumably weigh very little against the base stats in the
# distance — confirm this is intended.
distanceKNN = NearestNeighbors(n_neighbors=k).fit(X)

for name in target:
    # Feature row of the legendary Pokémon whose name matches exactly
    query = df_legendary.loc[df_legendary['Name'] == name, X.columns]
    if query.empty:
        # kneighbors() rejects a zero-row query with an error that does not say
        # which name was missing, so fail here with the offending name instead
        raise KeyError(f"Aucun pokémon légendaire nommé {name!r}")

    # kneighbors returns (distances, positions), one row per query row;
    # index [0] keeps the result for the first matching row
    nearest_neighbors = distanceKNN.kneighbors(query)
    distances, positions = nearest_neighbors
    nearest_pkm_ind = positions[0]

    # Positions refer to rows of X, which has the same row order as df_no_legendary
    nearest_pkm = df_no_legendary.iloc[nearest_pkm_ind]

    # Adjacent f-strings are concatenated at compile time; unlike a backslash
    # continuation inside the literal, this keeps source indentation out of the output
    print(f"Les {k} pokémons les plus proches de {name} sont : "
          f"\n {nearest_pkm['Name'].values} avec une distance de {distances[0]} \n")

Les 3 pokémons les plus proches de Mewtwo sont :           
 ['Mega Houndoom' 'Mega Sceptile' 'Mega Pidgeot'] avec une distance de [42.24926035 43.10452412 45.53020975] 

Les 3 pokémons les plus proches de Lugia sont :           
 ['Cresselia' 'Mega Venusaur' 'Celebi'] avec une distance de [46.10856753 62.51399843 64.43601477] 

Les 3 pokémons les plus proches de Rayquaza sont :           
 ['Mega Blaziken' 'Mega Garchomp' 'Mega Charizard X'] avec une distance de [36.76955262 44.37341546 45.01110974] 

Les 3 pokémons les plus proches de Giratina Origin Forme sont :           
 ['Manaphy' 'Celebi' 'Mew'] avec une distance de [58.32666629 58.36094585 58.403767  ] 

Les 3 pokémons les plus proches de Dialga sont :           
 ['Mega Blastoise' 'Mega Charizard X' 'Hydreigon'] avec une distance de [36.53765181 37.42993454 44.50842617] 

Les 3 pokémons les plus proches de Palkia sont :           
 ['Mega Charizard Y' 'Mega Blastoise' 'Hydreigon'] avec une distance de [31.63858404 39.40812099