# Exemple

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Toy dataset: two points described by two numeric features and a class label.
df = pd.DataFrame(
    {
        "X1": [20, 30],
        "X2": [15, 8],
        "target": ["A", "B"],
    }
)

In [None]:
# Render the toy DataFrame built in the previous cell.
display(df)

Unnamed: 0,X1,X2,target
0,20,15,A
1,30,8,B


In [None]:
from sklearn.neighbors import NearestNeighbors

# Feature matrix: only the two numeric columns take part in the distance.
feature_cols = ['X1', 'X2']
X = df[feature_cols]

# Build the estimator, then fit it on the toy points (fit() returns the estimator itself).
distanceKNN = NearestNeighbors(n_neighbors=1)
distanceKNN.fit(X)

# (distances, indices) of the single closest training point to (20, 12).
query_point = [[20, 12]]
distanceKNN.kneighbors(query_point)



(array([[3.]]), array([[0]]))

In [None]:
# Keep the whole (distances, indices) tuple returned for the query point (20, 12).
neighbors = distanceKNN.kneighbors([[20, 12]])

# Index matrix -> first query point, first neighbour -> matching row of df.
index_matrix = neighbors[1]
closest_position = index_matrix[0][0]
df.iloc[closest_position]



X1        20
X2        15
target     A
Name: 0, dtype: object

In [None]:
# Query the model with the feature values of the rows labelled "A".
is_class_a = df['target'] == 'A'
class_a_points = df.loc[is_class_a, ['X1', 'X2']]
distanceKNN.kneighbors(class_a_points)

(array([[0.]]), array([[0]]))

In [None]:
# Same toy data, but now ask for the two closest points.
distanceKNN = NearestNeighbors(n_neighbors=2)
distanceKNN.fit(X)

# Both training points come back, ordered by increasing distance to (20, 12).
query_point = [[20, 12]]
distanceKNN.kneighbors(query_point)



(array([[ 3.        , 10.77032961]]), array([[0, 1]]))

In [None]:
# Query the model with its own training points: each point's first neighbour is
# itself (distance 0), the second one is the other point.
distanceKNN.kneighbors(X)

(array([[ 0.        , 12.20655562],
        [ 0.        , 12.20655562]]),
 array([[0, 1],
        [1, 0]]))

# Challenge

In [None]:
# 1. Importe le dataset pokemon.csv. Chaque ligne représente un pokemon, avec ses différentes caractéristiques (attaque, défense, vitesse, etc...), ainsi que la colonne "légendaire".
# 2. Entraîne l'algorithme NN en utilisant l'ensemble des colonnes numériques comme variables explicatives X.

# NB: Les valeurs nulles (NaN) posent problème à l'ensemble des algorithmes. Ici, pour simplifier, tu peux supprimer la colonne concernée avant d'entrainer ton modèle.
# NB: Pas besoin de train_test_split dans ce cas précis : sinon, tu chercherais les voisins sur un sous-échantillon, alors qu'il y a peut-être des voisins plus proches dans le reste des données.
# NB: Pas besoin de predict ou de score non plus : nous calculons uniquement des distances.

# 3. Utilise la fonction kneighbors() pour trouver le ou les Pokémons le(s) plus proche(s) de chacun des Pokémon légendaires que le champion a l'habitude d'utiliser.
# 4. Fais des recommandations de Pokémon à utiliser par le champion pour sa prochaine compétition, en les priorisant par distance.
#    Evidemment, ta recommandation ne doit pas indiquer un autre pokemon légendaire, même si ce dernier est très proche...

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.neighbors import NearestNeighbors

# Remote location of the Pokémon CSV used for the challenge.
link = "https://raw.githubusercontent.com/murpi/wilddata/master/pokemon.csv"
# Download and parse the CSV into a DataFrame (requires network access).
df_pokemon = pd.read_csv(link)

In [None]:
# Preview the first rows to check that the file loaded as expected.
display(df_pokemon.head())

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [None]:
# (number of rows, number of columns) of the loaded dataset.
print(df_pokemon.shape)

(800, 12)


In [None]:
# Column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it must
# not be wrapped in print(): that only appends a stray "None" line to the output.
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   HP          800 non-null    int64 
 5   Attack      800 non-null    int64 
 6   Defense     800 non-null    int64 
 7   Sp. Atk     800 non-null    int64 
 8   Sp. Def     800 non-null    int64 
 9   Speed       800 non-null    int64 
 10  Generation  800 non-null    int64 
 11  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(3)
memory usage: 69.7+ KB
None


In [None]:
# Summary statistics of the numeric columns, rendered as a Markdown table.
print(df_pokemon.describe().to_markdown())

|       |       # |       HP |   Attack |   Defense |   Sp. Atk |   Sp. Def |    Speed |   Generation |
|:------|--------:|---------:|---------:|----------:|----------:|----------:|---------:|-------------:|
| count | 800     | 800      | 800      |  800      |  800      |  800      | 800      |    800       |
| mean  | 400.5   |  69.2588 |  79.0012 |   73.8425 |   72.82   |   71.9025 |  68.2775 |      3.32375 |
| std   | 231.084 |  25.5347 |  32.4574 |   31.1835 |   32.7223 |   27.8289 |  29.0605 |      1.66129 |
| min   |   1     |   1      |   5      |    5      |   10      |   20      |   5      |      1       |
| 25%   | 200.75  |  50      |  55      |   50      |   49.75   |   50      |  45      |      2       |
| 50%   | 400.5   |  65      |  75      |   70      |   65      |   70      |  65      |      3       |
| 75%   | 600.25  |  80      | 100      |   90      |   95      |   90      |  90      |      5       |
| max   | 800     | 255      | 190      |  230      |  194      

In [None]:
# Number of missing values per column.
print(df_pokemon.isna().sum())

#               0
Name            0
Type 1          0
Type 2        386
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
Generation      0
Legendary       0
dtype: int64


In [None]:
# List the column labels of the dataset.
print(df_pokemon.columns)

Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')


In [None]:
def concat_type(row):
  """Return a Pokémon's type(s) as a single comma-separated string.

  Parameters
  ----------
  row : mapping or pandas.Series
      Must provide the keys "Type 1" (a string) and "Type 2" (a string, or a
      missing value when the Pokémon has only one type).

  Returns
  -------
  str
      "Type 1" alone when "Type 2" is missing, otherwise "Type 1,Type 2".
  """
  primary, secondary = row["Type 1"], row["Type 2"]
  # pd.isna recognises every missing-value marker (np.nan, float("nan"), None,
  # pd.NA). An identity test against np.nan only matches that one singleton object,
  # so any other NaN would fall through to the string concatenation and raise.
  if pd.isna(secondary):
    return primary
  return primary + "," + secondary

# Build a single "Type" column by calling concat_type on every row (axis=1).
df_pokemon["Type"] = df_pokemon.apply(concat_type, axis = 1)

# Preview the frame with the new column appended.
display(df_pokemon.head())

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Type
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,"Grass,Poison"
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,"Grass,Poison"
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,"Grass,Poison"
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,"Grass,Poison"
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False,Fire


In [None]:
# "Type 1" / "Type 2" are now merged into "Type"; "#" is dropped from the features too.
unused_columns = ["#", "Type 1", "Type 2"]
df_pokemon = df_pokemon.drop(unused_columns, axis = 1)
display(df_pokemon.head())

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Type
0,Bulbasaur,45,49,49,65,65,45,1,False,"Grass,Poison"
1,Ivysaur,60,62,63,80,80,60,1,False,"Grass,Poison"
2,Venusaur,80,82,83,100,100,80,1,False,"Grass,Poison"
3,Mega Venusaur,80,100,123,122,120,80,1,False,"Grass,Poison"
4,Charmander,39,52,43,60,50,65,1,False,Fire


In [None]:
# One-hot encode the comma-separated "Type" strings: one 0/1 column per distinct type.
type_strings = df_pokemon['Type']
df_dummies = type_strings.str.get_dummies(",")

# Append the indicator columns to the right of the existing ones.
df_pokemon = pd.concat((df_pokemon, df_dummies), axis = "columns")
display(df_dummies.head())

Unnamed: 0,Bug,Dark,Dragon,Electric,Fairy,Fighting,Fire,Flying,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# The indicator columns now carry the type information, so drop the combined "Type" string.
df_pokemon = df_pokemon.drop(columns = "Type")

In [None]:
# Show the frame after encoding (the notebook truncates long frames in the rendered view).
display(df_pokemon)

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Bug,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,Bulbasaur,45,49,49,65,65,45,1,False,0,...,0,1,0,0,0,1,0,0,0,0
1,Ivysaur,60,62,63,80,80,60,1,False,0,...,0,1,0,0,0,1,0,0,0,0
2,Venusaur,80,82,83,100,100,80,1,False,0,...,0,1,0,0,0,1,0,0,0,0
3,Mega Venusaur,80,100,123,122,120,80,1,False,0,...,0,1,0,0,0,1,0,0,0,0
4,Charmander,39,52,43,60,50,65,1,False,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,Diancie,50,100,150,100,150,50,6,True,0,...,0,0,0,0,0,0,0,1,0,0
796,Mega Diancie,50,160,110,160,110,110,6,True,0,...,0,0,0,0,0,0,0,1,0,0
797,Hoopa Confined,80,110,60,150,130,70,6,True,0,...,1,0,0,0,0,0,1,0,0,0
798,Hoopa Unbound,80,160,60,170,130,80,6,True,0,...,0,0,0,0,0,0,1,0,0,0


In [None]:
# NOTE(review): feature scaling is left disabled, so the distances computed below use
# the raw column values. These two lines would presumably not run as-is: MinMaxScaler
# is not imported in the visible cells and df_pokemon still holds the text column
# "Name" at this point — TODO confirm before re-enabling.
#scaler = MinMaxScaler()
#df_pokemon = scaler.fit_transform(df_pokemon)

In [None]:
# Boolean mask selecting the legendary Pokémon.
condition = df_pokemon["Legendary"].eq(True)
# Keep those rows; the flag is constant inside the subset, so drop it.
df_legendary = df_pokemon.loc[condition].drop(columns = "Legendary")

In [None]:
# Inspect the legendary subset (row labels are still those of df_pokemon here).
display(df_legendary)

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Bug,Dark,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
156,Articuno,90,85,100,95,125,85,1,0,0,...,0,0,0,1,0,0,0,0,0,0
157,Zapdos,90,90,85,125,90,100,1,0,0,...,0,0,0,0,0,0,0,0,0,0
158,Moltres,90,100,90,125,85,90,1,0,0,...,0,0,0,0,0,0,0,0,0,0
162,Mewtwo,106,110,90,154,90,130,1,0,0,...,0,0,0,0,0,0,1,0,0,0
163,Mega Mewtwo X,106,190,100,154,100,130,1,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,Diancie,50,100,150,100,150,50,6,0,0,...,0,0,0,0,0,0,0,1,0,0
796,Mega Diancie,50,160,110,160,110,110,6,0,0,...,0,0,0,0,0,0,0,1,0,0
797,Hoopa Confined,80,110,60,150,130,70,6,0,0,...,1,0,0,0,0,0,1,0,0,0
798,Hoopa Unbound,80,160,60,170,130,80,6,0,1,...,0,0,0,0,0,0,1,0,0,0


In [None]:
# Boolean mask selecting the non-legendary Pokémon (the recommendation candidates).
condition = df_pokemon["Legendary"].eq(False)
# Keep those rows; the flag is constant inside the subset, so drop it.
df_non_legendary = df_pokemon.loc[condition].drop(columns = "Legendary")

In [None]:
# Renumber both subsets from 0 (old labels discarded) so that row labels coincide
# with row positions in each subset.
df_legendary = df_legendary.reset_index(drop = True)
df_non_legendary = df_non_legendary.reset_index(drop = True)

In [None]:
# Plain-text dump of the legendary subset after the index reset.
print(df_legendary)

              Name   HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  Generation  \
0         Articuno   90      85      100       95      125     85           1   
1           Zapdos   90      90       85      125       90    100           1   
2          Moltres   90     100       90      125       85     90           1   
3           Mewtwo  106     110       90      154       90    130           1   
4    Mega Mewtwo X  106     190      100      154      100    130           1   
..             ...  ...     ...      ...      ...      ...    ...         ...   
60         Diancie   50     100      150      100      150     50           6   
61    Mega Diancie   50     160      110      160      110    110           6   
62  Hoopa Confined   80     110       60      150      130     70           6   
63   Hoopa Unbound   80     160       60      170      130     80           6   
64       Volcanion   80     110      120      130       90     70           6   

    Bug  Dark  ...  Ghost  

In [None]:
# Feature matrices: every column except the "Name" label.
X_leg = df_legendary.drop(columns = "Name")
X_non_leg = df_non_legendary.drop(columns = "Name")


In [None]:
# Fit on the non-legendary Pokémon only, so a legendary can never be recommended.
distanceKNN = NearestNeighbors(n_neighbors=3)
distanceKNN.fit(X_non_leg)

# For every legendary row: distances to, and positions of, its 3 nearest candidates.
result = distanceKNN.kneighbors(X_leg)
print(result)

(array([[ 26.01922366,  30.11644069,  32.81767816],
       [ 16.673332  ,  21.21320344,  21.21320344],
       [ 11.74734012,  27.51363298,  29.59729717],
       [ 42.26109322,  43.11612227,  45.54119015],
       [ 57.15767665,  62.11280061,  66.40783086],
       [ 76.26925986,  81.11103501,  88.63972022],
       [ 25.11971337,  25.33771892,  25.37715508],
       [ 24.18677324,  28.74021573,  33.98529094],
       [ 30.08321791,  30.09983389,  31.22498999],
       [ 46.11941023,  62.5299928 ,  64.45153218],
       [ 41.12177039,  58.04308744,  58.18075283],
       [ 43.63484846,  44.76605857,  47.96873982],
       [ 64.41273166,  75.02666193,  75.35914012],
       [ 30.4466747 ,  41.95235393,  44.02272141],
       [ 36.22154055,  37.85498646,  41.41255848],
       [ 46.27094121,  47.86439178,  58.97457079],
       [ 25.59296778,  30.5450487 ,  30.95157508],
       [ 36.02776707,  48.5386444 ,  50.59644256],
       [ 36.51027253,  38.17066937,  47.19110086],
       [ 72.9177619 ,  82.6256

In [None]:
# Show the non-legendary rows closest to the first legendary Pokémon (row 0 of X_leg).
# `result` is the (distances, indices) tuple from the n_neighbors=3 query above. The
# previous code indexed with `neighbors`, a leftover from the toy example whose index
# array is [0], so it always printed row 0 of df_non_legendary.
print(df_non_legendary.iloc[result[1][0]].to_markdown())

|    | Name      |   HP |   Attack |   Defense |   Sp. Atk |   Sp. Def |   Speed |   Generation |   Bug |   Dark |   Dragon |   Electric |   Fairy |   Fighting |   Fire |   Flying |   Ghost |   Grass |   Ground |   Ice |   Normal |   Poison |   Psychic |   Rock |   Steel |   Water |
|---:|:----------|-----:|---------:|----------:|----------:|----------:|--------:|-------------:|------:|-------:|---------:|-----------:|--------:|-----------:|-------:|---------:|--------:|--------:|---------:|------:|---------:|---------:|----------:|-------:|--------:|--------:|
|  0 | Bulbasaur |   45 |       49 |        49 |        65 |        65 |      45 |            1 |     0 |      0 |        0 |          0 |       0 |          0 |      0 |        0 |       0 |       1 |        0 |     0 |        0 |        1 |         0 |      0 |       0 |       0 |


In [None]:
# NOTE(review): these row positions are hard-coded, presumably copied by hand from a
# printed kneighbors index array — TODO confirm which legendary row they belong to and
# derive them from `result[1]` instead.
df_non_legendary.iloc[[382, 469, 264]]

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Bug,Dark,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
382,Mega Absol,65,150,60,115,60,115,3,0,1,...,0,0,0,0,0,0,0,0,0,0
469,Mega Lucario,70,145,88,140,70,112,4,0,0,...,0,0,0,0,0,0,0,0,1,0
264,Mega Sceptile,70,110,75,145,85,145,3,0,0,...,0,1,0,0,0,0,0,0,0,0


In [None]:
# Repeats the earlier n_neighbors=3 fit and query (identical statements), re-assigning
# both distanceKNN and result.
distanceKNN = NearestNeighbors(n_neighbors=3).fit(X_non_leg)

result = distanceKNN.kneighbors(X_leg)

In [None]:
# Print the (distances, indices) tuple: one row per legendary Pokémon.
print(result)

(array([[ 26.01922366,  30.11644069,  32.81767816],
       [ 16.673332  ,  21.21320344,  21.21320344],
       [ 11.74734012,  27.51363298,  29.59729717],
       [ 42.26109322,  43.11612227,  45.54119015],
       [ 57.15767665,  62.11280061,  66.40783086],
       [ 76.26925986,  81.11103501,  88.63972022],
       [ 25.11971337,  25.33771892,  25.37715508],
       [ 24.18677324,  28.74021573,  33.98529094],
       [ 30.08321791,  30.09983389,  31.22498999],
       [ 46.11941023,  62.5299928 ,  64.45153218],
       [ 41.12177039,  58.04308744,  58.18075283],
       [ 43.63484846,  44.76605857,  47.96873982],
       [ 64.41273166,  75.02666193,  75.35914012],
       [ 30.4466747 ,  41.95235393,  44.02272141],
       [ 36.22154055,  37.85498646,  41.41255848],
       [ 46.27094121,  47.86439178,  58.97457079],
       [ 25.59296778,  30.5450487 ,  30.95157508],
       [ 36.02776707,  48.5386444 ,  50.59644256],
       [ 36.51027253,  38.17066937,  47.19110086],
       [ 72.9177619 ,  82.6256

In [None]:
# Rows of the three nearest non-legendary neighbours of the second legendary Pokémon
# (row 1 of X_leg), taken from the kneighbors index array instead of hand-copied
# positions so the lookup stays correct if the data or the model changes.
df_non_legendary.iloc[result[1][1]]

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Bug,Dark,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
653,Hydreigon,92,105,90,125,90,98,5,0,1,...,0,0,0,0,0,0,0,0,0,0
656,Keldeo Ordinary Forme,91,72,90,129,90,108,5,0,0,...,0,0,0,0,0,0,0,0,0,1
657,Keldeo Resolute Forme,91,72,90,129,90,108,5,0,0,...,0,0,0,0,0,0,0,0,0,1


In [None]:
# Same search restricted to the two closest non-legendary candidates per legendary.
distanceKNN = NearestNeighbors(n_neighbors=2)
distanceKNN.fit(X_non_leg)

# The last expression is displayed: (distances, indices), one row per legendary Pokémon.
distanceKNN.kneighbors(X_leg)

(array([[ 26.01922366,  30.11644069],
        [ 16.673332  ,  21.21320344],
        [ 11.74734012,  27.51363298],
        [ 42.26109322,  43.11612227],
        [ 57.15767665,  62.11280061],
        [ 76.26925986,  81.11103501],
        [ 25.11971337,  25.33771892],
        [ 24.18677324,  28.74021573],
        [ 30.08321791,  30.09983389],
        [ 46.11941023,  62.5299928 ],
        [ 41.12177039,  58.04308744],
        [ 43.63484846,  44.76605857],
        [ 64.41273166,  75.02666193],
        [ 30.4466747 ,  41.95235393],
        [ 36.22154055,  37.85498646],
        [ 46.27094121,  47.86439178],
        [ 25.59296778,  30.5450487 ],
        [ 36.02776707,  48.5386444 ],
        [ 36.51027253,  38.17066937],
        [ 72.9177619 ,  82.62566187],
        [ 33.95585369,  36.78314832],
        [ 55.86591089,  62.88083969],
        [ 36.79673899,  44.39594576],
        [ 67.01492371,  67.48333128],
        [  1.73205081,   2.        ],
        [ 53.63767333,  61.78187436],
        [ 98

In [None]:
# NOTE(review): hard-coded positions, presumably the two nearest non-legendary
# neighbours of the first legendary row copied from the output above — TODO confirm
# and derive them from the kneighbors index array instead.
df_non_legendary.iloc[[11, 162]]

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Bug,Dark,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
11,Blastoise,79,83,100,85,105,78,1,0,0,...,0,0,0,0,0,0,0,0,0,1
162,Meganium,80,82,100,83,100,80,2,0,0,...,0,1,0,0,0,0,0,0,0,0
