In [48]:
# Import pandas for data analysis
import pandas as pd

# Import numpy for numerical computation
import numpy as np

# Import matplotlib and seaborn for data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import sidetable

Name: Name of each pokemon

Type 1: Each pokemon has a type, this determines weakness/resistance to attacks

Type 2: Some pokemon are dual-type and have a second type; this column is empty for single-type pokemon

Total: sum of all stats that come after this, a general guide to how strong a pokemon is

HP: hit points, or health, defines how much damage a pokemon can withstand before fainting

Attack: the base modifier for normal attacks (e.g. Scratch, Punch)

Defense: the base damage resistance against normal attacks

Sp. Atk: special attack, the base modifier for special attacks (e.g. Fire Blast, Bubble Beam)

Sp. Def: special defense, the base damage resistance against special attacks

Speed: determines which pokemon attacks first each round

In [24]:
# Read the Pokemon dataset from the CSV file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Pokemon.csv')
df.head(n=5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [26]:
# Remove the '#' column that came in with the CSV.
# errors='ignore' keeps this cell idempotent: re-running it after '#' has
# already been dropped is a no-op instead of raising KeyError.
df = df.drop(columns=['#'], errors='ignore')

In [27]:
# Quick check of the first two rows after removing the '#' column.
df.head(n=2)

Unnamed: 0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False


In [28]:
# Summarize each column's dtype and non-null count to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        800 non-null    object
 1   Type 1      800 non-null    object
 2   Type 2      414 non-null    object
 3   Total       800 non-null    int64 
 4   HP          800 non-null    int64 
 5   Attack      800 non-null    int64 
 6   Defense     800 non-null    int64 
 7   Sp. Atk     800 non-null    int64 
 8   Sp. Def     800 non-null    int64 
 9   Speed       800 non-null    int64 
 10  Generation  800 non-null    int64 
 11  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(3)
memory usage: 69.7+ KB


In [29]:
# Descriptive statistics for the numeric columns, transposed so that each
# stat column (Total, HP, Attack, ...) becomes a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Total,800.0,435.1025,119.96304,180.0,330.0,450.0,515.0,780.0
HP,800.0,69.25875,25.534669,1.0,50.0,65.0,80.0,255.0
Attack,800.0,79.00125,32.457366,5.0,55.0,75.0,100.0,190.0
Defense,800.0,73.8425,31.183501,5.0,50.0,70.0,90.0,230.0
Sp. Atk,800.0,72.82,32.722294,10.0,49.75,65.0,95.0,194.0
Sp. Def,800.0,71.9025,27.828916,20.0,50.0,70.0,90.0,230.0
Speed,800.0,68.2775,29.060474,5.0,45.0,65.0,90.0,180.0
Generation,800.0,3.32375,1.66129,1.0,2.0,3.0,5.0,6.0


### Handling Missing Values

Only the "Type 2" column has missing values. We fill them with the string "None", because a NaN in "Type 2" means the pokemon has no second type.

In [30]:
# A missing "Type 2" means the pokemon has no second type, so label it "None".
# The fill is limited to that column: a frame-wide fillna would also write the
# string "None" into any other column that happened to contain NaN.
# Assigning the result back (instead of inplace=True) makes the update explicit.
df["Type 2"] = df["Type 2"].fillna("None")

# Show two random rows as a spot check.
df.sample(2)

Unnamed: 0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
377,Lileep,Rock,Grass,355,66,41,77,61,87,23,3,False
472,Drifloon,Ghost,Flying,348,90,50,34,60,44,70,4,False


In [51]:
# Percentage of missing entries per column, rounded to one decimal place.
print(
    "Porcentaje de valores nulos:\n ",
    df.isna().sum().div(len(df)).mul(100).round(1),
)

Porcentaje de valores nulos:
  Name          0.0
Type 1        0.0
Type 2        0.0
Total         0.0
HP            0.0
Attack        0.0
Defense       0.0
Sp. Atk       0.0
Sp. Def       0.0
Speed         0.0
Generation    0.0
Legendary     0.0
dtype: float64


In [52]:
# Per-column missing-value summary (missing / total / percent) through the
# `.stb` accessor that the sidetable import registers on DataFrames.
df.stb.missing() 

Unnamed: 0,missing,total,percent
Name,0,800,0.0
Type 1,0,800,0.0
Type 2,0,800,0.0
Total,0,800,0.0
HP,0,800,0.0
Attack,0,800,0.0
Defense,0,800,0.0
Sp. Atk,0,800,0.0
Sp. Def,0,800,0.0
Speed,0,800,0.0


### Data Exploration

In [6]:
# df.shape is (n_rows, n_columns): index 0 counts rows, index 1 counts columns.
print('There are', df.shape[0], 'rows and', df.shape[1], 'columns.')

There are 800 columns and 12 files.


In [8]:
# Collect the distinct values found in the "Type 1" column and display them.
type_1 = df.loc[:, "Type 1"].unique()
type_1

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)

In [9]:
# Collect the distinct values found in the "Type 2" column and display them.
type_2 = df.loc[:, "Type 2"].unique()
type_2

array(['Poison', 'None', 'Flying', 'Dragon', 'Ground', 'Fairy', 'Grass',
       'Fighting', 'Psychic', 'Steel', 'Ice', 'Rock', 'Dark', 'Water',
       'Electric', 'Fire', 'Ghost', 'Bug', 'Normal'], dtype=object)

In [44]:
# Display the frame; the rendering is truncated to the first and last rows.
df

Unnamed: 0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
795,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


### 10 Strongest Pokemon

In [53]:
# Preview the first five rows before ranking pokemon by strength.
df.head(n=5)

Unnamed: 0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


TODO: bin "Total" into strength ranges for a pie chart

In [54]:
# Highest "Total" stat value in the dataset.
df["Total"].max()

780