# Pandas Review using a Pokemon Dataset

In [1]:
import pandas as pd

# Load the Pokemon stats table from a pinned gist revision.
# Adjacent string literals are concatenated by the parser, so the URL is
# unchanged while each piece stays on its own line.
df = pd.read_csv(
    'https://gist.githubusercontent.com/armgilles/'
    '194bcff35001e7eb53a2a8b441e8b2c6/raw/'
    '92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv'
)

### Practice Exercises

#### 1. Obtain the following information:
    
    - dimensions
    - dtypes
    - column names
    - summary statistics


In [3]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(800, 13)

In [4]:
# Data type of each column.
df.dtypes

#              int64
Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [5]:
# Column labels as they appear in the raw CSV (before any renaming).
df.columns

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


#### 2. Change all the column names
      - lowercase letters
      - remove whitespace
      - remove periods

In [28]:
# Normalise every column label: lowercase, drop periods, and turn spaces into
# underscores (e.g. 'Sp. Atk' -> 'sp_atk').
# The result is reassigned rather than using inplace=True, so the cell does not
# mutate the object in place and stays chainable.
df = df.rename(
    columns=lambda label: label.lower().replace('.', '').replace(' ', '_')
)

In [32]:
# Display the frame to confirm the renamed column labels.
# NOTE(review): this renders the whole frame (truncated by pandas);
# df.head() would be enough to check the labels -- consider switching.
df

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


#### 3. What is the highest HP value present?

 A: 255

In [38]:
# Approach 1: order the rows by HP from highest to lowest, then keep the first row.
df.sort_values('hp', ascending=False).iloc[:1]

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


In [39]:
# Approach 2: let pandas pick the single top-HP row directly, with no full sort.
df.nlargest(n=1, columns=['hp'])

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


In [40]:
# Approach 3: when only the number is needed, take the column maximum.
df['hp'].max()

255

#### 4. Which Pokemon possess(es) the highest HP value?
- note: the `name` column is the pokemon name

A: Blissey

In [46]:
# Name(s) of the Pokemon holding the highest HP.
# keep='all' returns every row tied for the top value; the default
# (keep='first') would silently report only one Pokemon if several shared
# the maximum, which would not answer "possess(es)".
df.nlargest(1, columns='hp', keep='all')['name']

261    Blissey
Name: name, dtype: object

#### 5. How many different types are represented in Type 1? What are they?


A: 18,  ['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying']

In [53]:
# Report how many distinct primary types exist and list them.
# Adjacent f-strings are joined by the parser, so the printed text is unchanged.
print(
    f'Number of types: {df.type_1.nunique()} \n'
    f'Types: {df.type_1.unique()}'
)

Number of types: 18 
Types: ['Grass' 'Fire' 'Water' 'Bug' 'Normal' 'Poison' 'Electric' 'Ground'
 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Ghost' 'Ice' 'Dragon' 'Dark' 'Steel'
 'Flying']


#### 6. Number of Pokemon whose Type 2 is Ghost

A: 14

In [65]:
# Each True in the comparison counts as 1, so the sum is the number of matches.
df.type_2.eq('Ghost').sum()

14

#### 7. Percentage of Pokemon whose Type 2 is Ghost

A: 1.75

In [69]:
# Count the Ghost matches, divide by the number of rows in the frame,
# scale to a percentage, and round to two decimals to hide float noise.
round(df.type_2.eq('Ghost').sum() / len(df) * 100, 2)

1.75

#### 8. Number of Pokemon whose Attack is greater than Defense

A: 433

In [72]:
# Row-wise comparison of the two stat columns; summing the booleans counts the Trues.
df.attack.gt(df.defense).sum()

433

#### 9. What is the slowest speed for Grass type (either type 1 or type 2)?

A: 10

In [81]:
# Keep rows where either type slot is Grass, then take the single lowest speed.
df.loc[(df.type_1 == 'Grass') | (df.type_2 == 'Grass'), 'speed'].nsmallest(1)

658    10
Name: speed, dtype: int64

#### 10. Find the average speed by Generation

A: generation
- 1:    72.6
- 2:    61.8
- 3:    66.9
- 4:    71.3
- 5:    68.1
- 6:    66.4

In [88]:
# Average speed per generation, rounded to one decimal.
# The speed column is selected before aggregating so only that column is averaged.
# The earlier `.mean('speed')` passed 'speed' as the positional `numeric_only`
# flag (a truthy string), so it averaged every numeric column and worked only
# by accident.
df.groupby('generation')['speed'].mean().round(1)

generation
1    72.6
2    61.8
3    66.9
4    71.3
5    68.1
6    66.4
Name: speed, dtype: float64

#### 11. How many LEGENDARY Pokemon are DRAGON type?

A: 16

In [92]:
# How many Pokemon list Dragon as their primary type?
df.type_1.eq('Dragon').sum()

32

In [93]:
# How many Pokemon list Dragon as their secondary type?
df.type_2.eq('Dragon').sum()

18

In [97]:
# Keep every Pokemon that is Dragon in either type slot.
# Sanity check: calling .count() on the result gives 50 for the fully
# populated columns, matching 32 (type 1) + 18 (type 2).
df_dragons = df[df[['type_1', 'type_2']].eq('Dragon').any(axis=1)]

In [100]:
# `legendary` is already boolean, so it can be used directly as a row mask.
# The mask is taken from df_dragons itself so its index matches the subset;
# masking with df.legendary (indexed on the full frame) makes pandas reindex
# the key and emit a "Boolean Series key will be reindexed" UserWarning.
len(df_dragons[df_dragons.legendary])

  len(df_dragons[df.legendary])


16