# Pandas Review using a Pokemon Dataset

In [1]:
import pandas as pd

# Load the Pokemon dataset straight from the pinned gist revision.
# Adjacent string literals are concatenated by Python, so the URL is unchanged.
df = pd.read_csv(
    'https://gist.githubusercontent.com/armgilles/'
    '194bcff35001e7eb53a2a8b441e8b2c6/raw/'
    '92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv'
)

### Practice Exercises

#### 1. Obtain the following information:

- dimensions
- dtypes
- column names
- summary statistics


In [2]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(800, 13)

In [4]:
# Data type of each column, indexed by column name.
df.dtypes

#              int64
Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [7]:
# Print a per-column overview: name, non-null count and dtype, plus memory usage.
# The non-null counts are where missing values (e.g. in `Type 2`) show up.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   Total       800 non-null    int64 
 5   HP          800 non-null    int64 
 6   Attack      800 non-null    int64 
 7   Defense     800 non-null    int64 
 8   Sp. Atk     800 non-null    int64 
 9   Sp. Def     800 non-null    int64 
 10  Speed       800 non-null    int64 
 11  Generation  800 non-null    int64 
 12  Legendary   800 non-null    bool  
dtypes: bool(1), int64(9), object(3)
memory usage: 75.9+ KB


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


#### 2. Change all the column names so that they:

- use lowercase letters
- contain no whitespace
- contain no periods

In [11]:
# Normalise the column labels in one chained pass: lowercase them, then strip
# spaces and periods (e.g. `Sp. Atk` becomes `spatk`).
# regex=False is passed explicitly so that '.' is always a literal period;
# interpreted as a regular expression it would match any character, and the
# default for `regex` has differed between pandas versions.
df.columns = (
    df.columns
    .str.lower()
    .str.replace(' ', '', regex=False)
    .str.replace('.', '', regex=False)
)
df.head()

Unnamed: 0,#,name,type1,type2,total,hp,attack,defense,spatk,spdef,speed,generation,legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


#### 3. What is the highest HP value present?

A: 255

In [12]:
# Largest value found in the `hp` column.
df['hp'].max()

255

In [33]:
# Order the rows by HP, highest first, and keep only the top row.
df.sort_values('hp', ascending=False).iloc[:1]

Unnamed: 0,#,name,type1,type2,total,hp,attack,defense,spatk,spdef,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


In [34]:
# Same lookup without a full sort: the single row with the largest HP.
df.nlargest(n=1, columns=['hp'])

Unnamed: 0,#,name,type1,type2,total,hp,attack,defense,spatk,spdef,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


#### 4. Which Pokemon possess(es) the highest HP value?
- note: the `name` column is the pokemon name

A: Blissey

In [20]:
# Select every row whose HP equals the maximum, so that Pokemon tied for the
# highest HP are all listed rather than only the first row of a sorted frame.
df[df.hp == df.hp.max()]

Unnamed: 0,#,name,type1,type2,total,hp,attack,defense,spatk,spdef,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


In [35]:
# Names of the Pokemon with the highest HP. keep='all' retains every row tied
# for the top value instead of only the first one encountered.
df.nlargest(1, columns='hp', keep='all').name

261    Blissey
Name: name, dtype: object

#### 5. How many different types are represented in Type 1? What are they?


A: 18,  ['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying']

In [22]:
# Number of distinct (non-missing) primary types.
df.type1.nunique()

18

In [25]:
# How many Pokemon carry each primary type, most common first.
df['type1'].value_counts()

type1
Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Electric     44
Rock         44
Dragon       32
Ground       32
Ghost        32
Dark         31
Poison       28
Steel        27
Fighting     27
Ice          24
Fairy        17
Flying        4
Name: count, dtype: int64

#### 6. Number of Pokemon whose Type 2 is Ghost

A: 14

In [28]:
# How many Pokemon carry each secondary type; rows without a second type are not counted.
df['type2'].value_counts()

type2
Flying      97
Ground      35
Poison      34
Psychic     33
Fighting    26
Grass       25
Fairy       23
Steel       22
Dark        20
Dragon      18
Water       14
Ghost       14
Ice         14
Rock        14
Fire        12
Electric     6
Normal       4
Bug          3
Name: count, dtype: int64

In [38]:
# Summing the boolean mask counts the rows whose secondary type is Ghost.
df.type2.eq('Ghost').sum()

14

#### 7. Percentage of Pokemon whose Type 2 is Ghost

A: 1.75

In [39]:
# Share of all Pokemon whose secondary type is Ghost, as a percentage rounded to 2 decimals.
ghost_count = df.type2.eq('Ghost').sum()
round(ghost_count / len(df) * 100, 2)

1.75

#### 8. Number of Pokemon whose Attack is greater than Defense

A: 433

In [40]:
# Row-wise comparison of the two stats; summing the mask counts the rows where attack wins.
df.attack.gt(df.defense).sum()

433

#### 9. What is the slowest speed for Grass type (either type 1 or type 2)?

A: 10

In [41]:
# A Pokemon counts as Grass if either of its two type slots is Grass.
is_grass = df.type1.eq('Grass') | df.type2.eq('Grass')
# Smallest speed among those rows (the index label identifies the Pokemon's row).
df.loc[is_grass, 'speed'].nsmallest(1)

658    10
Name: speed, dtype: int64

#### 10. Find the average speed by Generation

A: generation
- 1:    72.6
- 2:    61.8
- 3:    66.9
- 4:    71.3
- 5:    68.1
- 6:    66.4

In [42]:
# Average speed per generation, rounded to one decimal.
# Select the `speed` column before aggregating: the first positional argument
# of GroupBy.mean is `numeric_only`, not a column name, so passing 'speed'
# there only works because the string is truthy, and it averages every
# numeric column before one is picked.
df.groupby('generation').speed.mean().round(1)

generation
1    72.6
2    61.8
3    66.9
4    71.3
5    68.1
6    66.4
Name: speed, dtype: float64

#### 11. How many LEGENDARY Pokemon are DRAGON type?

A: 16

In [44]:
# Keep every Pokemon that is Dragon in either its primary or secondary type slot.
is_dragon = df.type1.eq('Dragon') | df.type2.eq('Dragon')
df_dragons = df[is_dragon]

In [45]:
# Count the legendary rows among the Dragon-type Pokemon.
# The mask is taken from df_dragons itself so its index matches the frame being
# filtered; a mask from the unfiltered `df` has to be reindexed by pandas,
# which emits a "Boolean Series key will be reindexed" warning.
len(df_dragons[df_dragons.legendary])

  len(df_dragons[df.legendary])


16