### Filtering data.

In [65]:
import matplotlib.pyplot as plt
import pandas as pd
import re
import numpy as np

In [66]:
# Load the Pokemon table from a CSV file (path is relative to the working
# directory) and display it; every later cell filters this frame.
df = pd.read_csv(filepath_or_buffer='pokemon_data.csv')
df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


In [67]:
# Rows can be selected by passing a boolean mask to .loc.
# Masks are combined element-wise with & (and), | (or) and ~ (not); Python's
# own "and"/"or" keywords do not work on a Series. Each comparison needs its
# own parentheses because & binds tighter than ==.
df.loc[(df['Type 1'] == 'Fire') & (df['Type 2'] == 'Psychic')]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
616,555,DarmanitanZen Mode,Fire,Psychic,105,30,105,140,105,55,5,False
723,655,Delphox,Fire,Psychic,75,69,72,114,100,104,6,False


In [68]:
# df['Type 1'] is a Series; .isin tests every value for membership in the
# given list and returns a boolean mask that selects the matching rows.
df.loc[df['Type 1'].isin(['Fire', 'Grass'])]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
735,667,Litleo,Fire,Normal,62,50,58,73,54,72,6,False
736,668,Pyroar,Fire,Normal,86,68,72,109,66,106,6,False
740,672,Skiddo,Grass,,66,65,48,62,57,52,6,False
741,673,Gogoat,Grass,,123,100,62,97,81,68,6,False


In [69]:
# Comparison operators (>=, <, ==, ...) also produce a boolean mask;
# .ge(150) is the method form of ">= 150".
df.loc[df['Speed'].ge(150)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
71,65,AlakazamMega Alakazam,Psychic,,55,50,65,175,95,150,1,False
154,142,AerodactylMega Aerodactyl,Rock,Flying,80,135,85,70,95,150,1,False
315,291,Ninjask,Bug,Flying,61,90,45,50,50,160,3,False
428,386,DeoxysNormal Forme,Psychic,,50,150,50,150,50,150,3,True
429,386,DeoxysAttack Forme,Psychic,,50,180,20,180,20,150,3,True
431,386,DeoxysSpeed Forme,Psychic,,50,95,90,95,90,180,3,True


In [70]:
# The .str accessor exposes string methods on a Series.
# str.contains keeps the rows whose value contains the pattern; passing
# re.IGNORECASE makes the search case-insensitive.
# https://pandas.pydata.org/docs/reference/api/pandas.Series.str.contains.html?highlight=contains#pandas.Series.str.contains
df.loc[df['Name'].str.contains('lAkA', flags=re.IGNORECASE)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
70,65,Alakazam,Psychic,,55,50,45,135,95,120,1,False
71,65,AlakazamMega Alakazam,Psychic,,55,50,65,175,95,150,1,False


In [71]:
# The pattern is treated as a regular expression: "." matches any single
# character, so this keeps names containing some character followed by "zam".
df.loc[df['Name'].str.contains(pat='.zam', regex=True)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
70,65,Alakazam,Psychic,,55,50,45,135,95,120,1,False
71,65,AlakazamMega Alakazam,Psychic,,55,50,65,175,95,150,1,False


In [72]:
# The .isin query can also be written as a regular expression.
# str.contains performs a substring search, so the alternation is anchored
# with ^...$ to accept only whole-value matches, as .isin does.
# re.IGNORECASE additionally ignores case (.isin itself is case-sensitive).
df[df["Type 1"].str.contains('^(?:Fire|Grass)$', regex=True, flags=re.IGNORECASE)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
735,667,Litleo,Fire,Normal,62,50,58,73,54,72,6,False
736,668,Pyroar,Fire,Normal,86,68,72,109,66,106,6,False
740,672,Skiddo,Grass,,66,65,48,62,57,52,6,False
741,673,Gogoat,Grass,,123,100,62,97,81,68,6,False


In [81]:
# Names that start with "pi", ignoring case. ^ anchors the pattern at the
# start of the string; nothing needs to follow it because str.contains only
# requires the pattern to match somewhere in the value.
df[df['Name'].str.contains('^pi', flags=re.IGNORECASE)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
20,16,Pidgey,Normal,Flying,40,45,40,35,35,56,1,False
21,17,Pidgeotto,Normal,Flying,63,60,55,50,50,71,1,False
22,18,Pidgeot,Normal,Flying,83,80,75,70,70,101,1,False
23,18,PidgeotMega Pidgeot,Normal,Flying,83,80,80,135,80,121,1,False
30,25,Pikachu,Electric,,35,55,40,50,50,90,1,False
136,127,Pinsir,Bug,,65,125,100,55,70,85,1,False
137,127,PinsirMega Pinsir,Bug,Flying,65,155,120,65,90,105,1,False
186,172,Pichu,Electric,,20,40,15,35,35,60,2,False
219,204,Pineco,Bug,,50,65,90,35,35,15,2,False
239,221,Piloswine,Ice,Ground,100,100,80,60,60,50,2,False


In [73]:
# .str also offers plain (non-regex) helpers such as startswith / endswith.
# Here: every name beginning with "Al".
df.loc[df['Name'].str.startswith('Al')]
# Suffix variant: df[df.Name.str.endswith('J')]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
70,65,Alakazam,Psychic,,55,50,45,135,95,120,1,False
71,65,AlakazamMega Alakazam,Psychic,,55,50,65,175,95,150,1,False
365,334,Altaria,Dragon,Flying,75,70,90,70,105,80,3,False
366,334,AltariaMega Altaria,Dragon,Fairy,75,110,110,110,105,80,3,False
655,594,Alomomola,Water,,165,75,80,40,45,65,5,False


In [74]:
# ~ inverts a boolean mask (element-wise NOT): keep every row whose name
# does not end with "a".
df.loc[~df['Name'].str.endswith('a')]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


In [75]:
# Pick the rows holding the highest (or lowest) values of a specific column.
# Here: the 3 rows with the largest Attack.
df.nlargest(n=3, columns='Attack')
# Lowest-value variant: df.nsmallest(2, 'Speed')

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
163,150,MewtwoMega Mewtwo X,Psychic,Fighting,106,190,100,154,100,130,1,True
232,214,HeracrossMega Heracross,Bug,Fighting,80,185,115,40,105,75,2,False
424,383,GroudonPrimal Groudon,Ground,Fire,100,180,160,150,90,90,3,True


In [76]:
# A different kind of filtering: DataFrame.filter selects by row / column
# *labels* rather than by cell values.
# Build a small 2x3 frame with string labels on both axes to demonstrate it.

df_2 = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6]]),
    columns=['one', 'two', 'three'],
    index=['mouse', 'rabbit'],
)
df_2

Unnamed: 0,one,two,three
mouse,1,2,3
rabbit,4,5,6


In [77]:
# Keep the columns whose label ends with "e" (axis=1 means column labels).
df_2.filter(regex='e$', axis='columns')

Unnamed: 0,one,three
mouse,1,3
rabbit,4,6


In [78]:
# Same pattern applied to the row labels (axis=0): only "mouse" ends with "e".
df_2.filter(regex='e$', axis='index')

Unnamed: 0,one,two,three
mouse,1,2,3


### Reset Index

In [79]:
# Filtering keeps each row's original index label, so the labels of the
# result are no longer consecutive.
df_index = df.loc[df['Sp. Atk'].le(10)]
# The three matching rows are still labelled 230, 381 and 486 rather than
# 0 to 2; .reset_index can renumber them.
df_index

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
230,213,Shuckle,Bug,Rock,20,10,230,10,230,5,2,False
381,349,Feebas,Water,,20,15,20,10,55,80,3,False
486,438,Bonsly,Rock,,50,80,95,10,45,10,4,False


In [80]:
# reset_index returns a new frame whose rows are renumbered from 0.
# drop=True discards the old index labels; without drop=True the old index
# is kept as an extra regular column next to the new one.
# The result is only displayed here, so df_index itself is left unchanged.
df_index.reset_index(drop=True)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,213,Shuckle,Bug,Rock,20,10,230,10,230,5,2,False
1,349,Feebas,Water,,20,15,20,10,55,80,3,False
2,438,Bonsly,Rock,,50,80,95,10,45,10,4,False
