### INDEXING DATA FRAMES

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the Pokemon table and use its '#' column as the row index.
# The index is assigned directly by the reader via index_col.
data = pd.read_csv('pokemon.csv', index_col='#')
data.head()

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [6]:
# Chained square brackets: data['HP'] picks the HP column first,
# then [1] looks up the entry whose index label is 1.
data['HP'][1]

45

In [7]:
# Attribute access: data.HP refers to the same column as data['HP'];
# [1] again selects the entry whose index label is 1.
data.HP[1]

45

In [9]:
# Label-based lookup with .loc: row label 1, column list ['HP'].
# Because the column is given as a list, the result is a one-element
# Series (named after the row label) rather than a bare scalar.
data.loc[1, ['HP']]

HP    45
Name: 1, dtype: object

In [11]:
# Select several columns at once by passing a list of column names;
# the result is a DataFrame holding only those columns.
selected_columns = ['HP', 'Attack']
data[selected_columns]

Unnamed: 0_level_0,HP,Attack
#,Unnamed: 1_level_1,Unnamed: 2_level_1
1,45,49
2,60,62
3,80,82
4,80,100
5,39,52
...,...,...
796,50,100
797,50,160
798,80,110
799,80,160


### SLICING DATA FRAMES

In [12]:
# A single column label returns a Series, while a list of labels
# (even a one-element list) returns a DataFrame.

hp_as_series = data['HP']
hp_as_frame = data[['HP']]
print(type(hp_as_series))  # Series
print(type(hp_as_frame))   # DataFrame

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [14]:
# Slicing a DataFrame by label with .loc: both the row slice and the
# column slice include their end points.

data.loc[1:10, 'HP':'Defense'] #select rows labelled 1 to 10,
                               #and columns from HP to Defense

Unnamed: 0_level_0,HP,Attack,Defense
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,45,49,49
2,60,62,63
3,80,82,83
4,80,100,123
5,39,52,43
6,58,64,58
7,78,84,78
8,78,130,111
9,78,104,78
10,44,48,65


In [19]:
# A negative step walks the row labels backwards, so the start label
# (10) must come before the stop label (1) in the slice.
data.loc[10:1:-1, 'HP':'Defense'] #rows from 10 to 1, descending by 1

Unnamed: 0_level_0,HP,Attack,Defense
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,44,48,65
9,78,104,78
8,78,130,111
7,78,84,78
6,58,64,58
5,39,52,43
4,80,100,123
3,80,82,83
2,60,62,63
1,45,49,49


In [21]:
# Leaving the stop of the column slice empty runs through the last column.
data.loc[1:10, 'Speed':] #from Speed column to the end

Unnamed: 0_level_0,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,45,1,False
2,60,1,False
3,80,1,False
4,80,1,False
5,65,1,False
6,80,1,False
7,100,1,False
8,100,1,False
9,100,1,False
10,43,1,False


### FILTERING DATA FRAMES

In [22]:
# Build a boolean mask (True where HP exceeds 200) and keep only the
# rows for which the mask is True.
boolean = data['HP'].gt(200)
data.loc[boolean]

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
122,Chansey,Normal,,250,5,5,35,105,50,1,False
262,Blissey,Normal,,255,10,10,75,135,55,2,False


In [23]:
# Combine two boolean masks element-wise with & so that a row is kept
# only when both conditions hold.
first_filter = data['HP'] > 150
second_filter = data['Speed'] > 35
both_filters = first_filter & second_filter
data[both_filters]

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
122,Chansey,Normal,,250,5,5,35,105,50,1,False
262,Blissey,Normal,,255,10,10,75,135,55,2,False
352,Wailord,Water,,170,90,45,90,45,60,3,False
656,Alomomola,Water,,165,75,80,40,45,65,5,False


In [24]:
# Filter one column by a condition on another: the HP values of the
# rows whose Speed is below 15, selected in a single .loc call.
data.loc[data['Speed'] < 15, 'HP']

#
231     20
360     45
487     50
496    135
659     44
Name: HP, dtype: int64

### TRANSFORMING DATA

In [27]:
# A plain Python function that can be applied to a column value by value
def div(n):
    """Return half of ``n`` (true division, so the result is a float)."""
    halved = n / 2
    return halved
# Call div once per HP value; the result is a new Series of floats.
data['HP'].apply(div)

#
1      22.5
2      30.0
3      40.0
4      40.0
5      19.5
       ... 
796    25.0
797    25.0
798    40.0
799    40.0
800    40.0
Name: HP, Length: 800, dtype: float64

In [28]:
# Halve every HP value with an inline anonymous function
# instead of a separately defined one.
data['HP'].apply(lambda hp: hp / 2)

#
1      22.5
2      30.0
3      40.0
4      40.0
5      19.5
       ... 
796    25.0
797    25.0
798    40.0
799    40.0
800    40.0
Name: HP, Length: 800, dtype: float64

In [29]:
# Derive a new column from existing ones: Attack and Defense are added
# row by row and stored on the frame as 'total_power'.
data['total_power'] = data['Attack'] + data['Defense']
data.head()

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,total_power
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,98
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,125
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,165
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,223
5,Charmander,Fire,,39,52,43,60,50,65,1,False,95
