In [3]:
#We will now take a look at the indexing, selecting and filtering methods applicable to Series and DataFrames

import pandas as pd
import numpy as np
from pandas import Series, DataFrame

#Five floats (0.0 through 4.0) labelled 'a' through 'e'
series_obj = Series(np.arange(5, dtype=float), index=list('abcde'))
series_obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [4]:
#Build a DataFrame from two Series whose year indexes only partly overlap;
#the result has one row per distinct year and NaN where a column has no value for that year
frames_obj = DataFrame(
    {
        'July': Series([12, 24], index=[2001, 2003]),
        'December': Series([13, 24], index=[2003, 1967]),
    }
)
frames_obj

Unnamed: 0,July,December
1967,,24.0
2001,12.0,
2003,24.0,13.0


In [9]:
#Series Indexing
#Works like ndarray indexing, but entries can also be selected by their index labels

#Positional access goes through .iloc: a bare integer key on a label-indexed Series
#(series_obj[1]) relies on a positional fallback that recent pandas versions deprecate
series_obj.iloc[1]

1.0

In [6]:
#Look up a single value by its index label
series_obj['d']

3.0

In [10]:
#An integer slice selects by position here; the end position (4) is excluded
series_obj[2:4]

c    2.0
d    3.0
dtype: float64

In [11]:
#A list of index labels selects several entries at once and returns a Series
series_obj[['a','b','e']]

a    0.0
b    1.0
e    4.0
dtype: float64

In [12]:
#Select several entries by integer position; .iloc is used because integer keys passed
#straight to [] on a label-indexed Series rely on a deprecated positional fallback
series_obj.iloc[[1, 4]]

b    1.0
e    4.0
dtype: float64

In [13]:
#Filter with a Boolean mask: keep only the entries whose value is even
series_obj[series_obj%2 == 0]

a    0.0
c    2.0
e    4.0
dtype: float64

In [18]:
#When slicing with index labels (rather than integer positions), the endpoint is inclusive
series_obj['b':'d']

b    1.0
c    2.0
d    3.0
dtype: float64

In [19]:
#Assigning a scalar to a label slice sets every selected entry, just as with ndarrays
#Note: this modifies series_obj in place
series_obj['b':'c'] = 5
series_obj

a    0.0
b    5.0
c    5.0
d    3.0
e    4.0
dtype: float64

In [21]:
#DataFrame Indexing
#Indexing a DataFrame with [] selects/retrieves one or more of its columns

#Display the frame first for reference
frames_obj

Unnamed: 0,July,December
1967,,24.0
2001,12.0,
2003,24.0,13.0


In [23]:
#A single column name returns that column as a Series
frames_obj['July']

1967     NaN
2001    12.0
2003    24.0
Name: July, dtype: float64

In [25]:
#A list of column names returns a DataFrame containing those columns
frames_obj[['July','December']]

Unnamed: 0,July,December
1967,,24.0
2001,12.0,
2003,24.0,13.0


In [27]:
#The same [] syntax also supports a few special cases that select rows instead of columns

#Retrieving rows by slicing (here: every row from position 1 onwards)
frames_obj[1:]

Unnamed: 0,July,December
2001,12.0,
2003,24.0,13.0


In [29]:
#Retrieving rows by Boolean array: keep the rows whose July value is not a multiple of 8
#The row with NaN in July is kept as well, because the comparison evaluates to True for it
frames_obj[frames_obj['July']%8!=0]

Unnamed: 0,July,December
1967,,24.0
2001,12.0,


In [31]:
#Comparing a DataFrame with a scalar produces a new Boolean DataFrame of the same shape
#NaN entries compare as False
frames_obj < 15

Unnamed: 0,July,December
1967,False,False
2001,True,False
2003,False,True


In [41]:
#We can use a Boolean DataFrame as a mask in an assignment to overwrite the values where the condition is True
#Suppose we want to represent all the values that are NaN as 0

#isnull() already returns a Boolean DataFrame, so no extra "== True" comparison is needed
#Note: this modifies frames_obj in place
frames_obj[frames_obj.isnull()] = 0
frames_obj

Unnamed: 0,July,December
1967,0.0,24.0
2001,12.0,0.0
2003,24.0,13.0


In [44]:
#For label-based indexing of DataFrames, we use the special indexing attribute .loc as shown below
#The first argument is the row label, the second the column label(s)

#Passing the column as a one-element list returns a Series rather than a scalar
frames_obj.loc[2001, ['July']]

July    12.0
Name: 2001, dtype: float64

In [45]:
#Lists of row labels and column labels select a sub-DataFrame
frames_obj.loc[[1967,2003], ['July','December'] ]

Unnamed: 0,July,December
1967,0.0,24.0
2003,24.0,13.0


In [48]:
#When we need to select by integer position instead of by label, we use .iloc
#Position 1 is the second row (label 2001)
frames_obj.iloc[1]

July        12.0
December     0.0
Name: 2001, dtype: float64

In [56]:
#.loc also accepts label slices; the end label (2003) is included in the result
frames_obj.loc[:2003, 'December']

1967    24.0
2001     0.0
2003    13.0
Name: December, dtype: float64

In [63]:
#.loc accepts a Boolean Series as well: keep the rows whose July value is at least 12
frames_obj.loc[frames_obj.July>=12]

Unnamed: 0,July,December
2001,12.0,0.0
2003,24.0,13.0


In [67]:
#Some other methods for selecting data from DataFrames and Series are as follows:

#xs method: selects a single row or column as a Series by label value (here the row labelled 2001)
frames_obj.xs(2001)

July        12.0
December     0.0
Name: 2001, dtype: float64

In [72]:
#at: selects a single scalar value by row label and column label
frames_obj.at[2001, 'July']

12.0

In [73]:
#iat: selects a single scalar value by row and column integer positions (not labels)
frames_obj.iat[1,0]

12.0