### Indexing, Selecting & Filtering the Data in Pandas

In [1]:
import pandas as pd

In [2]:
# Five integers labelled 'a'..'e'; used for the basic Series lookups below.
obj_series = pd.Series(data=[0, 1, 2, 3, 4], index=list('abcde'))

In [3]:
obj_series

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [4]:
type(obj_series)

pandas.core.series.Series

In [6]:
# Look up a single value by its index label.
obj_series['b']

1

In [7]:
# Select the fourth element by position. `.iloc` makes the positional intent explicit;
# a bare integer inside [] on a string-labelled Series relies on a fallback that
# pandas has deprecated.
obj_series.iloc[3]

3

In [8]:
obj_series[0:]

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [9]:
# Second example Series: values 10..14 under the same 'a'..'e' labels.
obj_series1 = pd.Series(data=[10, 11, 12, 13, 14], index=list('abcde'))

In [10]:
obj_series1[0:]

a    10
b    11
c    12
d    13
e    14
dtype: int64

In [11]:
# Integer slice: positions 0, 1 and 2 are returned; the stop position (3) is excluded.
obj_series1[0:3]

a    10
b    11
c    12
dtype: int64

In [12]:
obj_series1[:4]

a    10
b    11
c    12
d    13
dtype: int64

In [14]:
# Label slice: unlike an integer slice, the end label 'c' is included in the result.
obj_series1['a':'c']

a    10
b    11
c    12
dtype: int64

In [15]:
obj_series1['a']

10

In [16]:
# The bare comma builds the tuple ('a', 'c'), which is looked up as ONE label rather
# than two, so the lookup fails. The error is caught and printed so that
# "Restart & Run All" can continue past this deliberate failure.
try:
    obj_series1['a','c']
except KeyError as exc:
    print(f"KeyError: {exc}")

KeyError: ('a', 'c')

In [17]:
# To pick several labels, pass them as a list (note the double brackets).
obj_series1[['a','c']]

a    10
c    12
dtype: int64

In [18]:
# The result follows the order of the requested labels, not the order of the index.
obj_series1[['a','d','c']]

a    10
d    13
c    12
dtype: int64

In [19]:
# Boolean-mask filter: no value is below 2, so the result is an empty Series.
obj_series1[obj_series1 < 2]

Series([], dtype: int64)

In [20]:
# Boolean-mask filter: keep only the entries whose value is below 13.
obj_series1[obj_series1 < 13]

a    10
b    11
c    12
dtype: int64

In [21]:
# Assign through a label slice: 'b', 'c' and 'd' (end label included) are all set to 199.
# This modifies obj_series1 in place.
obj_series1['b':'d'] = 199

In [22]:
obj_series1

a     10
b    199
c    199
d    199
e     14
dtype: int64

### DataFrame Indexing

In [23]:
import numpy as np

In [25]:
np.arange(16)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [26]:
# 4x4 frame holding the integers 0..15 row by row, with place names as the row labels
# and spelled-out ordinals as the column labels.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index='London Paris Berlin India'.split(),
    columns='one two three four'.split(),
)

In [27]:
data

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [28]:
# A single column name inside [] selects that column and returns it as a Series.
data['two']

London     1
Paris      5
Berlin     9
India     13
Name: two, dtype: int32

In [29]:
data['one']

London     0
Paris      4
Berlin     8
India     12
Name: one, dtype: int32

In [31]:
# A list of column names inside [] returns a DataFrame with just those columns.
data[['one','three']]

Unnamed: 0,one,three
London,0,2
Paris,4,6
Berlin,8,10
India,12,14


In [32]:
data[0:]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [33]:
# A slice inside [] selects ROWS (not columns): here the first three rows, stop excluded.
data[0:3]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11


In [34]:
data[:2]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7


In [36]:
# Boolean Series inside [] filters rows: keep the rows whose 'two' value is greater than 2.
data[data['two'] > 2]

Unnamed: 0,one,two,three,four
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [41]:
# Boolean-frame assignment: every cell with a value below 5 is overwritten with 199.
# This modifies `data` in place, so the frames shown by later cells include the change.
data[data<5] = 199

In [42]:
data

Unnamed: 0,one,two,three,four
London,199,199,199,199
Paris,199,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


### Selecting with iloc and loc

In [43]:
# iloc is position-based: row at position 1 ('Paris'), returned as a Series.
data.iloc[1]

one      199
two        5
three      6
four       7
Name: Paris, dtype: int32

In [44]:
# Row position 1, column position 2: a single scalar value.
data.iloc[1,2]

6

In [45]:
# One row position with a list of column positions: the result is a Series for that row.
data.iloc[1,[1,2]]

two      5
three    6
Name: Paris, dtype: int32

In [46]:
# A list of row positions returns a DataFrame with those rows and all columns.
data.iloc[[0,2]]

Unnamed: 0,one,two,three,four
London,199,199,199,199
Berlin,8,9,10,11


In [47]:
# Lists for both axes: rows at positions 0 and 2, columns at positions 1 and 2.
data.iloc[[0,2],[1,2]]

Unnamed: 0,two,three
London,199,199
Berlin,9,10


In [48]:
# loc is label-based: row 'London', columns 'one' and 'two'.
data.loc['London',['one','two']]

one    199
two    199
Name: London, dtype: int32

In [49]:
# Same label-based selection for the 'Berlin' row.
data.loc['Berlin',['one','two']]

one    8
two    9
Name: Berlin, dtype: int32

### Arithmetic Operations

In [50]:
# Values 0..3 labelled 'a'..'d'. These labels are the ones shown in the printed s1 and
# in the s1 + s2 result: 'a', 'b' and 'c' overlap with s2, while 'd' has no partner.
s1 = pd.Series(np.arange(4), index=['a','b','c','d'])

In [51]:
# Values 0..4 under labels in the deliberately unsorted order a, c, b, f, g.
s2 = pd.Series(data=np.arange(5), index=list('acbfg'))

In [52]:
print(s1)
print(s2)

a    0
b    1
c    2
d    3
dtype: int32
a    0
c    1
b    2
f    3
g    4
dtype: int32


In [53]:
# Addition aligns the operands on their index labels, regardless of order. A label
# present in only one operand yields NaN, which is why the result dtype is float64.
s1 + s2

a    0.0
b    3.0
c    3.0
d    NaN
f    NaN
g    NaN
dtype: float64

In [58]:
# Two rows ('Tim', 'Tom') by three columns (A, B, C), filled with 0..5 row by row.
df1 = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=['Tim', 'Tom'],
    columns=['A', 'B', 'C'],
)

In [59]:
# Three rows ('Tim', 'Kate', 'Tom') by three columns (A, C, D), filled with 0..8 row by row.
df2 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Tim', 'Kate', 'Tom'],
    columns=['A', 'C', 'D'],
)

In [60]:
print(df1)

     A  B  C
Tim  0  1  2
Tom  3  4  5


In [61]:
print(df2)

      A  C  D
Tim   0  1  2
Kate  3  4  5
Tom   6  7  8


In [62]:
# DataFrame addition aligns on BOTH row labels and column labels. Only cells present in
# both frames (rows Tim/Tom, columns A/C) get a sum; every other cell is NaN.
df1 + df2

Unnamed: 0,A,B,C,D
Kate,,,,
Tim,0.0,,3.0,
Tom,9.0,,12.0,
