### Indexing, Selecting & Filtering the Data in Pandas

In [15]:
import pandas as pd

In [16]:
# Five integers labelled 'a'..'e'; the cells below read them back by label and by position.
obj_series = pd.Series(data=[0, 1, 2, 3, 4], index=list('abcde'))

In [17]:
obj_series

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [18]:
type(obj_series)

pandas.core.series.Series

In [19]:
obj_series['b']

1

In [20]:
# Positional access: use .iloc explicitly. A bare integer key on a string-labelled
# Series relies on positional fallback, which pandas has deprecated.
obj_series.iloc[3]

3

In [21]:
obj_series[0:]

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [22]:
# Same labels 'a'..'e' as before, holding 10..14; the following cells slice, filter and assign into it.
obj_series1 = pd.Series(data=[10, 11, 12, 13, 14], index=list('abcde'))

In [23]:
obj_series1[0:]

a    10
b    11
c    12
d    13
e    14
dtype: int64

In [24]:
obj_series1[0:3]

a    10
b    11
c    12
dtype: int64

In [25]:
obj_series1[:4]

a    10
b    11
c    12
d    13
dtype: int64

In [26]:
obj_series1['a':'c']

a    10
b    11
c    12
dtype: int64

In [27]:
obj_series1['a']

10

In [29]:
obj_series1[['a','c']]

a    10
c    12
dtype: int64

In [30]:
obj_series1[['a','d','c']]

a    10
d    13
c    12
dtype: int64

In [31]:
obj_series1[obj_series1 < 2]

Series([], dtype: int64)

In [32]:
obj_series1[obj_series1 < 13]

a    10
b    11
c    12
dtype: int64

In [33]:
obj_series1['b':'d'] = 199

In [34]:
obj_series1

a     10
b    199
c    199
d    199
e     14
dtype: int64

### DataFrame Indexing

In [35]:
import numpy as np

In [36]:
np.arange(16)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [37]:
# 4x4 frame holding 0..15 filled row by row, with named rows and columns.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['London', 'Paris', 'Berlin', 'India'],
    columns=['one', 'two', 'three', 'four'],
)

In [38]:
data

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [39]:
data['two']

London     1
Paris      5
Berlin     9
India     13
Name: two, dtype: int32

In [40]:
data['one']

London     0
Paris      4
Berlin     8
India     12
Name: one, dtype: int32

In [41]:
data[['one','three']]

Unnamed: 0,one,three
London,0,2
Paris,4,6
Berlin,8,10
India,12,14


In [42]:
data[0:]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [43]:
data[0:3]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11


In [44]:
data[:2]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7


In [45]:
data[data['two'] > 2]

Unnamed: 0,one,two,three,four
Paris,4,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


In [46]:
data[data<5] = 199

In [47]:
data

Unnamed: 0,one,two,three,four
London,199,199,199,199
Paris,199,5,6,7
Berlin,8,9,10,11
India,12,13,14,15


### Selecting with iloc and loc

In [48]:
data.iloc[1]

one      199
two        5
three      6
four       7
Name: Paris, dtype: int32

In [49]:
data.iloc[1,2]

6

In [50]:
data.iloc[1,[1,2]]

two      5
three    6
Name: Paris, dtype: int32

In [51]:
data.iloc[[0,2]]

Unnamed: 0,one,two,three,four
London,199,199,199,199
Berlin,8,9,10,11


In [52]:
data.iloc[[0,2],[1,2]]

Unnamed: 0,two,three
London,199,199
Berlin,9,10


In [53]:
data.loc['London',['one','two']]

one    199
two    199
Name: London, dtype: int32

In [54]:
data.loc['Berlin',['one','two']]

one    8
two    9
Name: Berlin, dtype: int32

### Arithmetic Operations

In [55]:
# Values 0..3 on labels a, c, d, e (only partly overlapping with s2's labels).
s1 = pd.Series(data=np.arange(4), index=list('acde'))

In [56]:
# Values 0..4 on labels a, c, b, f, g.
s2 = pd.Series(data=np.arange(5), index=list('acbfg'))

In [57]:
print(s1)
print(s2)

a    0
c    1
d    2
e    3
dtype: int32
a    0
c    1
b    2
f    3
g    4
dtype: int32


In [58]:
s1 + s2

a    0.0
b    NaN
c    2.0
d    NaN
e    NaN
f    NaN
g    NaN
dtype: float64

In [59]:
# Two rows (Tim, Tom) by three columns (A, B, C), filled with 0..5 row by row.
df1 = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=['Tim', 'Tom'],
    columns=['A', 'B', 'C'],
)

In [60]:
# Three rows (Tim, Kate, Tom) by three columns (A, C, D), filled with 0..8 row by row.
df2 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Tim', 'Kate', 'Tom'],
    columns=['A', 'C', 'D'],
)

In [61]:
print(df1)

     A  B  C
Tim  0  1  2
Tom  3  4  5


In [62]:
print(df2)

      A  C  D
Tim   0  1  2
Kate  3  4  5
Tom   6  7  8


In [63]:
df1 + df2

Unnamed: 0,A,B,C,D
Kate,,,,
Tim,0.0,,3.0,
Tom,9.0,,12.0,


In [67]:
df3 =  df1.add(df2)

In [65]:
df1

Unnamed: 0,A,B,C
Tim,0,1,2
Tom,3,4,5


In [66]:
df2

Unnamed: 0,A,C,D
Tim,0,1,2
Kate,3,4,5
Tom,6,7,8


In [68]:
df3

Unnamed: 0,A,B,C,D
Kate,,,,
Tim,0.0,,3.0,
Tom,9.0,,12.0,


In [69]:
df4 =  df1.add(df2,fill_value = 0)

In [70]:
df4

Unnamed: 0,A,B,C,D
Kate,3.0,,4.0,5.0
Tim,0.0,1.0,3.0,2.0
Tom,9.0,4.0,12.0,8.0


In [71]:
2/df3

Unnamed: 0,A,B,C,D
Kate,,,,
Tim,inf,,0.666667,
Tom,0.222222,,0.166667,


In [72]:
3/df4

Unnamed: 0,A,B,C,D
Kate,1.0,,0.75,0.6
Tim,inf,3.0,1.0,1.5
Tom,0.333333,0.75,0.25,0.375


In [73]:
df4 * 4

Unnamed: 0,A,B,C,D
Kate,12.0,,16.0,20.0
Tim,0.0,4.0,12.0,8.0
Tom,36.0,16.0,48.0,32.0


In [74]:
df1.mul(3)

Unnamed: 0,A,B,C
Tim,0,3,6
Tom,9,12,15


In [75]:
df1

Unnamed: 0,A,B,C
Tim,0,1,2
Tom,3,4,5


In [76]:
x =  df2.iloc[1]
x

A    3
C    4
D    5
Name: Kate, dtype: int32

In [77]:
df2-x

Unnamed: 0,A,C,D
Tim,-3,-3,-3
Kate,0,0,0
Tom,3,3,3


In [78]:
x1 = df2['A']
x1

Tim     0
Kate    3
Tom     6
Name: A, dtype: int32

In [79]:
df2.sub(x1,axis='index')

Unnamed: 0,A,C,D
Tim,0,1,2
Kate,0,1,2
Tom,0,1,2


### Applying a Function

In [80]:
# Draw the sample from a seeded generator so that Restart & Run All rebuilds the
# exact same frame; an unseeded np.random.randn gives new values on every run.
df = pd.DataFrame(np.random.default_rng(42).standard_normal((4,3)),
                 columns = list('ABC'),
                 index = ['John','Smith','Jack','Adam'])

In [81]:
df

Unnamed: 0,A,B,C
John,4.069722,-0.383224,-1.355654
Smith,1.547024,-0.493322,0.3743
Jack,1.728754,0.703218,-1.034061
Adam,0.628552,-0.067021,0.553984


In [82]:
np.abs(df)

Unnamed: 0,A,B,C
John,4.069722,0.383224,1.355654
Smith,1.547024,0.493322,0.3743
Jack,1.728754,0.703218,1.034061
Adam,0.628552,0.067021,0.553984


In [83]:
# NOTE: this rebinds `s1`, which earlier in the notebook named a Series.
def s1(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` must provide ``.max()`` and ``.min()``; the cells that follow pass
    this function to ``df.apply``.
    """
    return x.max() - x.min()

In [84]:
s1

<function __main__.<lambda>(x)>

In [85]:
df.apply(s1)

A    3.441171
B    1.196541
C    1.909639
dtype: float64

In [86]:
df.apply(s1,axis=0) # axis=0 (the default): s1 receives each column in turn, so the result has one entry per column

A    3.441171
B    1.196541
C    1.909639
dtype: float64

In [87]:
df.apply(s1,axis=1) # axis=1: s1 receives each row in turn, so the result has one entry per row

John     5.425377
Smith    2.040346
Jack     2.762815
Adam     0.695573
dtype: float64