In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [11]:
# Fix NumPy's global random seed so the randn() draws below are repeatable.
np.random.seed(101)

In [62]:
# Build a 5x4 frame of random values: rows labelled A-E, columns labelled w-z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('wxyz'),
)
df

Unnamed: 0,w,x,y,z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


# Conditional Selection

In [13]:
# Comparing a DataFrame with a scalar is element-wise: the result is a DataFrame of booleans.
df > 0

Unnamed: 0,w,x,y,z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [14]:
# Keep the boolean mask in a variable so it can be reused for selection.
booldf = df>0
booldf

Unnamed: 0,w,x,y,z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [15]:
# Indexing with a boolean DataFrame keeps values where the mask is True and leaves NaN elsewhere.
df[booldf]

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [16]:
df[df>0] # same result as df[booldf], without the intermediate variable

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [17]:
# Comparing a single column gives a boolean Series with one True/False per row.
df['w']>0

A     True
B     True
C    False
D     True
E     True
Name: w, dtype: bool

In [19]:
df[df['w']>0] # a boolean Series filters whole rows: row C is dropped because its w is not > 0

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
# grab all rows where z is less than zero

In [28]:
# Only the rows whose z value is negative are returned.
df[df['z']<0]

Unnamed: 0,w,x,y,z
C,-2.018168,0.740122,0.528813,-0.589001


In [29]:
# back to the earlier example

In [31]:
# Store the filtered frame; row C is excluded because its w is not > 0.
resultdf = df[df['w']>0]
resultdf

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [32]:
# The filtered result is an ordinary DataFrame, so a column can be selected from it.
resultdf['x']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: x, dtype: float64

In [34]:
# You don't have to do it in two steps. The filter returns a DataFrame,
# so the column selection can be chained directly onto it:
df[df['w']>0]['x']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: x, dtype: float64

In [35]:
# Pass a list of column names to select several columns from the filtered rows.
df[df['w']>0][['y','z']]

Unnamed: 0,y,z
A,0.907969,0.503826
B,-0.848077,0.605965
D,-0.933237,0.955057
E,2.605967,0.683509


In [36]:
# example

In [39]:
# Step-by-step version: build the row mask, pick the columns, filter, then select.
boolser = df['w'] > 0      # boolean Series, one entry per row
mycols = ['y', 'x']        # columns to show, in this order
result = df[boolser]       # rows where the mask is True
result[mycols]

Unnamed: 0,y,x
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [40]:
df[df['w']>0][['y','x']] # the same selection written in one line
# general pattern: df[df['column'] <op> value][[list of columns]], where <op> is a comparison such as <, > or ==

Unnamed: 0,y,x
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


# Multiple conditions

In [42]:
# Python's `and` needs a single True/False from each operand, but a column
# comparison yields a whole boolean Series, so pandas raises
# 'The truth value of a Series is ambiguous'.
# The error is the point of this cell, so catch and display it instead of
# letting it stop a "Restart & Run All".
try:
    df[(df['w']>0) and df['y']>1]
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [44]:
df['w']>0
# this is a whole Series of booleans rather than a single True/False, which is what confuses the `and` operator

A     True
B     True
C    False
D     True
E     True
Name: w, dtype: bool

In [45]:
# `and` only works on single boolean values like the ones below; it cannot
# reduce a Series of many True/False values to one, hence the ValueError.
True and True

True

In [49]:
# Show the frame again for reference before combining conditions on it.
df

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [51]:
df[(df['w']>0) & (df['y']>1)] # element-wise AND: use `&` and wrap each condition in parentheses

Unnamed: 0,w,x,y,z
E,0.190794,1.978757,2.605967,0.683509


In [52]:
df[(df['w']>0) | (df['y']<0)] # element-wise OR: use `|` and wrap each condition in parentheses

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [55]:
df[(df['y']<0) | (df['w']>0)] # OR with the two conditions swapped: the same rows are returned

Unnamed: 0,w,x,y,z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# Index

Resetting the index, or setting it to something else.

In [86]:
# Fresh 5x4 frame for the index examples: rows labelled A-E, columns labelled w-z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('wxyz'),
)
df

Unnamed: 0,w,x,y,z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [87]:
# Returns a new frame with a 0..n-1 index and the old index moved into an 'index' column.
df.reset_index()

Unnamed: 0,index,w,x,y,z
0,A,-1.467514,-0.494095,-0.162535,0.485809
1,B,0.392489,0.221491,-0.855196,1.54199
2,C,0.666319,-0.538235,-0.568581,1.407338
3,D,0.641806,-0.9051,-0.391157,1.028293
4,E,-1.972605,-0.866885,0.720788,-1.223082


In [88]:
# df itself is unchanged: reset_index() without inplace=True only returned a copy.
df

Unnamed: 0,w,x,y,z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [89]:
df.reset_index(inplace=True) # with inplace=True df itself is modified: the old index becomes the 'index' column
df

Unnamed: 0,index,w,x,y,z
0,A,-1.467514,-0.494095,-0.162535,0.485809
1,B,0.392489,0.221491,-0.855196,1.54199
2,C,0.666319,-0.538235,-0.568581,1.407338
3,D,0.641806,-0.9051,-0.391157,1.028293
4,E,-1.972605,-0.866885,0.720788,-1.223082


In [90]:
# Remove the 'index' column that reset_index() created.
# `columns=` replaces the positional axis argument (`df.drop('index', 1)`),
# which was deprecated in pandas 1.3 and raises TypeError in pandas 2.x.
df.drop(columns='index', inplace=True)
df

Unnamed: 0,w,x,y,z
0,-1.467514,-0.494095,-0.162535,0.485809
1,0.392489,0.221491,-0.855196,1.54199
2,0.666319,-0.538235,-0.568581,1.407338
3,0.641806,-0.9051,-0.391157,1.028293
4,-1.972605,-0.866885,0.720788,-1.223082


In [91]:
# Labels that will become the new index.
newind = ['JKT', 'BDG', 'SMG', 'SBY', 'CLG']
newind

['JKT', 'BDG', 'SMG', 'SBY', 'CLG']

In [92]:
# Add the labels as a regular column first; the list length must match the number of rows.
df['Ibu Kota'] = newind
df

Unnamed: 0,w,x,y,z,Ibu Kota
0,-1.467514,-0.494095,-0.162535,0.485809,JKT
1,0.392489,0.221491,-0.855196,1.54199,BDG
2,0.666319,-0.538235,-0.568581,1.407338,SMG
3,0.641806,-0.9051,-0.391157,1.028293,SBY
4,-1.972605,-0.866885,0.720788,-1.223082,CLG


In [93]:
df.set_index('Ibu Kota') # set_index replaces the existing index; unlike reset_index, the old index is discarded rather than kept as a new column

Unnamed: 0_level_0,w,x,y,z
Ibu Kota,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JKT,-1.467514,-0.494095,-0.162535,0.485809
BDG,0.392489,0.221491,-0.855196,1.54199
SMG,0.666319,-0.538235,-0.568581,1.407338
SBY,0.641806,-0.9051,-0.391157,1.028293
CLG,-1.972605,-0.866885,0.720788,-1.223082


In [94]:
df # by default set_index returns a new frame, so 'Ibu Kota' is still a regular column here; pass inplace=True to modify df itself

Unnamed: 0,w,x,y,z,Ibu Kota
0,-1.467514,-0.494095,-0.162535,0.485809,JKT
1,0.392489,0.221491,-0.855196,1.54199,BDG
2,0.666319,-0.538235,-0.568581,1.407338,SMG
3,0.641806,-0.9051,-0.391157,1.028293,SBY
4,-1.972605,-0.866885,0.720788,-1.223082,CLG


In [95]:
# Make the change permanent: 'Ibu Kota' moves from the columns into the index of df.
df.set_index('Ibu Kota',inplace=True)

In [96]:
# df is now indexed by 'Ibu Kota'.
df

Unnamed: 0_level_0,w,x,y,z
Ibu Kota,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JKT,-1.467514,-0.494095,-0.162535,0.485809
BDG,0.392489,0.221491,-0.855196,1.54199
SMG,0.666319,-0.538235,-0.568581,1.407338
SBY,0.641806,-0.9051,-0.391157,1.028293
CLG,-1.972605,-0.866885,0.720788,-1.223082
