In [7]:
import numpy as np
import pandas as pd

In [9]:
from numpy.random import randn

In [11]:
# Fix NumPy's global random seed so the randn() draws below are the same on every run.
np.random.seed(101)

In [13]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(data=randn(5, 4), index=list('ABCDE'), columns=list('WXYZ'))


In [15]:
# Display the full 5x4 frame built in the previous cell.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Conditional Selection using bracket notation

In [22]:
df > 0 # Returns a boolean frame: True wherever the value is greater than 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [24]:
# Store the element-wise "greater than zero" mask; .gt(0) is the method form of `df > 0`.
booldf = df.gt(0)

In [26]:
# Display the stored mask; it matches the output of `df > 0` above.
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


Passing `booldf` into the original DataFrame returns the value wherever the mask is True and NaN wherever it is False.

In [31]:
# Indexing with a boolean DataFrame keeps the values where the mask is True
# and puts NaN everywhere else; the shape of the frame is unchanged.
df[booldf]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


Another shorter way to do it is df[df>0]

In [36]:
df[df>0] # Same result as df[booldf], without storing the mask in a variable first

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [38]:
# Show df again: filtering with df[df>0] did not change the original frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [63]:
df['W']>0 # Comparing a single column gives back a boolean Series, one value per row

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [44]:
# The raw values of column W, to compare against the boolean Series above.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

Notice that the two outputs above match: at row C the value of W is less than 0, so the comparison returns False there.

In [61]:
# Indexing with a boolean Series filters whole rows, so no NaN values appear:
# rows where the Series is False (here row C) are dropped instead. NaN only
# shows up when the mask is an entire boolean DataFrame.
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [65]:
# Full frame again for reference: row C is still present in df itself.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


### Return all the rows in the DataFrame where Z is less than zero

In [68]:
# Look at column Z in the full frame before filtering on it.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [72]:
# Keep only the rows whose Z value is negative (row-wise boolean filter).
df.loc[df['Z'] < 0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [78]:
# Rows of df with a positive W, stored so later cells can select columns from them.
resultdf = df.loc[df['W'] > 0]

In [80]:
# Rows of df where W > 0 (row C is excluded).
resultdf

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [84]:
# Pull the single column 'X' out of the filtered frame; the result is a Series.
resultdf['X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

The two steps above (filter the rows, then pick a column) can be combined into a single expression, as shown below.

In [91]:
# Select column 'X' for the rows where W is positive in one .loc call
# (row mask first, column label second) instead of chaining two [] lookups,
# which builds an intermediate filtered frame before picking the column.
df.loc[df['W']>0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [100]:
# Select columns 'Y' and 'X' (in that order) for the rows where W is positive.
# A single .loc call takes the row mask and the column list together, which
# avoids the chained df[...][...] lookup and its intermediate frame.
df.loc[df['W']>0, ['Y','X']]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [106]:
# Keep the row mask under its own name so it can be inspected and reused,
# then apply it to df to get the filtered rows.
Boolser = df['W'].gt(0)
result = df.loc[Boolser]

In [104]:
# The boolean Series used as the row mask.
Boolser

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [110]:
# Rows of df kept by the mask (every row except C).
result

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [114]:
result[['Y','X']] # You can select specific columns by passing a list of column names

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [116]:
# Column labels to select, in display order: Y first, then X.
mycols = list('YX')

In [128]:
# Same selection as result[['Y','X']], with the column list held in a variable.
result[mycols]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


# Breaking the steps down
* This method takes up more memory, because every intermediate result is kept in its own variable.

In [135]:
# Step-by-step version of the one-line selection:
#   1. name the columns to keep,
#   2. build the row mask (W greater than zero),
#   3. filter the rows,
#   4. pick the columns from the filtered rows.
mycols = ['Y', 'X']
boolser = df['W'] > 0
result = df.loc[boolser]
result[mycols]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


## Using Multiple conditions
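* Use `&` rather than Python's `and`: `and` needs a single True/False value, but each comparison here yields a whole boolean Series.
* For the 'OR' operation, replace `&` with `|`.
* Wrap each condition in parentheses, as in the cells below.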

In [143]:
# Rows where BOTH conditions hold. Each comparison is wrapped in parentheses
# because & binds more tightly than >.
# NOTE: both conditions test column W, so together they reduce to df['W'] > 1;
# testing two different columns would show the combination more clearly.
df[(df['W']>0) & (df['W']>1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826


In [147]:
# Rows where AT LEAST ONE condition holds.
# NOTE: both conditions test column W, so together they reduce to df['W'] > 0.
df[(df['W']>0) | (df['W']>1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Resetting the index

In [153]:
# Current frame, still indexed by the labels A-E.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [161]:
df.reset_index() # Returns a new frame with the old index moved into an 'index' column; pass inplace=True to change df itself

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


## Setting index

In [168]:
# Five state abbreviations, one per row of df, written out as a literal list.
newind = ['CA', 'NY', 'WY', 'OR', 'CO']

In [170]:
# The new labels as a list of five strings.
newind

['CA', 'NY', 'WY', 'OR', 'CO']

In [184]:
# Add the labels as a new 'States' column (one value per row, in row order).
# NOTE(review): the saved output of the next cell also shows 'Sattes' and
# 'Sates' columns that no cell in this notebook creates; they look like
# leftovers from earlier misspelled runs and would not exist on a fresh kernel.
df['States'] = newind

In [186]:
# Frame after adding the 'States' column.
df

Unnamed: 0,W,X,Y,Z,Sattes,Sates,States
A,2.70685,0.628133,0.907969,0.503826,CA,CA,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY,NY,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY,WY,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR,OR,OR
E,0.190794,1.978757,2.605967,0.683509,CO,CO,CO


## Changing the index to an already existing column
* To do that we use the `set_index` method

In [189]:
# Use the 'States' column as the row index. This returns a new frame; df itself
# is not modified, as the next cell's output shows.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z,Sattes,Sates
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CA,2.70685,0.628133,0.907969,0.503826,CA,CA
NY,0.651118,-0.319318,-0.848077,0.605965,NY,NY
WY,-2.018168,0.740122,0.528813,-0.589001,WY,WY
OR,0.188695,-0.758872,-0.933237,0.955057,OR,OR
CO,0.190794,1.978757,2.605967,0.683509,CO,CO


In [191]:
# df is unchanged: set_index returned a new frame instead of modifying df.
df

Unnamed: 0,W,X,Y,Z,Sattes,Sates,States
A,2.70685,0.628133,0.907969,0.503826,CA,CA,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY,NY,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY,WY,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR,OR,OR
E,0.190794,1.978757,2.605967,0.683509,CO,CO,CO


In [197]:
# Remove the misspelled 'Sattes' column if it is present.
# errors='ignore' stops this cell from raising KeyError when the column does
# not exist: no cell in this notebook creates it, so it is missing on a fresh
# kernel, and it is also missing the second time this cell is run.
# Reassigning the result (rather than inplace=True) keeps the cell re-runnable.
df = df.drop(columns=['Sattes'], errors='ignore')

In [None]:
## Check the frame after dropping the misspelled column

In [199]:
# Display the frame after the drop.
# NOTE(review): the saved output shows a 'Sates' column but no 'States' column,
# which does not match the cells above; the cells were probably run out of
# order. Restart the kernel and run all cells to regenerate the output.
df

Unnamed: 0,W,X,Y,Z,Sates
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO
