In [5]:
import numpy as np
import pandas as pd

In [7]:
from numpy.random import randn

In [14]:
# Seed NumPy's global random generator so the randn() call used to build df gives the same values on every run.
np.random.seed(101)

### Syntax:  df = pd.DataFrame(data, index, columns)

In [15]:
# 5 rows (A-E) by 4 columns (W-Z) of standard-normal draws, with every
# constructor argument spelled out by keyword.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [16]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# 1. Conditional Selection on entire DataFrame ( here df )

In [17]:
# Element-wise comparison: the result is a boolean DataFrame with the same index and columns as df.
booldf = df > 0

In [18]:
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [20]:
df[booldf]   # keeps the value wherever booldf is True; positions where it is False become NaN

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


### NOTE:-  When performing Conditional Selection on the entire DataFrame (here 'df'), every value for which the condition is 'False' is replaced by 'NaN'; the DataFrame keeps its original shape.

In [22]:
# The two steps above (build the boolean mask, then index with it) can be written as one expression:

df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


# 2. Conditional Selection on Column

In [23]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


#### Example No.1:  Display all the rows in DataFrame, where column 'W' > 0

In [24]:
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [25]:
df['W']>0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [26]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


### NOTE:-  When performing Conditional Selection on a column, the rows for which the condition is 'False' are dropped from the result, so they are NOT displayed.

#### Example No.2:  Display all the rows in DataFrame, where column 'Z' < 0

In [27]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [29]:
df['Z']<0

A    False
B    False
C     True
D    False
E    False
Name: Z, dtype: bool

In [30]:
df[df['Z']<0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


#### Example No.3:  Display all the rows in DataFrame, where column 'Y' > 0  and further grab column 'X'

In [31]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [34]:
df[df['Y']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [36]:
# Select the rows where Y > 0 and the single column X in one .loc lookup,
# rather than chaining two [] operations (filter first, then pick the column).
df.loc[df['Y'] > 0, 'X']

A    0.628133
C    0.740122
E    1.978757
Name: X, dtype: float64

#### Example No.4:  Display all the rows in DataFrame, where column 'X' < 0  and further grab columns 'W' & 'Z'

In [37]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [39]:
df[df['X']<0]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [40]:
# Select the rows where X < 0 and the columns W and Z in one .loc lookup,
# rather than chaining two [] operations (filter first, then pick the columns).
df.loc[df['X'] < 0, ['W', 'Z']]

Unnamed: 0,W,Z
B,0.651118,0.605965
D,0.188695,0.955057


# 3. Multi-Conditional Selections
### Syntax:  NOTE-  put separate conditions in separate parentheses
### 1.  df [ ( condn1 )  &  ( condn2 ) ]
### 2.  df [ ( condn1 )  |  ( condn2 ) ]   
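## NOTE: Here we use '&' (ampersand) and not 'and'. Also we use '|' (pipe) and not 'or'. Python's 'and' / 'or' need a single True/False value, so using them on whole columns raises a ValueError; '&' and '|' combine the conditions element by element.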


#### Example No.1: Display all rows in DataFrame, where column 'W' > 0  AND  'Z' > 0

In [42]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [43]:
# Each comparison needs its own parentheses: & binds more tightly than >,
# so without them Python would try to evaluate 0 & df['Z'] first.
df[(df['W']>0) & (df['Z']>0)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


#### Example No.2: Display all rows in DataFrame, where column 'X' > 0  OR  'Y' > 1

In [44]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [45]:
# Each comparison needs its own parentheses: | binds more tightly than >,
# so without them Python would try to evaluate 0 | df['Y'] first.
df[(df['X']>0) | (df['Y']>1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


# 4. Resetting index in DataFrame
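#### NOTE:  reset_index() returns a new DataFrame. To permanently update 'df', either assign the result back ( df = df.reset_index() ) or provide the ' inplace = True ' parameter.
#### By default 'inplace' is set to False, so the original DataFrame is left unchanged.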

In [46]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [47]:
# Returns a new DataFrame with a 0..n-1 integer index; the old labels move into an 'index' column. df itself is not modified.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


# 5. Setting new index in DataFrame
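#### NOTE:  set_index() returns a new DataFrame. To permanently update 'df', either assign the result back ( df = df.set_index('Countries') ) or provide the ' inplace = True ' parameter.
#### By default 'inplace' is set to False, so the original DataFrame is left unchanged.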

In [62]:
# Labels that will become the new index, one per existing row.
# NOTE: the number of labels must equal the number of rows in the DataFrame.
newind = ['IND', 'RUS', 'USA', 'JAP', 'EGY']

In [63]:
newind

['IND', 'RUS', 'USA', 'JAP', 'EGY']

In [64]:
# Attach the labels as an ordinary column first; this assignment modifies df in place.
df['Countries'] = newind

In [65]:
df

Unnamed: 0,W,X,Y,Z,Countries
A,2.70685,0.628133,0.907969,0.503826,IND
B,0.651118,-0.319318,-0.848077,0.605965,RUS
C,-2.018168,0.740122,0.528813,-0.589001,USA
D,0.188695,-0.758872,-0.933237,0.955057,JAP
E,0.190794,1.978757,2.605967,0.683509,EGY


In [66]:
# Returns a new DataFrame that uses the 'Countries' column as its index; df itself is not modified by this call.
df.set_index('Countries')

Unnamed: 0_level_0,W,X,Y,Z
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
IND,2.70685,0.628133,0.907969,0.503826
RUS,0.651118,-0.319318,-0.848077,0.605965
USA,-2.018168,0.740122,0.528813,-0.589001
JAP,0.188695,-0.758872,-0.933237,0.955057
EGY,0.190794,1.978757,2.605967,0.683509


In [68]:
df
# The index is unchanged: set_index() returned a new DataFrame rather than altering df.
# To keep the result, assign it back (df = df.set_index('Countries')) or pass inplace=True.

Unnamed: 0,W,X,Y,Z,Countries
A,2.70685,0.628133,0.907969,0.503826,IND
B,0.651118,-0.319318,-0.848077,0.605965,RUS
C,-2.018168,0.740122,0.528813,-0.589001,USA
D,0.188695,-0.758872,-0.933237,0.955057,JAP
E,0.190794,1.978757,2.605967,0.683509,EGY
