In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample records for the dropna() demos. np.nan and None are real missing
# values; the strings 'NA' and 'Missing' are placeholder text that merely
# looks like missing data.
people = dict(
    first=['Corey', 'Jane', 'John', 'Chris', np.nan, None, 'NA'],
    last=['Schafer', 'Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'],
    email=['corey@gmail.com', 'jane@gmail.com', 'john@gmail.com', None, np.nan, 'anonymous@gmail.com', 'NA'],
    age=['33', '55', '63', '36', None, None, 'Missing'],
)

In [3]:
# Build the demo frame: one column per key of `people`, one row per list position.
df = pd.DataFrame(data=people)

In [4]:
# Display the unmodified frame as the baseline for every dropna() call below.
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,anonymous@gmail.com,
6,,Missing,,Missing


## dropna()
Syntax:<br>
dropna()
<br>

Default arguments: <br>
dropna(axis = 'index', how = 'any', inplace = False)<br>
returns a DataFrame obj

'index' means the check is done row by row, so rows are what get dropped<br>
'any' means drop the row if any of its columns has a missing value<br>
---> That is, drop every row that has at least one missing value, whether it is just one or many<br>

We could set this to:<br>
dropna(axis = 'index', how = 'all')<br>
---> This would drop only rows where all the values(columns) are missing<br>


We could set this to:<br>
dropna(axis = 'columns',  how = 'any')<br>
---> This would drop every column in which even a single value is missing<br>

We could set this to:<br>
dropna(axis = 'columns', how = 'all')<br>
---> This would drop the columns where all the values are missing<br>


<b>NOTE: </b><br>
The axis parameter takes two possible arguments:<br>
index or columns (alternatively, 0 or 1)

The how parameter takes two possible arguments:<br>
any or all

In [5]:
# No arguments: uses the defaults axis='index', how='any', so every row that
# contains at least one NaN/None is removed. df itself is not modified.
df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
6,,Missing,,Missing


## Row-wise dropping
(axis = 'index' or 0)

In [9]:
# Row-wise dropping is the default behaviour (axis='index', how='any').
df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
6,,Missing,,Missing


### Dropping when 'any' of the values in a row is missing
(how = 'any')

In [7]:
# Same call as df.dropna(), with the default arguments spelled out explicitly:
# drop each row (axis='index') that has any missing value (how='any').
df.dropna(axis = 'index', how = 'any')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
6,,Missing,,Missing


The row at index 6 was not dropped even though all of its values are effectively missing, because they are stored as the custom placeholder strings 'NA' and 'Missing', which pandas treats as ordinary text rather than as missing values.

In [8]:
# axis=0 is the integer alias for axis='index', so this gives the same result
# as the previous cell.
df.dropna(axis = 0 , how = 'any')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
6,,Missing,,Missing


### Dropping only when all of the values in the row are missing
(how = 'all')

In [10]:
# how='all' drops a row only when every column in it is NaN/None;
# in this frame that is only the row at index 4.
df.dropna(axis = 'index', how = 'all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
3,Chris,Schafer,,36
5,,,anonymous@gmail.com,
6,,Missing,,Missing


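In the above output, only the row at index 4 is dropped, because it is the only row in which every value is NaN/None. The row at index 6 is not dropped because its values are the custom placeholder strings 'NA' and 'Missing', which pandas does not recognize as missing values.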

## Column-wise dropping
(axis = 'columns' or 1)

### Dropping when 'any' of the values in a column is missing
(how = 'any')

In [12]:
# Returns a DataFrame with only the index and no columns, because every
# column has at least one missing value.
df.dropna(axis = 'columns', how = 'any')

0
1
2
3
4
5
6


##### All the columns were dropped because every column has at least one missing value

### Dropping only when 'all' of the values in a column are missing
(how = 'all')

In [13]:
# how='all' with axis='columns' drops a column only when every value in it is
# NaN/None; no column in this frame qualifies, so all four are kept.
df.dropna(axis = 'columns', how = 'all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,corey@gmail.com,33
1,Jane,Doe,jane@gmail.com,55
2,John,Doe,john@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,anonymous@gmail.com,
6,,Missing,,Missing


In the above output, not a single column was dropped because there is not a single column where all the values are missing