In [2]:
import pandas as pd
import numpy as np

# Sample records mixing real missing values (np.nan / None) with placeholder
# strings ('NA', 'Missing') that isna() does not flag as missing.
# Note that 'age' holds strings, not numbers.
people = {
    'first': ['Corey', 'Jane', 'John', 'Chris', np.nan, None, 'NA'], 
    'last': ['Schafer', 'Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'], 
    'email': ['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com', None, np.nan, 'Anonymous@email.com', 'NA'],
    'age': ['33', '55', '63', '36', None, None, 'Missing']
}

In [3]:
# Build the DataFrame from the dict: each key becomes a column.
df = pd.DataFrame(people)

In [8]:
# Boolean mask of missing cells: True for np.nan / None.
# The placeholder strings in row 6 ('NA', 'Missing') come back False.
df.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,False,False,False,False


In [15]:
# np.nan -> displayed as NaN  -> treated as a missing (blank) cell
# None   -> displayed as None -> treated as a missing (blank) cell

# Title-case the column labels: 'first' -> 'First', 'last' -> 'Last', ...
df.columns = df.columns.str.title()

In [16]:
df

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [21]:
# 🧭🧭 Two major scenarios of null (NA) values
# 1 of 2: true null cells: np.nan or None
# 2 of 2: fake non-null cells: 'NA', 'n/a', 'missing', ... (placeholder strings that pose as real values)

# CAUTION: dropna() drops EVERY row that has at least one missing value, so only fully populated rows remain - use with care.
# Row 6 survives because its placeholder strings ('NA', 'Missing') are not recognised as missing.
df.dropna()

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
6,,Missing,,Missing


In [23]:
# Same result as df.dropna(): axis=0 works on rows, how='any' drops a row as soon as one cell is missing.
df.dropna(axis=0, how='any')

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
6,,Missing,,Missing


In [24]:
# axis=1 works on columns: every column holds at least one missing value,
# so all four columns are dropped and only the index is left.
df.dropna(axis=1, how='any')

0
1
2
3
4
5
6


In [27]:
# how='all' drops a row only when every cell in it is missing - here just row 4.
df.dropna(axis=0, how='all')  # ⭐️

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [28]:
# No column is entirely missing, so nothing is dropped and the frame comes back unchanged.
df.dropna(axis=1, how='all')

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [22]:
df.columns

Index(['First', 'Last', 'Email', 'Age'], dtype='object')

In [40]:
# Drop rows where both 'First' and 'Last' are missing (how='all' is evaluated over the subset columns only).
# 'Email' is not considered, so row 5 is dropped even though it has an email address.

df.dropna(axis=0, subset=['First', 'Last'], how='all')


Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
6,,Missing,,Missing


In [44]:
df.columns

Index(['First', 'Last', 'Email', 'Age'], dtype='object')

In [45]:
# Keep a row if at least one of 'First', 'Last', 'Email' is present: only row 4 is dropped.
df.dropna(axis=0, subset=['First', 'Last', 'Email'], how='all')

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [50]:
df['First'].isna()

0    False
1    False
2    False
3    False
4     True
5     True
6    False
Name: First, dtype: bool

In [52]:
df['Email'].isna()

0    False
1    False
2    False
3     True
4     True
5    False
6    False
Name: Email, dtype: bool

In [53]:
# Element-wise AND of the two masks: True only where both 'First' and 'Email' are missing (row 4).
(df['First'].isna()) & (df['Email'].isna())

0    False
1    False
2    False
3    False
4     True
5    False
6    False
dtype: bool

In [69]:
# Flag rows where 'First', 'Last' AND 'Email' are all missing; each column is
# tested exactly once. Indexing with ~filt then keeps the same rows as
# df.dropna(axis=0, subset=['First', 'Last', 'Email'], how='all').
filt = df['First'].isna() & df['Last'].isna() & df['Email'].isna()

In [66]:
df.isna()

Unnamed: 0,First,Last,Email,Age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,False,False,False,False


In [71]:
# ~ inverts the boolean mask, keeping every row that is NOT flagged by filt (row 4 is removed).
df[~filt]

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [72]:
df

Unnamed: 0,First,Last,Email,Age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@email.com,
6,,Missing,,Missing
