In [1]:
import numpy as np
import pandas as pd

In [None]:
# Small demo frame used by every missing-data example below.
# The values match the recorded outputs: each column holds at least one NaN,
# so isna/dropna/fillna all have something to act on, and the NaNs make all
# four columns float64.
data = {
    'A': [1, 2, np.nan, 4],
    'B': [5, np.nan, 7, 8],
    'C': [9, 10, 11, np.nan],
    'D': [13, np.nan, 15, np.nan],
}

df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
1,2.0,,10.0,
2,,7.0,11.0,15.0
3,4.0,8.0,,


In [4]:
# Boolean mask with the same shape as df: True where a cell is NaN, False otherwise.
df.isna()

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,False,True,False,True
2,True,False,False,False
3,False,False,True,True


In [5]:
# Number of NaN values in each column, returned as a Series indexed by column name.
df.isna().sum()

A    1
B    1
C    1
D    2
dtype: int64

In [6]:
# Per-column flag: True if the column contains at least one NaN, otherwise False.
df.isna().any()

A    True
B    True
C    True
D    True
dtype: bool

In [7]:
# New DataFrame without any row that contains at least one NaN.
df.dropna()

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0


In [8]:
# thresh=3 KEEPS only the rows that have at least 3 non-NaN values;
# rows with fewer than 3 non-NaN values are dropped. Returns a new DataFrame.
df.dropna(thresh=3)

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
2,,7.0,11.0,15.0


### fill the missing data

In [9]:
# New DataFrame with every NaN replaced by 0.
df.fillna(0)

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
1,2.0,0.0,10.0,0.0
2,0.0,7.0,11.0,15.0
3,4.0,8.0,0.0,0.0


In [10]:
# Column-specific fill values: a NaN in a given column is replaced by that
# column's entry in the mapping. fillna returns a new DataFrame.
values = dict(A=10, B=20, C=30, D=40)
df.fillna(value=values)

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
1,2.0,20.0,10.0,40.0
2,10.0,7.0,11.0,15.0
3,4.0,8.0,30.0,40.0


In [14]:
# Show df again: it still contains its NaN values, because the fillna calls
# above returned new DataFrames instead of modifying df.
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
1,2.0,,10.0,
2,,7.0,11.0,15.0
3,4.0,8.0,,


In [None]:
# Mean of each column, computed one column at a time and keyed by column name.
# NaN entries are skipped when averaging.
mean_values = {col: df[col].mean() for col in ('A', 'B', 'C', 'D')}
mean_values

{'A': np.float64(2.3333333333333335),
 'B': np.float64(6.666666666666667),
 'C': np.float64(10.0),
 'D': np.float64(14.0)}

In [13]:
# New DataFrame with each NaN replaced by the mean of its own column.
# Only works for numeric columns.
df.fillna(df.mean())

Unnamed: 0,A,B,C,D
0,1.0,5.0,9.0,13.0
1,2.0,6.666667,10.0,14.0
2,2.333333,7.0,11.0,15.0
3,4.0,8.0,10.0,14.0
