In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.DataFrame([[np.nan,1,2,4],
                [4,np.nan,6,2],
                [5,np.nan,np.nan,np.nan],
                [np.nan,np.nan,np.nan,np.nan]],
                columns=list("ABCD")
               )
df

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0
2,5.0,,,
3,,,,


In [3]:
df.isnull().sum()

A    2
B    3
C    2
D    2
dtype: int64

In [4]:
df.isna()

Unnamed: 0,A,B,C,D
0,True,False,False,False
1,False,True,False,False
2,False,True,True,True
3,True,True,True,True


In [5]:
pd.isna([df>3])

array([[[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]]])

In [6]:
df.notna()

Unnamed: 0,A,B,C,D
0,False,True,True,True
1,True,False,True,True
2,True,False,False,False
3,False,False,False,False


In [7]:
df.notnull().sum()

A    2
B    1
C    2
D    2
dtype: int64

In [8]:
df

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0
2,5.0,,,
3,,,,


In [9]:
df.fillna(0)

Unnamed: 0,A,B,C,D
0,0.0,1.0,2.0,4.0
1,4.0,0.0,6.0,2.0
2,5.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0


In [10]:
df.fillna({"A":0,"B":2,"C":3,"D":1})

Unnamed: 0,A,B,C,D
0,0.0,1.0,2.0,4.0
1,4.0,2.0,6.0,2.0
2,5.0,2.0,3.0,1.0
3,0.0,2.0,3.0,1.0


In [11]:
df.info

<bound method DataFrame.info of      A    B    C    D
0  NaN  1.0  2.0  4.0
1  4.0  NaN  6.0  2.0
2  5.0  NaN  NaN  NaN
3  NaN  NaN  NaN  NaN>

In [12]:
df.value_counts()

Series([], dtype: int64)

In [13]:
df.fillna(df.mean())

Unnamed: 0,A,B,C,D
0,4.5,1.0,2.0,4.0
1,4.0,1.0,6.0,2.0
2,5.0,1.0,4.0,3.0
3,4.5,1.0,4.0,3.0


In [14]:
df.fillna(method="ffill")

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,1.0,6.0,2.0
2,5.0,1.0,6.0,2.0
3,5.0,1.0,6.0,2.0


In [15]:
df.dropna()

Unnamed: 0,A,B,C,D


In [16]:
df.dropna(axis=0)

Unnamed: 0,A,B,C,D


In [17]:
df.dropna(axis=1)

0
1
2
3


In [18]:
df["A"].dropna()

1    4.0
2    5.0
Name: A, dtype: float64

In [19]:
df.dropna(thresh=2) #drop rows that have not at least 2 non-NaN values

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0


In [20]:
df.dropna(subset=["B"]) # only drop rows where NaN appear in specific columns B

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0


In [21]:
df.dropna(how="all") #only drop rows where all columns are NaN

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0
2,5.0,,,


In [22]:
df

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0
2,5.0,,,
3,,,,


In [23]:
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [24]:
#Imputing missing values
from sklearn.impute import SimpleImputer

In [25]:
imp=SimpleImputer(missing_values=np.nan,strategy="mean")
imp=imp.fit(df)
imputed_df=imp.transform(df.values)

In [26]:
df

Unnamed: 0,A,B,C,D
0,,1.0,2.0,4.0
1,4.0,,6.0,2.0
2,5.0,,,
3,,,,


In [27]:
imputed_df

array([[4.5, 1. , 2. , 4. ],
       [4. , 1. , 6. , 2. ],
       [5. , 1. , 4. , 3. ],
       [4.5, 1. , 4. , 3. ]])