Ref: https://towardsdatascience.com/8-methods-for-handling-missing-values-with-python-pandas-842544cdf891

In [22]:
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "Date":pd.date_range(start="2021-10-31", periods=10, freq='D'),
    "Item":1014,
    "Mesuare_1":np.random.randint(1,10,size=10),
    "Mesuare_2":np.random.random(10).round(2),
    "Mesuare_3":np.random.random(10).round(2),
    "Mesuare_4":np.random.randn(10)
})

df.loc[[2,9], "Item"] = np.nan
df.loc[[2,7,9], "Mesuare_1"] = np.nan
df.loc[[2,3], "Mesuare_2"] = np.nan
df.loc[[2], "Mesuare_3"] = np.nan
df.loc[:6, "Mesuare_4"] = np.nan

df = df.astype({
    "Item": pd.Int64Dtype(),
    "Mesuare_1": pd.Int64Dtype()})

df.head()

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,
1,2021-11-01,1014.0,6.0,0.21,0.24,
2,2021-11-02,,,,,
3,2021-11-03,1014.0,1.0,,0.45,
4,2021-11-04,1014.0,6.0,0.59,0.99,


### 1. Drop rows or columns that have a missing value

In [23]:
df_copy = df.copy()  # work on a copy so the original frame is left untouched

# A notebook cell renders only its final expression, so a bare dropna() call in
# the middle of the cell would be computed and silently discarded. Passing each
# result to display() (a builtin inside IPython/Jupyter) shows both tables.
display(df_copy.dropna())        # drop rows that have at least one missing value
display(df_copy.dropna(axis=1))  # drop columns that have at least one missing value

Unnamed: 0,Date
0,2021-10-31
1,2021-11-01
2,2021-11-02
3,2021-11-03
4,2021-11-04
5,2021-11-05
6,2021-11-06
7,2021-11-07
8,2021-11-08
9,2021-11-09


### 2. Drop rows or columns that only have missing values

In [26]:
# how="all" removes a row only when every one of its values is missing.
# Date is populated on every row, so nothing is dropped here.
df_copy.dropna(axis=0, how="all")

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,
1,2021-11-01,1014.0,6.0,0.21,0.24,
2,2021-11-02,,,,,
3,2021-11-03,1014.0,1.0,,0.45,
4,2021-11-04,1014.0,6.0,0.59,0.99,
5,2021-11-05,1014.0,7.0,0.5,0.54,
6,2021-11-06,1014.0,6.0,0.01,0.83,
7,2021-11-07,1014.0,,0.89,0.45,0.757517
8,2021-11-08,1014.0,7.0,0.83,0.28,0.163417
9,2021-11-09,,,0.31,0.67,-2.088547


### 3. Drop rows or columns based on a threshold value

In [27]:
# thresh is the minimum number of non-missing values a row needs in order to be kept.
df_copy.dropna(axis=0, thresh=4)

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,
1,2021-11-01,1014.0,6.0,0.21,0.24,
3,2021-11-03,1014.0,1.0,,0.45,
4,2021-11-04,1014.0,6.0,0.59,0.99,
5,2021-11-05,1014.0,7.0,0.5,0.54,
6,2021-11-06,1014.0,6.0,0.01,0.83,
7,2021-11-07,1014.0,,0.89,0.45,0.757517
8,2021-11-08,1014.0,7.0,0.83,0.28,0.163417
9,2021-11-09,,,0.31,0.67,-2.088547


### 4. Drop based on a particular subset of columns

In [30]:
# Only the listed columns are inspected; a row is removed when any of them is missing.
required_columns = ["Mesuare_2", "Mesuare_3"]
df_copy.dropna(subset=required_columns)

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,
1,2021-11-01,1014.0,6.0,0.21,0.24,
4,2021-11-04,1014.0,6.0,0.59,0.99,
5,2021-11-05,1014.0,7.0,0.5,0.54,
6,2021-11-06,1014.0,6.0,0.01,0.83,
7,2021-11-07,1014.0,,0.89,0.45,0.757517
8,2021-11-08,1014.0,7.0,0.83,0.28,0.163417
9,2021-11-09,,,0.31,0.67,-2.088547


### 5. Fill with a constant value

In [32]:
# Per-column replacement constants; columns that are not listed keep their missing values.
values = dict(Item=1014, Mesuare_1=1)
df_copy.fillna(values)

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014,6,0.42,0.05,
1,2021-11-01,1014,6,0.21,0.24,
2,2021-11-02,1014,1,,,
3,2021-11-03,1014,1,,0.45,
4,2021-11-04,1014,6,0.59,0.99,
5,2021-11-05,1014,7,0.5,0.54,
6,2021-11-06,1014,6,0.01,0.83,
7,2021-11-07,1014,1,0.89,0.45,0.757517
8,2021-11-08,1014,7,0.83,0.28,0.163417
9,2021-11-09,1014,1,0.31,0.67,-2.088547


### 6. Fill with an aggregated value

In [33]:
# Impute the gaps in Mesuare_2 with that column's own mean
# (Series.mean() skips missing values by default).
mesuare_2 = df_copy["Mesuare_2"]
mesuare_2.fillna(mesuare_2.mean())

0    0.42
1    0.21
2    0.47
3    0.47
4    0.59
5    0.50
6    0.01
7    0.89
8    0.83
9    0.31
Name: Mesuare_2, dtype: float64

### 7. Replace with the previous or next value

In [34]:
# Backward fill: each missing value takes the next valid value below it in the
# same column (use .ffill() to take the previous value instead). Missing values
# with no valid value after them stay missing.
# fillna(method=...) is deprecated in recent pandas; .bfill() is the
# dedicated replacement and returns the same result.
df_copy.bfill()

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,0.757517
1,2021-11-01,1014.0,6.0,0.21,0.24,0.757517
2,2021-11-02,1014.0,1.0,0.59,0.45,0.757517
3,2021-11-03,1014.0,1.0,0.59,0.45,0.757517
4,2021-11-04,1014.0,6.0,0.59,0.99,0.757517
5,2021-11-05,1014.0,7.0,0.5,0.54,0.757517
6,2021-11-06,1014.0,6.0,0.01,0.83,0.757517
7,2021-11-07,1014.0,7.0,0.89,0.45,0.757517
8,2021-11-08,1014.0,7.0,0.83,0.28,0.163417
9,2021-11-09,,,0.31,0.67,-2.088547


In [39]:
# limit caps how many consecutive missing values are filled per gap. With
# limit=1 only the missing value directly above a valid value is replaced;
# any further missing values higher up in the same run stay missing.
# fillna(method=...) is deprecated in recent pandas, so .bfill() is used.
# NOTE(review): the saved output of this cell shows up to four consecutive
# values filled in Mesuare_4, which does not match limit=1 -- re-run to refresh.
df_copy.bfill(limit=1)

Unnamed: 0,Date,Item,Mesuare_1,Mesuare_2,Mesuare_3,Mesuare_4
0,2021-10-31,1014.0,6.0,0.42,0.05,
1,2021-11-01,1014.0,6.0,0.21,0.24,
2,2021-11-02,1014.0,1.0,0.59,0.45,
3,2021-11-03,1014.0,1.0,0.59,0.45,0.757517
4,2021-11-04,1014.0,6.0,0.59,0.99,0.757517
5,2021-11-05,1014.0,7.0,0.5,0.54,0.757517
6,2021-11-06,1014.0,6.0,0.01,0.83,0.757517
7,2021-11-07,1014.0,7.0,0.89,0.45,0.757517
8,2021-11-08,1014.0,7.0,0.83,0.28,0.163417
9,2021-11-09,,,0.31,0.67,-2.088547


### 8. Fill by using another data frame

In [None]:
# df2 is not defined in any visible cell of this notebook, so the replacement
# frame is built here to keep the cell runnable on a fresh kernel.
# fillna() aligns the replacement frame on index and column labels: a missing
# cell is filled from the cell with the same row/column label in df2, and
# columns that df2 does not contain (Item, Mesuare_1) are left unchanged.
df2 = pd.DataFrame(
    {"Mesuare_2": 0.5, "Mesuare_3": 0.5, "Mesuare_4": 0.0},
    index=df_copy.index,
)
df_copy.fillna(df2)