In [2]:
import pandas as pd
import numpy as np

# Build a small, reproducible sales table for the missing-data examples.
# The legacy global seed pins the random draws, so every run produces the
# same "Sales" figures.
np.random.seed(42)

# Every third row (positions 0, 3, 6, ...) is left without a sales figure;
# the remaining rows each get one random integer in [100, 1000).
sales_values = [
    np.nan if row % 3 == 0 else np.random.randint(100, 1000)
    for row in range(20)
]

data = dict(
    Product=["Product_A", "Product_B", "Product_C", "Product_D", "Product_E"] * 4,
    Region=["North", "South", "East", "West"] * 5,
    Sales=sales_values,
    Month=[1, 2, 3, 4] * 5,
)
df = pd.DataFrame(data)
print(df)

      Product Region  Sales  Month
0   Product_A  North    NaN      1
1   Product_B  South  202.0      2
2   Product_C   East  535.0      3
3   Product_D   West    NaN      4
4   Product_E  North  960.0      1
5   Product_A  South  370.0      2
6   Product_B   East    NaN      3
7   Product_C   West  206.0      4
8   Product_D  North  171.0      1
9   Product_E  South    NaN      2
10  Product_A   East  800.0      3
11  Product_B   West  120.0      4
12  Product_C  North    NaN      1
13  Product_D  South  714.0      2
14  Product_E   East  221.0      3
15  Product_A   West    NaN      4
16  Product_B  North  566.0      1
17  Product_C  South  314.0      2
18  Product_D   East    NaN      3
19  Product_E   West  430.0      4


In [4]:
# Count the missing entries in each column.
df.isna().sum()

Product    0
Region     0
Sales      7
Month      0
dtype: int64

In [7]:
# Show only the rows in which at least one column is missing.
has_missing = df.isna().any(axis="columns")
df.loc[has_missing]

Unnamed: 0,Product,Region,Sales,Month
0,Product_A,North,,1
3,Product_D,West,,4
6,Product_B,East,,3
9,Product_E,South,,2
12,Product_C,North,,1
15,Product_A,West,,4
18,Product_D,East,,3


In [8]:
# Two ways of discarding missing data, each leaving `df` itself untouched:
# drop every row that has a missing value ...
df_no_na = df.dropna(axis="index")
# ... or drop every column that has a missing value.
df_no_na_col = df.dropna(axis="columns")

In [10]:
# Replace every missing value in the frame with a constant zero and
# preview the first five rows of the result.
df_fill_0 = df.fillna(value=0)
df_fill_0.head(5)

Unnamed: 0,Product,Region,Sales,Month
0,Product_A,North,0.0,1
1,Product_B,South,202.0,2
2,Product_C,East,535.0,3
3,Product_D,West,0.0,4
4,Product_E,North,960.0,1


In [12]:
# Impute missing sales with the mean of the observed sales figures
# (`Series.mean` skips missing values by default).
#
# The fill value is keyed by column so that only "Sales" is touched. A bare
# scalar passed to `fillna` is applied to the whole frame, which would also
# write the sales mean into any other column that contains missing values.
sales_mean = df["Sales"].mean()
df_fill_avg = df.fillna({"Sales": sales_mean})
df_fill_avg.head()

Unnamed: 0,Product,Region,Sales,Month
0,Product_A,North,431.461538,1
1,Product_B,South,202.0,2
2,Product_C,East,535.0,3
3,Product_D,West,431.461538,4
4,Product_E,North,960.0,1


In [13]:
# Fill gaps in "Sales" by linear interpolation between neighbouring rows.
# `assign` returns a new frame, so `df` is left unchanged.
# Note: the missing value in the very first row has no earlier observation
# to interpolate from and is still NaN in the printed result.
df_interpolate = df.assign(Sales=df["Sales"].interpolate(method="linear"))
print(df_interpolate)

      Product Region  Sales  Month
0   Product_A  North    NaN      1
1   Product_B  South  202.0      2
2   Product_C   East  535.0      3
3   Product_D   West  747.5      4
4   Product_E  North  960.0      1
5   Product_A  South  370.0      2
6   Product_B   East  288.0      3
7   Product_C   West  206.0      4
8   Product_D  North  171.0      1
9   Product_E  South  485.5      2
10  Product_A   East  800.0      3
11  Product_B   West  120.0      4
12  Product_C  North  417.0      1
13  Product_D  South  714.0      2
14  Product_E   East  221.0      3
15  Product_A   West  393.5      4
16  Product_B  North  566.0      1
17  Product_C  South  314.0      2
18  Product_D   East  372.0      3
19  Product_E   West  430.0      4


In [14]:
def _fill_with_group_mean(group_sales):
    """Return the group's sales with missing entries replaced by the group's mean."""
    return group_sales.fillna(group_sales.mean())


# Impute each missing sales figure with the mean of its own region, working
# on a copy so that `df` keeps its original missing values.
df_group_fill = df.copy()
sales_by_region = df_group_fill.groupby("Region")["Sales"]
df_group_fill["Sales"] = sales_by_region.transform(_fill_with_group_mean)
print(df_group_fill)


      Product Region       Sales  Month
0   Product_A  North  565.666667      1
1   Product_B  South  202.000000      2
2   Product_C   East  535.000000      3
3   Product_D   West  252.000000      4
4   Product_E  North  960.000000      1
5   Product_A  South  370.000000      2
6   Product_B   East  518.666667      3
7   Product_C   West  206.000000      4
8   Product_D  North  171.000000      1
9   Product_E  South  400.000000      2
10  Product_A   East  800.000000      3
11  Product_B   West  120.000000      4
12  Product_C  North  565.666667      1
13  Product_D  South  714.000000      2
14  Product_E   East  221.000000      3
15  Product_A   West  252.000000      4
16  Product_B  North  566.000000      1
17  Product_C  South  314.000000      2
18  Product_D   East  518.666667      3
19  Product_E   West  430.000000      4
