In [1]:
import pandas as pd
import numpy as np

# Toy employee table used throughout the notebook. Every column has a few
# np.nan holes on purpose so the imputation steps below have work to do.
data = dict(
    age=[25, 30, np.nan, 45, 35, np.nan, 40],
    salary=[35000, 42000, 50000, np.nan, 58000, 61000, np.nan],
    city=["Delhi", "Mumbai", np.nan, "Chennai", "Delhi", np.nan, "Mumbai"],
    department=["Sales", "IT", "HR", np.nan, "IT", "HR", "Sales"],
)

# Column order follows the keyword order above.
df = pd.DataFrame(data)

# Show the header line followed by the raw frame, gaps included.
print("Original Dataset with Missing Values:", df, sep="\n")


Original Dataset with Missing Values:
    age   salary     city department
0  25.0  35000.0    Delhi      Sales
1  30.0  42000.0   Mumbai         IT
2   NaN  50000.0      NaN         HR
3  45.0      NaN  Chennai        NaN
4  35.0  58000.0    Delhi         IT
5   NaN  61000.0      NaN         HR
6  40.0      NaN   Mumbai      Sales


Identify Columns by Data Type

In [2]:
# Numeric columns. The "number" selector matches all numeric dtypes, so
# columns stored as e.g. int32 or float32 are picked up too instead of
# being silently skipped by a hard-coded ['float64', 'int64'] list.
num_cols = df.select_dtypes(include="number").columns

# Categorical columns: the object-dtype (string-like) columns.
cat_cols = df.select_dtypes(include=["object"]).columns

# Echo both groups so the reader can confirm the split before imputing.
print("Numeric Columns:", num_cols.tolist())
print("Categorical Columns:", cat_cols.tolist())


Numeric Columns: ['age', 'salary']
Categorical Columns: ['city', 'department']


Impute Missing Values

In [3]:
# Numeric columns: fill gaps with the column median.
# The result is assigned back to df[col] rather than calling
# fillna(..., inplace=True) on the selected column: that chained in-place
# form works on an intermediate object, triggers a FutureWarning on
# pandas 2.x, and leaves df unchanged when Copy-on-Write is enabled.
for col in num_cols:
    df[col] = df[col].fillna(df[col].median())

# Categorical columns: fill gaps with the most frequent value.
for col in cat_cols:
    modes = df[col].mode()
    # mode() returns an empty Series when the column has no observed values;
    # in that case there is nothing sensible to impute, so leave it as is
    # instead of failing on the first-element lookup.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])


Verify Missing Values Are Handled

In [4]:
# Count the missing entries left in each column (isna is the same check as
# isnull); all zeros means every gap was filled.
missing_per_column = df.isna().sum()

# Print the frame first, then the per-column missing counts.
print("Dataset After Imputation:", df, sep="\n")
print("\nMissing Values per Column:", missing_per_column, sep="\n")


Dataset After Imputation:
    age   salary     city department
0  25.0  35000.0    Delhi      Sales
1  30.0  42000.0   Mumbai         IT
2  35.0  50000.0    Delhi         HR
3  45.0  50000.0  Chennai         HR
4  35.0  58000.0    Delhi         IT
5  35.0  61000.0    Delhi         HR
6  40.0  50000.0   Mumbai      Sales

Missing Values per Column:
age           0
salary        0
city          0
department    0
dtype: int64
