# Implementation: Handling Missing Data

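We will use two imputers from scikit-learn: `SimpleImputer`, which fills each gap with a per-column statistic such as the mean, and `KNNImputer`, which fills each gap using the most similar rows.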

In [None]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import StandardScaler

# Toy dataset: three numeric columns, each with exactly one missing entry (np.nan).
data = dict(
    Age=[25, 27, np.nan, 32, 29],
    Salary=[50000, np.nan, 60000, 90000, 75000],
    Siblings=[1, 0, 2, np.nan, 1],
)
df = pd.DataFrame.from_dict(data)

# Show the untouched frame so the imputed versions can be compared against it.
print("Original Data:")
display(df)

## 1. Simple Imputation (Mean)

In [None]:
# Replace every NaN with the mean of its column, computed from the observed values.
imputer_mean = SimpleImputer(strategy='mean')

# By default fit_transform returns a bare NumPy array, so both the column labels
# and the row index have to be restored explicitly. Without `index=df.index` a
# non-default index would be silently replaced by 0..n-1 and the imputed rows
# would no longer align with the original frame.
df_mean = pd.DataFrame(
    imputer_mean.fit_transform(df),
    columns=df.columns,
    index=df.index,
)

print("Mean Imputed:")
display(df_mean)

## 2. KNN Imputation
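`KNNImputer` fills each missing value with the average of that feature over the `n_neighbors` most similar rows. Similarity is a Euclidean distance computed only over the features both rows have observed, so features on a larger numeric scale dominate the distance unless the data is standardized first.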

In [None]:
# KNNImputer measures row similarity with a NaN-aware Euclidean distance on the
# feature values it is given. On the raw data, Salary (tens of thousands) would
# swamp Age and Siblings, so standardize the columns first. StandardScaler
# ignores NaNs when fitting and leaves them in place when transforming.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)

# Each gap is filled with the average of that feature over the 2 nearest rows.
imputer_knn = KNNImputer(n_neighbors=2)
imputed_scaled = imputer_knn.fit_transform(scaled_values)

# Map the completed matrix back to the original units and restore the column
# labels and row index so the result lines up with `df`.
df_knn = pd.DataFrame(
    scaler.inverse_transform(imputed_scaled),
    columns=df.columns,
    index=df.index,
)

print("KNN Imputed:")
display(df_knn)
print("Notice how values might differ from simple mean, as they account for row similarity.")