# **Missing Value Imputation**

This code imputes missing values in the NUMERICAL features of the Titanic dataset, first with the column median and then with k-nearest-neighbours (kNN) imputation.

In [1]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.datasets import fetch_openml
import warnings

# Suppress all warnings so the printed NaN reports are not interleaved with
# library notices.
warnings.filterwarnings('ignore')

# Load the Titanic dataset from OpenML as a pandas DataFrame
titanic = fetch_openml('titanic', version=1, as_frame=True)

# Work on two independent copies. Binding both names directly to
# `titanic.data` would make them aliases of one DataFrame, so the in-place
# median fill below would also fill the frame handed to the kNN imputer,
# leaving it nothing to impute.
df1 = titanic.data.copy()  # used for median imputation
df2 = titanic.data.copy()  # used for kNN imputation

# Print NaNs at the outset
print("\n NaNs before median imputation\n")
print(df1.isnull().sum())  # Missing-value count per column

# --- Median imputation ------------------------------------------------------
# Fill each numerical column's NaNs with that column's own median.
# Non-numerical columns are left untouched.
numerical_cols = df1.select_dtypes(include=['float64', 'int64']).columns
df1[numerical_cols] = df1[numerical_cols].apply(lambda x: x.fillna(x.median()), axis=0)

# Print NaNs after median imputation
print("\n NaNs after median imputation\n")
print(df1.isnull().sum())  # Check the remaining missing values

# --- kNN imputation ---------------------------------------------------------
# Create the imputer object; each missing entry is estimated from the
# 5 nearest rows.
knn_imputer = KNNImputer(n_neighbors=5)

# KNNImputer works with numerical data only, so select the numerical columns
# (categorical data would need to be encoded first).
df2_numerical = df2.select_dtypes(include=['float64', 'int64'])

# Fit the imputer and transform the data (returns a NumPy array)
imputed_data = knn_imputer.fit_transform(df2_numerical)

# Convert the array back to a pandas DataFrame, keeping the original column
# names and row index so the result stays aligned with `df2`.
df2_imputed = pd.DataFrame(
    imputed_data,
    columns=df2_numerical.columns,
    index=df2_numerical.index,
)

# Print NaNs after kNN imputation
print("\n NaNs after kNN imputation\n")
print(df2_imputed.isnull().sum())  # Check the remaining missing values


 NaNs before median imputation

pclass          0
name            0
sex             0
age           263
sibsp           0
parch           0
ticket          0
fare            1
cabin        1014
embarked        2
boat          823
body         1188
home.dest     564
dtype: int64

 NaNs after median imputation

pclass          0
name            0
sex             0
age             0
sibsp           0
parch           0
ticket          0
fare            0
cabin        1014
embarked        2
boat          823
body            0
home.dest     564
dtype: int64

 NaNs after kNN imputation

pclass    0
age       0
sibsp     0
parch     0
fare      0
body      0
dtype: int64
