In [2]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler

# Step 2: Load the dataset
# Small illustrative housing table; np.nan marks the entries to be imputed.
data = {
    'Size': [2100, 1600, np.nan, 2400, 1800],
    'Bedrooms': [3, np.nan, 4, 3, 2],
    'Bathrooms': [2, 2, 3, 2, 1],
    'Location': [1, 2, 3, np.nan, 2],
    'Age': [10, 5, 8, 20, 15],
    'Price': [500000, 350000, 450000, 600000, 400000]
}

df = pd.DataFrame(data)

# Step 3: Preprocess the data
# Convert categorical variables to numerical format if necessary.
# In this example, 'Location' is already numerical.
# NOTE(review): 'Location' looks like a category code; KNN imputation fills a
# gap with an average of neighbouring rows, which may yield a value that is
# not a valid code -- confirm this is acceptable for real data.

# Pick the feature columns by name instead of by position, so the result
# stays correctly labelled even if the target is not the last column.
target_col = 'Price'
feature_cols = [col for col in df.columns if col != target_col]

# Standardize the features (the target is excluded) so that large-valued
# columns such as 'Size' do not dominate the neighbour distances.
# StandardScaler ignores NaNs when fitting and keeps them in the output,
# so the missing entries are still missing after this step.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[feature_cols])

# Step 4: Apply KNN Imputation
# Each missing entry is filled from the 3 nearest rows that have a value
# for that feature.
imputer = KNNImputer(n_neighbors=3)
imputed_data = imputer.fit_transform(scaled_features)

# Convert the imputed data back to the original scale
imputed_data = scaler.inverse_transform(imputed_data)

# Create a new DataFrame with the imputed data. Reusing df's index keeps the
# rows aligned with the original frame, so the target assignment below
# matches row-for-row even when df does not have a default 0..n-1 index.
imputed_df = pd.DataFrame(imputed_data, columns=feature_cols, index=df.index)
imputed_df[target_col] = df[target_col]

print("Original DataFrame with Missing Values:")
print(df)
print("\nDataFrame after KNN Imputation:")
print(imputed_df)

# Step 5: Evaluate the result
# For this case study, we simply print the before and after data.
# In a real-world scenario, further analysis and modeling would follow.


Original DataFrame with Missing Values:
     Size  Bedrooms  Bathrooms  Location  Age   Price
0  2100.0       3.0          2       1.0   10  500000
1  1600.0       NaN          2       2.0    5  350000
2     NaN       4.0          3       3.0    8  450000
3  2400.0       3.0          2       NaN   20  600000
4  1800.0       2.0          1       2.0   15  400000

DataFrame after KNN Imputation:
          Size  Bedrooms  Bathrooms  Location   Age   Price
0  2100.000000       3.0        2.0       1.0  10.0  500000
1  1600.000000       3.0        2.0       2.0   5.0  350000
2  2033.333333       4.0        3.0       3.0   8.0  450000
3  2400.000000       3.0        2.0       2.0  20.0  600000
4  1800.000000       2.0        1.0       2.0  15.0  400000
