#### Import relevant libraries

In [1]:
import pandas as pd
from sklearn.impute import KNNImputer

#### Load dataset and subset columns

In [2]:
# Read the raw housing-price CSV; the path is relative to the notebook's working directory.
houseprice_data = pd.read_csv(filepath_or_buffer="data/HousingPricesData.csv")

In [3]:
# Keep only the target (Price) and the numeric features used for imputation.
houseprice_data = houseprice_data.loc[:, ['Price', 'Area', 'Room', 'Lon', 'Lat']]

#### Inspect first 5 rows and shape of the dataset

In [4]:
# Preview the first five rows of the subsetted frame.
houseprice_data.head(n=5)

Unnamed: 0,Price,Area,Room,Lon,Lat
0,685000.0,64,3,4.907736,52.356157
1,475000.0,60,3,4.850476,52.348586
2,850000.0,109,4,4.944774,52.343782
3,580000.0,128,6,4.789928,52.343712
4,720000.0,138,5,4.902503,52.410538


In [5]:
# Dimensions of the subsetted frame as (rows, columns).
houseprice_data.shape

(924, 5)

#### Identify Missing Values

In [6]:
# Count missing entries per column (isna is the same check as isnull).
houseprice_data.isna().sum()

Price    4
Area     0
Room     0
Lon      0
Lat      0
dtype: int64

#### View rows with Missing Values

In [7]:
# Show every row whose Price is missing.
houseprice_data.loc[houseprice_data['Price'].isna()]

Unnamed: 0,Price,Area,Room,Lon,Lat
73,,147,3,4.897454,52.360707
321,,366,12,4.787874,52.383877
610,,107,3,4.945022,52.369244
727,,81,3,4.880976,52.389623


#### Identify the index of missing values

In [8]:
# Index labels of the rows whose Price is missing, taken by masking the index directly.
missing_values_index = houseprice_data.index[houseprice_data['Price'].isna()]
missing_values_index

Int64Index([73, 321, 610, 727], dtype='int64')

#### Replace missing values with KNN-imputed values

In [9]:
# Fill each missing value with the mean of that column over the row's 5 nearest
# neighbours (KNNImputer measures closeness with a NaN-aware Euclidean distance).
# NOTE(review): the features are not scaled here, so columns with larger numeric
# ranges (e.g. Area) weigh more in the distance than Lon/Lat — consider scaling
# before imputing; confirm whether that is intended.
imputer = KNNImputer(n_neighbors=5)
# fit_transform returns a bare NumPy array, so restore both the column names and the
# original row index. Keeping the index ensures label-based lookups taken from
# houseprice_data (such as missing_values_index) stay aligned with the imputed frame.
houseprice_data_knn_imputed = pd.DataFrame(
    imputer.fit_transform(houseprice_data),
    columns=houseprice_data.columns,
    index=houseprice_data.index,
)

In [10]:
# Show the rows that originally lacked a Price, now with the imputed values.
houseprice_data_knn_imputed.loc[missing_values_index]

Unnamed: 0,Price,Area,Room,Lon,Lat
73,1052000.0,147.0,3.0,4.897454,52.360707
321,3856000.0,366.0,12.0,4.787874,52.383877
610,694000.0,107.0,3.0,4.945022,52.369244
727,632000.0,81.0,3.0,4.880976,52.389623
