#### Import relevant libraries

In [1]:
import numpy as np
import pandas as pd

#### Load dataset and subset columns

In [2]:
houseprice_data = pd.read_csv("data/HousingPricesData.csv")

In [3]:
houseprice_data = houseprice_data[['Zip','Price', 'Area', 'Room']]

#### Inspect first 5 rows and data types of the dataset

In [4]:
houseprice_data.head()

Unnamed: 0,Zip,Price,Area,Room
0,1091 CR,685000.0,64,3
1,1059 EL,475000.0,60,3
2,1097 SM,850000.0,109,4
3,1060 TH,580000.0,128,6
4,1036 KN,720000.0,138,5


In [5]:
houseprice_data.shape

(924, 4)

#### Identify Missing Values

In [6]:
houseprice_data.isnull().sum()

Zip      0
Price    4
Area     0
Room     0
dtype: int64

#### View rows with Missing Values

In [7]:
houseprice_data[houseprice_data['Price'].isnull()].head()

Unnamed: 0,Zip,Price,Area,Room
73,1017 VV,,147,3
321,1067 HP,,366,12
610,1019 HT,,107,3
727,1013 CK,,81,3


#### Compute Replacement values

In [8]:
mean = houseprice_data['Price'].mean()
median = houseprice_data['Price'].median()
mode = houseprice_data['Zip'].mode()[0]
print("mean: ",mean,"median: " ,median,"mode: ", mode)

mean:  622065.4195652173 median:  467000.0 mode:  1075 XR


#### Replace Missing Values with the Mean

In [9]:
houseprice_data['price_with_mean'] = houseprice_data['Price'].fillna(mean)
houseprice_data.isnull().sum()

Zip                0
Price              4
Area               0
Room               0
price_with_mean    0
dtype: int64

#### Replace Missing Values with the Median

In [10]:
houseprice_data['price_with_median'] = houseprice_data['Price'].fillna(median)
houseprice_data.isnull().sum()

Zip                  0
Price                4
Area                 0
Room                 0
price_with_mean      0
price_with_median    0
dtype: int64

#### Perform Group Replacement of Missing Values

In [11]:
houseprice_data['group_mean'] = houseprice_data.groupby('Room')['Price'].transform(lambda x: np.nanmean(x))
houseprice_data['group_median'] = houseprice_data.groupby('Room')['Price'].transform(lambda x: np.nanmedian(x))

  """Entry point for launching an IPython kernel.
  overwrite_input=overwrite_input)


#### View all replacements

In [12]:
houseprice_data[houseprice_data['Price'].isnull()]

Unnamed: 0,Zip,Price,Area,Room,price_with_mean,price_with_median,group_mean,group_median
73,1017 VV,,147,3,622065.419565,467000.0,512416.39697,450000.0
321,1067 HP,,366,12,622065.419565,467000.0,,
610,1019 HT,,107,3,622065.419565,467000.0,512416.39697,450000.0
727,1013 CK,,81,3,622065.419565,467000.0,512416.39697,450000.0
