## Import libraries and download dataset

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [29]:
# Fetch the housing CSV from Google Drive by file ID using the gdown CLI
# (in the recorded run it was saved as /content/Housing.csv).
!gdown 1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3

Downloading...
From: https://drive.google.com/uc?id=1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3
To: /content/Housing.csv
  0% 0.00/30.0k [00:00<?, ?B/s]100% 30.0k/30.0k [00:00<00:00, 57.0MB/s]


In [30]:
# Load the housing CSV that the download cell saved into the working directory.
# A relative path replaces the Colab-only absolute '/content/Housing.csv' so the
# notebook also runs outside Colab; on Colab (cwd = /content) it is the same file.
dataset_path = 'Housing.csv'
df = pd.read_csv(dataset_path)
# Preview the first five rows.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


## Processing data

In [31]:
# Gather the names of all object-dtype (string) columns; these are the columns
# that must be encoded numerically before modelling.
object_columns = df.select_dtypes(include='object').columns
categorical_cols = list(object_columns)
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [32]:
# Encode every categorical column as numeric codes. The recorded output shows
# no -> 0.0 / yes -> 1.0 and furnished -> 0.0 / semi-furnished -> 1.0.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(df[categorical_cols])

# Wrap the encoded array back into a DataFrame. Reusing df's index keeps the
# rows aligned in the column-wise concat below even when df does not carry the
# default 0..n-1 index (otherwise concat would align on mismatched labels and
# produce NaN-filled rows).
endcoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    index=df.index,
)

# Everything that is not categorical is already numeric (price, area, ...).
numerical_df = df.drop(columns=categorical_cols)

# Numeric columns first, encoded categorical columns after them.
encoded_df = pd.concat([numerical_df, endcoded_categorical_df], axis=1)

In [33]:
# Show the first five rows of the fully numeric frame.
encoded_df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea,furnishingstatus
0,13300000,7420,4,2,3,2,1.0,0.0,0.0,0.0,1.0,1.0,0.0
1,12250000,8960,4,4,4,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,12250000,9960,3,2,2,2,1.0,0.0,1.0,0.0,0.0,1.0,1.0
3,12215000,7500,4,2,2,3,1.0,0.0,1.0,0.0,1.0,1.0,0.0
4,11410000,7420,4,1,2,2,1.0,1.0,1.0,0.0,1.0,0.0,0.0


## Normalize data

In [34]:
# Standardize every column of encoded_df, i.e. the features and the price
# target alike, and keep the result as a plain array.
# NOTE(review): the scaler is fit on all rows before the train/validation split
# done later in the notebook, so validation rows contribute to the fitted
# statistics; consider fitting on the training split only.
normalizer = StandardScaler().fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)

print(dataset_arr)

[[ 4.56636513  1.04672629  1.40341936 ...  1.4726183   1.80494113
  -1.40628573]
 [ 4.00448405  1.75700953  1.40341936 ...  1.4726183  -0.55403469
  -1.40628573]
 [ 4.00448405  2.21823241  0.04727831 ... -0.67906259  1.80494113
  -0.09166185]
 ...
 [-1.61432675 -0.70592066 -1.30886273 ... -0.67906259 -0.55403469
   1.22296203]
 [-1.61432675 -1.03338891  0.04727831 ... -0.67906259 -0.55403469
  -1.40628573]
 [-1.61432675 -0.5998394   0.04727831 ... -0.67906259 -0.55403469
   1.22296203]]


## Train models

In [35]:
# Split X, y: column 0 of dataset_arr is the standardized price (the target);
# the remaining columns are the features. The split is positional, so guard it:
# if the column order ever changes this fails loudly instead of silently
# training against the wrong target.
assert encoded_df.columns[0] == 'price', 'expected price to be the first column'
X, y = dataset_arr[:, 1:], dataset_arr[:, 0]

# Split train, val 7:3 (fixed seed so the split is reproducible)
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.3,
    random_state=1,
    shuffle=True
)

# Training model: three ensemble regressors fitted on the same training split,
# each seeded with random_state=1 for reproducibility.
# RandomForest
regressor_rf = RandomForestRegressor(random_state=1)
regressor_rf.fit(X_train, y_train)

# AdaBoost
regressor_ab = AdaBoostRegressor(random_state=1)
regressor_ab.fit(X_train, y_train)

# Gradient Boosting
regressor_gb = GradientBoostingRegressor(random_state=1)
regressor_gb.fit(X_train, y_train)

## Evaluate models

In [38]:
# Predict the validation targets with each fitted regressor.
y_pred_rf, y_pred_ab, y_pred_gb = (
    fitted.predict(X_val)
    for fitted in (regressor_rf, regressor_ab, regressor_gb)
)

# Score all three models up front. y_val comes from the standardized array, so
# MAE/MSE are expressed in standardized-price units rather than in currency.
mae_rf, mse_rf = mean_absolute_error(y_val, y_pred_rf), mean_squared_error(y_val, y_pred_rf)
mae_ab, mse_ab = mean_absolute_error(y_val, y_pred_ab), mean_squared_error(y_val, y_pred_ab)
mae_gb, mse_gb = mean_absolute_error(y_val, y_pred_gb), mean_squared_error(y_val, y_pred_gb)

# Single newline-separated report; the '\n' embedded after the first two MSE
# lines leaves a blank line between consecutive model reports.
print(
    'Random Forest:',
    f'MAE: {mae_rf}',
    f'MSE: {mse_rf}\n',
    'AdaBoost:',
    f'MAE: {mae_ab}',
    f'MSE: {mse_ab}\n',
    'Gradient Boosting:',
    f'MAE: {mae_gb}',
    f'MSE: {mse_gb}',
    sep='\n',
)

Random Forest:
MAE: 0.46093873321571177
MSE: 0.37944418523089524

AdaBoost:
MAE: 0.567680019897059
MSE: 0.5739244030038942

Gradient Boosting:
MAE: 0.4516626127750995
MSE: 0.39610445936979427
