In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Location of the housing CSV, given relative to the current working directory.
dataset_path = './Housing_Data/Housing.csv'
# Parse the file into a DataFrame with pandas' default read options.
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [3]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [9]:
# Labels of every object-dtype column in df; these are the columns that get
# ordinal-encoded further down.
categorical_cols = df.select_dtypes(include='object').columns
# Echo the selection as the cell output.
categorical_cols

Index(['mainroad', 'guestroom', 'basement', 'hotwaterheating',
       'airconditioning', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [10]:
# Replace each categorical column of df with numeric codes produced by an
# OrdinalEncoder, then rebuild a single all-numeric frame (encoded_df) made of
# the untouched numeric columns followed by the encoded categorical columns.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(df[categorical_cols])

# fit_transform returns a bare array, so the row labels are lost. Re-attach
# df's own index here: pd.concat(axis=1) aligns on index labels, and a fresh
# 0..n-1 index would silently misalign rows (and introduce NaNs) whenever df
# does not carry the default RangeIndex, e.g. after rows have been filtered.
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    index=df.index,
)

# Everything that is not categorical is kept as-is.
numerical_df = df.drop(columns=categorical_cols)

# Column order of the result: numeric columns first, encoded categoricals last.
encoded_df = pd.concat([numerical_df, encoded_categorical_df], axis=1)

In [12]:
# Preview the first five rows of the numeric-only columns.
numerical_df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,13300000,7420,4,2,3,2
1,12250000,8960,4,4,4,3
2,12250000,9960,3,2,2,2
3,12215000,7500,4,2,2,3
4,11410000,7420,4,1,2,2


In [13]:
# Preview the first five rows of the ordinal-encoded categorical columns.
encoded_categorical_df.head(n=5)

Unnamed: 0,mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea,furnishingstatus
0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
1,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1.0,0.0,1.0,0.0,0.0,1.0,1.0
3,1.0,0.0,1.0,0.0,1.0,1.0,0.0
4,1.0,1.0,1.0,0.0,1.0,0.0,0.0


In [11]:
# Preview the first five rows of the combined numeric + encoded frame.
encoded_df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea,furnishingstatus
0,13300000,7420,4,2,3,2,1.0,0.0,0.0,0.0,1.0,1.0,0.0
1,12250000,8960,4,4,4,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,12250000,9960,3,2,2,2,1.0,0.0,1.0,0.0,0.0,1.0,1.0
3,12215000,7500,4,2,2,3,1.0,0.0,1.0,0.0,1.0,1.0,0.0
4,11410000,7420,4,1,2,2,1.0,1.0,1.0,0.0,1.0,0.0,0.0


In [14]:
# Standardise every column of encoded_df with a StandardScaler and keep the
# result as a plain array (dataset_arr). The fitted scaler stays available as
# `normalizer`.
# NOTE(review): the scaler is fitted on all rows and on every column of
# encoded_df (the price column included), and this happens before the
# train/validation split performed later in the notebook - confirm that is
# intended.
normalizer = StandardScaler().fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)

In [15]:
# Echo the standardised array as the cell output for a quick visual check.
dataset_arr

array([[ 4.56636513,  1.04672629,  1.40341936, ...,  1.4726183 ,
         1.80494113, -1.40628573],
       [ 4.00448405,  1.75700953,  1.40341936, ...,  1.4726183 ,
        -0.55403469, -1.40628573],
       [ 4.00448405,  2.21823241,  0.04727831, ..., -0.67906259,
         1.80494113, -0.09166185],
       ...,
       [-1.61432675, -0.70592066, -1.30886273, ..., -0.67906259,
        -0.55403469,  1.22296203],
       [-1.61432675, -1.03338891,  0.04727831, ..., -0.67906259,
        -0.55403469, -1.40628573],
       [-1.61432675, -0.5998394 ,  0.04727831, ..., -0.67906259,
        -0.55403469,  1.22296203]], shape=(545, 13))

In [16]:
# Column 0 of the standardised array is used as the regression target and all
# remaining columns as the feature matrix.
y = dataset_arr[:, 0]
X = dataset_arr[:, 1:]

In [17]:
# Echo the feature matrix (every column of dataset_arr except the first).
X

array([[ 1.04672629,  1.40341936,  1.42181174, ...,  1.4726183 ,
         1.80494113, -1.40628573],
       [ 1.75700953,  1.40341936,  5.40580863, ...,  1.4726183 ,
        -0.55403469, -1.40628573],
       [ 2.21823241,  0.04727831,  1.42181174, ..., -0.67906259,
         1.80494113, -0.09166185],
       ...,
       [-0.70592066, -1.30886273, -0.57018671, ..., -0.67906259,
        -0.55403469,  1.22296203],
       [-1.03338891,  0.04727831, -0.57018671, ..., -0.67906259,
        -0.55403469, -1.40628573],
       [-0.5998394 ,  0.04727831, -0.57018671, ..., -0.67906259,
        -0.55403469,  1.22296203]], shape=(545, 12))

In [18]:
# Echo the target vector (first column of dataset_arr).
y

array([ 4.56636513e+00,  4.00448405e+00,  4.00448405e+00,  3.98575468e+00,
        3.55497918e+00,  3.25530927e+00,  2.88072189e+00,  2.88072189e+00,
        2.73088693e+00,  2.69342819e+00,  2.69342819e+00,  2.62974834e+00,
        2.43121702e+00,  2.39375829e+00,  2.39375829e+00,  2.31884081e+00,
        2.31884081e+00,  2.24392333e+00,  2.20646459e+00,  2.18773522e+00,
        2.13154711e+00,  2.09408838e+00,  2.07535901e+00,  2.07535901e+00,
        2.03790027e+00,  2.01917090e+00,  1.97796629e+00,  1.94425342e+00,
        1.94425342e+00,  1.94425342e+00,  1.94425342e+00,  1.94425342e+00,
        1.88806531e+00,  1.83187721e+00,  1.79441847e+00,  1.77351649e+00,
        1.75321385e+00,  1.71950099e+00,  1.71013630e+00,  1.68204225e+00,
        1.66331288e+00,  1.64458351e+00,  1.56966604e+00,  1.56966604e+00,
        1.49474856e+00,  1.49474856e+00,  1.47601919e+00,  1.45728982e+00,
        1.43856045e+00,  1.41983108e+00,  1.41983108e+00,  1.41983108e+00,
        1.38237234e+00,  

In [19]:
# Hold-out settings: 30% of the rows go to validation, rows are shuffled before
# splitting, and the seed is fixed so the split is repeatable. `random_state`
# is also reused when the regressors are constructed in later cells.
test_size, random_state, is_shuffle = 0.3, 1, True

# Split features and target into training and validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle
)

In [22]:
# Build a random-forest regressor with the shared seed and fit it on the
# training split; fit() returns the estimator itself, so the name is bound to
# the fitted model.
regressor_Random_Forest = RandomForestRegressor(random_state=random_state).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
regressor_Random_Forest

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [25]:
# Build an AdaBoost regressor with the shared seed and fit it on the training
# split; fit() returns the estimator itself, so the name is bound to the
# fitted model.
regressor_AdaBoost = AdaBoostRegressor(random_state=random_state).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
regressor_AdaBoost

0,1,2
,estimator,
,n_estimators,50
,learning_rate,1.0
,loss,'linear'
,random_state,1


In [26]:
# Build a gradient-boosting regressor with the shared seed and fit it on the
# training split; fit() returns the estimator itself, so the name is bound to
# the fitted model.
regressor_Gradient_Boosting = GradientBoostingRegressor(random_state=random_state).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
regressor_Gradient_Boosting

0,1,2
,loss,'squared_error'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [30]:
# Predict the validation targets with the fitted random forest.
# NOTE(review): regressor_AdaBoost and regressor_Gradient_Boosting are fitted
# in earlier cells but are not scored anywhere in the visible cells - confirm
# whether a comparison was intended.
y_pred = regressor_Random_Forest.predict(X_val)

In [31]:
# Score the predictions against the held-out targets. y_val comes from the
# standardised array, so both errors are expressed in standardised units
# rather than in the original price scale.
mae = mean_absolute_error(y_true=y_val, y_pred=y_pred)
mse = mean_squared_error(y_true=y_val, y_pred=y_pred)

In [33]:
# Report both validation errors, one per line.
print(
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    sep='\n',
)

Mean Absolute Error: 0.46093873321571177
Mean Squared Error: 0.37944418523089524
