In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression,SGDRegressor, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

In [2]:
data17 = pd.read_csv("2017c.csv")
data18 = pd.read_csv("2018c.csv")
merge_data = pd.concat([data17, data18])

In [3]:
def remove_outliers(data,col):
    '''removing potential outlier based on reasonably house price'''
    data=data[(data[col] < 160000000) & (data[col] > 35000)]
    Min_Price=data[col].min()
    Max_Price=data[col].max()
    print("The minimum house price is: £{}, while the maximum house price is: £{}." .format(Min_Price,Max_Price))
    return data

In [4]:
def get_dummie(data,cat_feat=None):
    dummie_variables=pd.get_dummies(data[cat_feat], drop_first= True)
    df=data.drop(cat_feat, axis=1,)
    return pd.concat([df,dummie_variables],axis=1)

In [5]:
#x train, y train, x test, y test
def train_test_data(X, Y):
    '''Using sci-kit-learn train_test_split method'''
    X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size = 0.25, random_state=42)
    print('The number of training set is: {}. \nThe number of test set is: {}' .format(train_set.shape[0],test_set.shape[0]) )
    return X_test,test_set

In [6]:
def train_model(model, features_df, target_df, mean_mse, mean_rmse):
    print('Training model')
    neg_mse = cross_val_score(model, features_df, target_df,cv=5, scoring='neg_mean_squared_error')
    mean_mse[model] = np.mean(-neg_mse)
    mean_rmse[model] =np.sqrt(mean_mse[model])

In [7]:
def print_summary(model, mean_rmse):
    print('Summary: ')
    print('\nModel:\n', model)
    print('Average root mean square error is:\n', mean_rmse[model])

In [8]:
def save_results(model, rmse, predictions, feature_importances):
    '''saves model, model summary, feature importances, and predictions'''
    with open('model.txt', 'w') as file:
        file.write(str(model))
    feature_importances.to_csv('feature_importances.csv') 
    np.savetxt('UK_house_price_prediction.csv', predictions, delimiter=',')

In [9]:
merge_data.describe()

Unnamed: 0,price,number_of_rooms,total_floor_area,epc_energy_consumption_current,epc_lighting_cost_current,epc_heating_cost_current,epc_hot_water_cost_current,epc_flat_storey_count,epc_number_open_fireplaces,epc_floor_height,latitude,longitude,closeststop,crimesin3km_antisocial,closestschool,closestpark,address_id,distance_center
count,1978777.0,850133.0,960356.0,960356.0,960356.0,960356.0,960356.0,85957.0,853001.0,525532.0,1888177.0,1888177.0,1866905.0,1887698.0,1787248.0,1746376.0,1978777.0,1888177.0
mean,344198.1,4.564547,90.015271,982.718065,78.93079,701.405613,140.895424,3.168922,0.200909,2.453044,52.35284,-1.365917,311.237,2356.189,483.5813,964.0953,98559890.0,6.190759
std,1942431.0,1.730543,52.947399,4775.125172,40.838095,540.060623,75.444502,2.482652,0.572482,0.605628,1.14413,1.327566,475.6355,2905.756,431.8005,792.546,5385521.0,7.257374
min,1.0,1.0,0.0,-1323.0,-8.0,-801.0,-31.0,0.0,0.0,0.0,49.91321,-6.352647,0.53217,0.0,0.0,3.0,88575930.0,0.003301802
25%,144000.0,3.0,64.0,196.0,54.0,394.0,95.0,2.0,0.0,2.33,51.45793,-2.247356,96.74732,449.0,242.0,412.0,93817030.0,1.796913
50%,226500.0,4.0,81.0,262.0,72.0,594.0,116.0,3.0,0.0,2.4,52.19065,-1.402552,160.5978,1433.0,382.0,712.0,99180910.0,3.754652
75%,352000.0,5.0,104.0,349.0,95.0,852.0,161.0,4.0,0.0,2.51,53.37324,-0.273181,293.8576,3040.0,574.0,1241.0,103615000.0,7.704068
max,707504200.0,91.0,5327.89,416354.0,6760.0,62707.0,1411.0,63.0,75.0,230.0,55.79742,1.758397,4815.169,23074.0,4743.0,4838.0,105613600.0,83.74496


In [10]:
merge_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1978777 entries, 0 to 986564
Data columns (total 43 columns):
 #   Column                          Dtype  
---  ------                          -----  
 0   transaction_id                  object 
 1   created                         object 
 2   duration                        object 
 3   price                           int64  
 4   old_new                         object 
 5   ppd_category                    object 
 6   property_type                   object 
 7   flat                            object 
 8   floor_level                     object 
 9   number_of_rooms                 float64
 10  total_floor_area                float64
 11  epc_current_energy_rating       object 
 12  epc_potential_energy_rating     object 
 13  epc_property_type               object 
 14  epc_built_form                  object 
 15  epc_energy_consumption_current  float64
 16  epc_lighting_cost_current       float64
 17  epc_heating_cost_current    

In [11]:
#Remove outliers
data=remove_outliers(merge_data,'price')

The minimum house price is: £35022, while the maximum house price is: £159477751.


In [12]:
data=data.iloc[:,1:]
data.head()

Unnamed: 0,created,duration,price,old_new,ppd_category,property_type,flat,floor_level,number_of_rooms,total_floor_area,...,building_number,latitude,longitude,postcode_sector,closeststop,crimesin3km_antisocial,closestschool,closestpark,address_id,distance_center
0,2017-06-22,F,86787,Y,B,T,,,,,...,24,53.177751,-1.191828,NG19 8,130.419369,741.0,735.0,674.0,102877980,3.6583
1,2017-10-13,F,805000,N,A,D,,,9.0,173.04,...,3,51.380891,-1.605473,SN8 3,446.805795,10.0,633.0,,97804281,27.205352
2,2017-03-08,F,275000,N,A,D,,,,,...,36,52.469943,-2.134401,DY5 3,310.743404,1576.0,845.0,745.0,101429204,5.659612
3,2017-10-27,L,220000,N,A,T,,,,,...,63,52.377534,-1.762286,B93 8,265.38864,184.0,817.0,686.0,100150188,4.190032
4,2017-05-05,F,237000,N,A,D,,,,105.0,...,1,52.531956,-1.376669,LE10 2,236.354546,643.0,582.0,761.0,93361793,14.267637


In [15]:
merge_data.shape

(1978777, 43)

In [14]:
data.shape

(1957971, 42)

In [17]:
features=['duration', 'old_new', 'ppd_category', 'property_type']
dataset_new = data.drop('city', axis = 1) # drop City column to reduce high cardinality
#transform price to logarithm form
data['price'] = data['price'].apply(lambda x: np.log(x))
#onehote encode the categorical variables
trsfm_data=get_dummie(dataset_new,cat_feat=features)

trsfm_data.head()

Unnamed: 0,created,price,flat,floor_level,number_of_rooms,total_floor_area,epc_current_energy_rating,epc_potential_energy_rating,epc_property_type,epc_built_form,...,closestpark,address_id,distance_center,duration_L,old_new_Y,ppd_category_B,property_type_F,property_type_O,property_type_S,property_type_T
0,2017-06-22,11.371212,,,,,,,,,...,674.0,102877980,3.6583,0,1,1,0,0,0,1
1,2017-10-13,13.598598,,,9.0,173.04,E,D,House,Detached,...,,97804281,27.205352,0,0,0,0,0,0,0
2,2017-03-08,12.524526,,,,,,,,,...,745.0,101429204,5.659612,0,0,0,0,0,0,0
3,2017-10-27,12.301383,,,,,,,,,...,686.0,100150188,4.190032,1,0,0,0,0,0,1
4,2017-05-05,12.375815,,,,105.0,B,A,House,Detached,...,761.0,93361793,14.267637,0,0,0,0,0,0,0


In [19]:
trsfm_data.shape

(1957971, 44)

In [20]:
corr_matrix = trsfm_data.corr()
corr_matrix["price"].sort_values(ascending=False)

price                             1.000000
total_floor_area                  0.431143
number_of_rooms                   0.397383
epc_lighting_cost_current         0.390103
longitude                         0.297868
epc_heating_cost_current          0.265143
epc_number_open_fireplaces        0.209218
property_type_O                   0.139047
closestschool                     0.125287
epc_flat_storey_count             0.087725
address_id                        0.086848
epc_hot_water_cost_current        0.084412
old_new_Y                         0.078672
crimesin3km_antisocial            0.072124
epc_energy_consumption_current    0.044188
closeststop                       0.042566
closestpark                       0.030399
epc_floor_height                  0.016036
ppd_category_B                    0.003201
distance_center                  -0.015806
property_type_F                  -0.044732
property_type_S                  -0.082968
duration_L                       -0.106911
property_ty

In [21]:
for col in trsfm_data.columns:
    print(col)

created
price
flat
floor_level
number_of_rooms
total_floor_area
epc_current_energy_rating
epc_potential_energy_rating
epc_property_type
epc_built_form
epc_energy_consumption_current
epc_lighting_cost_current
epc_heating_cost_current
epc_hot_water_cost_current
epc_flat_storey_count
epc_glazed_area
epc_number_open_fireplaces
epc_windows_description
epc_windows_energy_eff
epc_walls_description
epc_walls_energy_eff
epc_roof_description
epc_roof_energy_eff
epc_floor_height
postcode
street
building_name
building_number
latitude
longitude
postcode_sector
closeststop
crimesin3km_antisocial
closestschool
closestpark
address_id
distance_center
duration_L
old_new_Y
ppd_category_B
property_type_F
property_type_O
property_type_S
property_type_T


In [22]:
trsfm_data.drop(columns=['address_id', 'flat', 'floor_level','epc_current_energy_rating', 
                         'epc_potential_energy_rating','epc_property_type', 'epc_built_form',
                         'epc_glazed_area','epc_windows_description', 'epc_windows_energy_eff', 
                         'epc_walls_description','epc_walls_energy_eff', 'epc_roof_description', 'epc_roof_energy_eff', 
                         'postcode','street', 'building_name', 'building_number', 
                         'postcode_sector'], axis=1, inplace=True)

In [23]:
for col in trsfm_data.columns:
    print(col)

created
price
number_of_rooms
total_floor_area
epc_energy_consumption_current
epc_lighting_cost_current
epc_heating_cost_current
epc_hot_water_cost_current
epc_flat_storey_count
epc_number_open_fireplaces
epc_floor_height
latitude
longitude
closeststop
crimesin3km_antisocial
closestschool
closestpark
distance_center
duration_L
old_new_Y
ppd_category_B
property_type_F
property_type_O
property_type_S
property_type_T


In [25]:
trsfm_data.isnull().sum().sort_values(ascending=False)

epc_flat_storey_count             1873016
epc_floor_height                  1435992
number_of_rooms                   1114461
epc_number_open_fireplaces        1111599
epc_heating_cost_current          1004684
total_floor_area                  1004684
epc_energy_consumption_current    1004684
epc_lighting_cost_current         1004684
epc_hot_water_cost_current        1004684
closestpark                        229871
closestschool                      189803
closeststop                        111317
crimesin3km_antisocial              90751
latitude                            90282
distance_center                     90282
longitude                           90282
price                                   0
property_type_T                         0
property_type_S                         0
duration_L                              0
old_new_Y                               0
ppd_category_B                          0
property_type_F                         0
property_type_O                   

In [26]:
trsfm_data.shape

(1957971, 25)

In [39]:
#median = trsfm_data["epc_flat_storey_count"].median()
#trsfm_data["epc_flat_storey_count"].fillna(median, inplace=True)

#median = trsfm_data["epc_floor_height"].median()
#trsfm_data["epc_floor_height"].fillna(median, inplace=True)

#median = trsfm_data["number_of_rooms"].median()
#trsfm_data["number_of_rooms"].fillna(median, inplace=True)

#median = trsfm_data["epc_number_open_fireplaces"].median()
#trsfm_data["epc_number_open_fireplaces"].fillna(median, inplace=True)

#median = trsfm_data["epc_heating_cost_current"].median()
#trsfm_data["epc_heating_cost_current"].fillna(median, inplace=True)

#median = trsfm_data["total_floor_area"].median()
#trsfm_data["total_floor_area"].fillna(median, inplace=True)

#median = trsfm_data["epc_energy_consumption_current"].median()
#trsfm_data["epc_energy_consumption_current"].fillna(median, inplace=True)

#median = trsfm_data["epc_lighting_cost_current"].median()
#trsfm_data["epc_lighting_cost_current"].fillna(median, inplace=True)

#median = trsfm_data["epc_hot_water_cost_current"].median()
#trsfm_data["epc_hot_water_cost_current"].fillna(median, inplace=True)

#median = trsfm_data["closestpark"].median()
#trsfm_data["closestpark"].fillna(median, inplace=True)

#median = trsfm_data["closestschool"].median()
#trsfm_data["closestschool"].fillna(median, inplace=True)

#median = trsfm_data["closeststop"].median()
#trsfm_data["closeststop"].fillna(median, inplace=True)

#median = trsfm_data["crimesin3km_antisocial"].median()
#trsfm_data["crimesin3km_antisocial"].fillna(median, inplace=True)

#median = trsfm_data["latitude"].median()
#trsfm_data["latitude"].fillna(median, inplace=True)

#median = trsfm_data["latitude"].median()
#trsfm_data["longitude"].fillna(median, inplace=True)

#median = trsfm_data["distance_center"].median()
#trsfm_data["distance_center"].fillna(median, inplace=True)

In [38]:
trsfm_data.dropna(axis=0,inplace=True)

In [28]:
trsfm_data.shape

(79073, 25)

In [29]:
trsfm_data.isnull().sum().sort_values(ascending=False)

property_type_T                   0
latitude                          0
price                             0
number_of_rooms                   0
total_floor_area                  0
epc_energy_consumption_current    0
epc_lighting_cost_current         0
epc_heating_cost_current          0
epc_hot_water_cost_current        0
epc_flat_storey_count             0
epc_number_open_fireplaces        0
epc_floor_height                  0
longitude                         0
property_type_S                   0
closeststop                       0
crimesin3km_antisocial            0
closestschool                     0
closestpark                       0
distance_center                   0
duration_L                        0
old_new_Y                         0
ppd_category_B                    0
property_type_F                   0
property_type_O                   0
created                           0
dtype: int64

In [30]:
trsfm_data=trsfm_data.iloc[:,1:]
trsfm_data.head()

Unnamed: 0,price,number_of_rooms,total_floor_area,epc_energy_consumption_current,epc_lighting_cost_current,epc_heating_cost_current,epc_hot_water_cost_current,epc_flat_storey_count,epc_number_open_fireplaces,epc_floor_height,...,closestschool,closestpark,distance_center,duration_L,old_new_Y,ppd_category_B,property_type_F,property_type_O,property_type_S,property_type_T
82,11.21182,2.0,43.07,295.0,37.0,149.0,121.0,3.0,0.0,2.35,...,715.0,586.0,6.939272,1,0,0,1,0,0,0
92,13.840203,3.0,98.15,191.0,111.0,453.0,107.0,4.0,0.0,2.6,...,410.0,164.0,4.861022,1,0,0,1,0,0,0
96,12.542545,3.0,51.25,370.0,49.0,203.0,152.0,2.0,0.0,2.53,...,288.0,939.0,1.80647,1,0,0,1,0,0,0
100,12.7513,3.0,59.47,424.0,67.0,317.0,209.0,3.0,0.0,2.33,...,321.0,771.0,3.069383,1,0,1,1,0,0,0
134,13.161584,4.0,70.45,210.0,53.0,262.0,126.0,6.0,0.0,2.38,...,445.0,477.0,1.437258,1,0,0,1,0,0,0


In [42]:
trsfm_data.head()

Unnamed: 0,price,number_of_rooms,total_floor_area,epc_energy_consumption_current,epc_lighting_cost_current,epc_heating_cost_current,epc_hot_water_cost_current,epc_flat_storey_count,epc_number_open_fireplaces,epc_floor_height,...,closestschool,closestpark,distance_center,duration_L,old_new_Y,ppd_category_B,property_type_F,property_type_O,property_type_S,property_type_T
82,11.21182,2.0,43.07,295.0,37.0,149.0,121.0,3.0,0.0,2.35,...,715.0,586.0,6.939272,1,0,0,1,0,0,0
92,13.840203,3.0,98.15,191.0,111.0,453.0,107.0,4.0,0.0,2.6,...,410.0,164.0,4.861022,1,0,0,1,0,0,0
96,12.542545,3.0,51.25,370.0,49.0,203.0,152.0,2.0,0.0,2.53,...,288.0,939.0,1.80647,1,0,0,1,0,0,0
100,12.7513,3.0,59.47,424.0,67.0,317.0,209.0,3.0,0.0,2.33,...,321.0,771.0,3.069383,1,0,1,1,0,0,0
134,13.161584,4.0,70.45,210.0,53.0,262.0,126.0,6.0,0.0,2.38,...,445.0,477.0,1.437258,1,0,0,1,0,0,0


In [56]:
X = trsfm_data.drop("price", axis=1)
Y = trsfm_data['price'].reshape(-1,1)

AttributeError: 'Series' object has no attribute 'reshape'

In [49]:
X.shape

(79073, 23)

In [50]:
Y.shape

(79073,)

In [51]:
#train_set, test_set=train_test_data(trsfm_data)

In [52]:
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size = 0.25, random_state=42)

In [41]:
test_set.head()

Unnamed: 0,price,number_of_rooms,total_floor_area,epc_energy_consumption_current,epc_lighting_cost_current,epc_heating_cost_current,epc_hot_water_cost_current,epc_flat_storey_count,epc_number_open_fireplaces,epc_floor_height,...,closestschool,closestpark,distance_center,duration_L,old_new_Y,ppd_category_B,property_type_F,property_type_O,property_type_S,property_type_T
968673,11.877569,3.0,65.96,225.0,55.0,347.0,86.0,2.0,0.0,2.24,...,514.0,872.0,2.095841,1,0,0,1,0,0,0
407475,14.077875,3.0,86.48,495.0,81.0,821.0,164.0,4.0,0.0,2.4,...,140.0,750.0,3.975426,1,0,1,0,1,0,0
133520,11.849398,6.0,134.11,234.0,125.0,776.0,124.0,5.0,0.0,3.34,...,316.0,823.0,1.758383,1,0,1,1,0,0,0
180318,12.206073,3.0,70.74,214.0,71.0,297.0,134.0,3.0,0.0,2.4,...,290.0,190.0,2.183395,1,0,1,0,1,0,0
600988,12.94801,4.0,65.0,15334.0,82.0,433.0,128.0,1.0,0.0,2.0,...,213.0,462.0,5.242171,0,0,0,0,0,0,0


In [32]:
train_set.head()

Unnamed: 0,price,number_of_rooms,total_floor_area,epc_energy_consumption_current,epc_lighting_cost_current,epc_heating_cost_current,epc_hot_water_cost_current,epc_flat_storey_count,epc_number_open_fireplaces,epc_floor_height,...,closestschool,closestpark,distance_center,duration_L,old_new_Y,ppd_category_B,property_type_F,property_type_O,property_type_S,property_type_T
329450,11.798104,3.0,66.15,213.0,35.0,287.0,182.0,2.0,0.0,2.4,...,92.0,627.0,10.606081,1,0,0,1,0,0,0
311930,12.296827,2.0,46.0,208.0,26.0,87.0,104.0,3.0,0.0,2.3,...,395.0,812.0,5.60794,1,0,0,1,0,0,0
419371,11.163368,4.0,59.51,305.0,58.0,377.0,135.0,2.0,0.0,2.52,...,205.0,770.0,12.356994,0,0,1,0,1,0,0
332863,11.982617,3.0,50.2,308.0,51.0,175.0,134.0,3.0,0.0,2.3,...,322.0,411.0,0.428934,1,0,0,1,0,0,0
523600,12.154779,2.0,41.13,280.0,38.0,319.0,71.0,3.0,1.0,2.2,...,251.0,841.0,1.308336,1,0,0,1,0,0,0


In [34]:
features_df=train_set.iloc[:,1:]
target_df=train_set[['price']]
test_df=test_set.iloc[:,1:]
test_Ac=test_set[['price']]

In [35]:
print('features_df', features_df.shape)
print('target_df', target_df.shape)
print('test_df',test_df.shape)

features_df (55351, 23)
target_df (55351, 1)
test_df (23722, 23)


In [36]:
#initialize model list and dicts
models = []
mean_mse = {}
mean_rmse={}

In [64]:
#create models
LR = LinearRegression()
LR_std_pca = make_pipeline(StandardScaler(), PCA(), LinearRegression())
lasso_reg = make_pipeline(StandardScaler(),Lasso(alpha=0.1))
Elastic_reg=make_pipeline(StandardScaler(),ElasticNet(alpha=0.1, l1_ratio=0.5))
Rf_reg=RandomForestRegressor(n_estimators=150, max_features=0.9, max_depth=25, verbose=0)
GB_reg = GradientBoostingRegressor(n_estimators=150, max_depth=5, loss='ls', verbose=0)

models=[LR, LR_std_pca, lasso_reg, Elastic_reg, Rf_reg,GB_reg]

#models.extend([LR, LR_std_pca, lasso_reg, Elastic_reg, Rf_reg,GB_reg, voting_Reg])

#print("Starts cross validation")
#for model in models:
    
    
#    train_model(model, features_df, target_df, mean_mse, mean_rmse)
#    print_summary(model,mean_rmse)

In [69]:
def model(X_train,X_test,y_train, y_test,model):
    for i in model:
        i.fit(X_train,y_train)
        y_pred = i.predict(X_test)
        mae = MAE(y_test,y_pred)
        mdape = MdAPE(y_test,y_pred)
        print(f"{i} -> MAE: {mae} -> MdAPE: {mdape}")

In [None]:
data19 = pd.read_csv("2019c.csv")

In [None]:
def model(X_train,X_test,y_train, y_test,model):
    for i in model:
        i.fit(X_train,y_train)
        y_pred = i.predict(X_test)
        mae = MAE(y_test,y_pred)
        mdape = MdAPE(y_test,y_pred)
        print(f"{i} -> MAE: {mae} -> MdAPE: {mdape}")

In [70]:
def MAE(y_true, y_pred):
    return np.mean(abs(y_true - y_pred))

def MdAPE(y_true, y_pred):
    return np.median(abs(y_true - y_pred) / y_true) * 100

In [71]:
model(X_train, X_test, y_train, y_test, models)

LinearRegression() -> MAE: 0.36505187028758146 -> MdAPE: 2.4411895342227163
Pipeline(steps=[('standardscaler', StandardScaler()), ('pca', PCA()),
                ('linearregression', LinearRegression())]) -> MAE: 0.3650518702875811 -> MdAPE: 2.4411895342227012
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('lasso', Lasso(alpha=0.1))]) -> MAE: 0.4118464201173965 -> MdAPE: 2.7911547437440034
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('elasticnet', ElasticNet(alpha=0.1))]) -> MAE: 0.3866884632951028 -> MdAPE: 2.594950346306899
RandomForestRegressor(max_depth=25, max_features=0.9, n_estimators=150) -> MAE: 0.19039770409538423 -> MdAPE: 1.0290084787670088
GradientBoostingRegressor(max_depth=5, n_estimators=150) -> MAE: 0.2104319285553203 -> MdAPE: 1.2248305683735439


In [95]:
EPSILON = 1e-10

In [96]:
def _error(actual: np.ndarray, predicted: np.ndarray):
    """ Simple error """
    return actual - predicted

In [97]:
def _percentage_error(actual: np.ndarray, predicted: np.ndarray):
    """
    Percentage error
    Note: result is NOT multiplied by 100
    """
    return _error(actual, predicted) / (actual + EPSILON)

In [98]:
def MAPE(Y_actual,Y_Predicted):
    mape = np.mean(np.abs((Y_actual - Y_Predicted)/Y_actual))*100
    return mape

In [101]:
def mdape(actual: np.ndarray, predicted: np.ndarray):
        return np.median(np.abs(_percentage_error(actual, predicted)))

In [83]:
MAPE(y_test ,y_pred)

1.7280620594672884

In [103]:
mdape(y_test, y_pred)*100

1.2245636525305976

In [59]:
GB_reg.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=5, n_estimators=150)

In [60]:
y_pred = GB_reg.predict(X_test)

In [61]:
MdAPE(y_test, y_pred)

1.2245636525402144