In [55]:
# Importing required packages
import pandas as pd 
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [29]:
# Load the energy-efficiency dataset from the CSV file in the working directory
Energy_eff = pd.read_csv("ENB2012_data.csv")

In [30]:
# Inspect the raw column names.
# Only the last expression of a notebook cell is displayed, so this single
# expression is all the cell needs to produce its output.
Energy_eff.columns.values

array(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'Y1', 'Y2'],
      dtype=object)

In [31]:
# Replace the coded column names (X1..X8, Y1, Y2) with descriptive ones.
# Columns not listed in the mapping would be left unchanged by rename().
Energy_eff = Energy_eff.rename(
    columns=dict(
        X1="Relative_Compact",
        X2="Surf_Area",
        X3="Wall_Area",
        X4="Roof_Area",
        X5="Overall_Ht",
        X6="Orien",
        X7="Glazing_Area",
        X8="Glazing_Area_Dist",
        Y1="Heat_Load",
        Y2="Cool_Load",
    )
)

print(Energy_eff)

     Relative_Compact  Surf_Area  Wall_Area  Roof_Area  Overall_Ht  Orien  \
0                0.98      514.5      294.0     110.25         7.0      2   
1                0.98      514.5      294.0     110.25         7.0      3   
2                0.98      514.5      294.0     110.25         7.0      4   
3                0.98      514.5      294.0     110.25         7.0      5   
4                0.90      563.5      318.5     122.50         7.0      2   
5                0.90      563.5      318.5     122.50         7.0      3   
6                0.90      563.5      318.5     122.50         7.0      4   
7                0.90      563.5      318.5     122.50         7.0      5   
8                0.86      588.0      294.0     147.00         7.0      2   
9                0.86      588.0      294.0     147.00         7.0      3   
10               0.86      588.0      294.0     147.00         7.0      4   
11               0.86      588.0      294.0     147.00         7.0      5   

In [32]:
# Summarise the frame: index range, per-column non-null counts and dtypes, memory usage
Energy_eff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
Relative_Compact     768 non-null float64
Surf_Area            768 non-null float64
Wall_Area            768 non-null float64
Roof_Area            768 non-null float64
Overall_Ht           768 non-null float64
Orien                768 non-null int64
Glazing_Area         768 non-null float64
Glazing_Area_Dist    768 non-null int64
Heat_Load            768 non-null float64
Cool_Load            768 non-null float64
dtypes: float64(8), int64(2)
memory usage: 60.1 KB


In [33]:
# Count missing values in each column
Energy_eff.isna().sum()

Relative_Compact     0
Surf_Area            0
Wall_Area            0
Roof_Area            0
Overall_Ht           0
Orien                0
Glazing_Area         0
Glazing_Area_Dist    0
Heat_Load            0
Cool_Load            0
dtype: int64

In [38]:
# Data partitioning
# Targets: both load columns are predicted together (multi-output regression).
y_set = Energy_eff[['Heat_Load','Cool_Load']]

# Features: every column except the two targets, passed through Normalizer.
# NOTE(review): sklearn's Normalizer rescales each *row* (sample) to unit norm
# rather than scaling each feature column -- confirm this is intended, since
# tree-based models normally do not need feature scaling at all.
nr = Normalizer(copy=False)
X_set = nr.fit_transform(Energy_eff.drop(columns=['Heat_Load','Cool_Load']))

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_Train_EE, X_Test_EE, y_Train_EE, y_Test_EE = train_test_split(
    X_set,
    y_set,
    test_size=0.3,
    random_state=141,
)

In [41]:
# Random forest regressor: 500 trees, seeded so repeated fits are reproducible
RF_model = RandomForestRegressor(
    random_state=143,
    n_estimators=500,
)

In [42]:
# Train on the training split; the fitted estimator is the cell's displayed value
RF_model.fit(X=X_Train_EE, y=y_Train_EE)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=None,
           oob_score=False, random_state=143, verbose=0, warm_start=False)

In [45]:
# Predict both targets for the held-out rows (one row per sample, one column per target)
pred_RF_model = RF_model.predict(X=X_Test_EE)
print(pred_RF_model)

[[11.15542 14.4782 ]
 [16.77344 16.302  ]
 [36.13724 37.00518]
 [35.03528 35.71262]
 [16.6474  17.13042]
 [11.39338 14.81832]
 [17.80156 24.50912]
 [10.71584 13.98548]
 [24.26012 25.75836]
 [10.2647  13.46842]
 [15.02956 17.9969 ]
 [12.86356 16.00246]
 [26.23828 28.2142 ]
 [32.43036 33.81632]
 [11.23374 14.18434]
 [19.65636 25.51154]
 [39.82922 39.58394]
 [36.25102 37.00056]
 [36.67628 37.07914]
 [11.27282 14.12028]
 [10.74862 13.93802]
 [31.8538  35.99384]
 [14.27974 17.01904]
 [32.6679  33.44346]
 [32.2897  33.96968]
 [32.75002 34.03792]
 [14.94424 15.66408]
 [16.98076 20.17094]
 [14.95206 15.80426]
 [13.84294 16.39762]
 [29.52822 29.97918]
 [32.6048  34.93488]
 [32.01564 35.6358 ]
 [25.23878 28.37028]
 [28.80858 31.17186]
 [14.2858  15.2293 ]
 [12.21174 15.2783 ]
 [36.56654 37.016  ]
 [14.40956 16.83924]
 [32.94602 34.44036]
 [20.0164  25.33176]
 [14.18288 16.10608]
 [31.90914 33.64282]
 [16.7395  20.2722 ]
 [29.37286 29.49506]
 [16.95436 20.59778]
 [39.50146 42.01904]
 [11.2761  13

In [63]:
# Absolute error per test row and per target.
# Subtracting the prediction array from the DataFrame keeps the test-set index
# and the Heat_Load / Cool_Load column labels on the result.
err = (y_Test_EE - pred_RF_model).abs()
# Print out the mean absolute error (mae), one value per target column.
# The column-wise mean is requested explicitly: np.mean(DataFrame) forwards
# axis=None to DataFrame.mean, which newer pandas releases treat as a reduction
# over both axes, merging the two targets into a single number.
print('Mean Absolute Error:', err.mean(axis=0).round(2))

Mean Absolute Error: Heat_Load    0.32
Cool_Load    1.16
dtype: float64


In [66]:
# Absolute percentage error for every test row and target:
# absolute error divided by the true value, expressed in percent.
# (Averaging these per column gives the MAPE.)
MAP_percent = err.div(y_Test_EE).mul(100)

Unnamed: 0,Heat_Load,Cool_Load
273,0.138420,1.387955
767,0.801923,1.696818
453,0.858052,0.666975
725,0.072208,7.762885
668,1.077110,1.363432
229,0.233100,1.011043
13,2.249052,14.208388
173,0.335581,1.564851
145,0.573279,1.005534
216,1.832341,2.033485


In [67]:
# "Accuracy" here is defined as 100 minus the mean absolute percentage error,
# reported separately for each target column (it is not a classification accuracy).
# The column-wise mean is requested explicitly: np.mean(DataFrame) forwards
# axis=None to DataFrame.mean, which newer pandas releases treat as a reduction
# over both axes, collapsing both targets into one number.
ACC = 100 - MAP_percent.mean(axis=0)
print('Accuracy of Random Forest :', ACC.round(2))

Accuracy of Random Forest : Heat_Load    98.61
Cool_Load    96.06
dtype: float64
