# Project 3

## 1) Exploring data

In [94]:
import pandas as pd
import numpy as np
raw_data = pd.read_csv('ENB2012_data.csv')

In [95]:
raw_data.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [96]:
raw_data.shape

(768, 10)

In [97]:
raw_data.describe()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.764167,671.708333,318.5,176.604167,5.25,3.5,0.234375,2.8125,22.307201,24.58776
std,0.105777,88.086116,43.626481,45.16595,1.75114,1.118763,0.133221,1.55096,10.090196,9.513306
min,0.62,514.5,245.0,110.25,3.5,2.0,0.0,0.0,6.01,10.9
25%,0.6825,606.375,294.0,140.875,3.5,2.75,0.1,1.75,12.9925,15.62
50%,0.75,673.75,318.5,183.75,5.25,3.5,0.25,3.0,18.95,22.08
75%,0.83,741.125,343.0,220.5,7.0,4.25,0.4,4.0,31.6675,33.1325
max,0.98,808.5,416.5,220.5,7.0,5.0,0.4,5.0,43.1,48.03


In [98]:
raw_data.isnull().values.any()

False

In [99]:
x_t1 = raw_data.drop('Y1', axis=1)
x = x_t1.drop('Y2', axis=1)
y_1 = raw_data['Y1']
y_2 = raw_data['Y2']

## 2) Part 1: Running models for Heating and Cooling loads

### 2.1) Prepering data for Heating Load

In [100]:
from sklearn.model_selection import train_test_split

# default is 75% / 25% train-test split
x_train_orig, x_test_orig, y_train, y_test = train_test_split(x, y_1, random_state=0)

In [101]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

x_train = scaler.fit_transform(x_train_orig)
x_test = scaler.transform(x_test_orig)

#### 2.1.1 KNN Regression

In [102]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor()
param_grid = {'n_neighbors':[1, 5, 7, 10, 15]}
grid_search = GridSearchCV(knn, param_grid)
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

{'n_neighbors': 7}
0.9278282806768371


In [103]:
from sklearn.metrics import r2_score

knn = KNeighborsRegressor(n_neighbors=7).fit(x_train, y_train)
y_predict = knn.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

r2_score: 0.92461


#### 2.1.2 Linear Regression

In [104]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression().fit(x_train, y_train)
print("Training score: ",lr.score(x_train , y_train))
y_predict = lr.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

Training score:  0.9173512129865965
r2_score: 0.91129


#### 2.1.3 Gradient Boosting

In [105]:
from sklearn.ensemble import GradientBoostingRegressor

gb = GradientBoostingRegressor().fit(x_train, y_train)
print('Training Score', gb.score(x_train, y_train))
y_predict = gb.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

Training Score 0.99844846657622
r2_score: 0.99736


#### 2.1.4 ADABoost

In [106]:
from sklearn.ensemble import AdaBoostRegressor

ada_reg = AdaBoostRegressor(KNeighborsRegressor(n_neighbors=7), 
        n_estimators=200, learning_rate=0.5, random_state=42).fit(x_train, y_train)
print('Training Score', ada_reg.score(x_train, y_train))
y_predict = ada_reg.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))


Training Score 0.9640555509200671
r2_score: 0.95918


#### 2.1.5 Bagging with Decision Tree

In [107]:
from sklearn.ensemble import BaggingRegressor

bag_reg = BaggingRegressor().fit(x_train, y_train)
print('Training Score', bag_reg.score(x_train, y_train))
y_predict = bag_reg.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))



Training Score 0.9995881621221753
r2_score: 0.99688


#### 2.1.6 Deep Learning Resression

In [108]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(13, input_dim=8, kernel_initializer='normal', activation='relu'))
model.add(Dense(6, kernel_initializer = 'normal', activation = 'relu'))
model.add(Dense(1, kernel_initializer='normal'))

model.compile(loss='mse', optimizer='adam' , metrics = ['mse'])
model.fit(x_train, y_train, epochs = 100, batch_size = 20)

y_train_predict = model.predict(x_train)
y_test_predict = model.predict(x_test)
print('Train r2: {:.2f}'.format(r2_score(y_train, y_train_predict)))
print('Test r2: {:.2f}'.format(r2_score(y_test, y_test_predict)))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### 2.2) Prepering data for Cooling Load

In [109]:
from sklearn.model_selection import train_test_split

# default is 75% / 25% train-test split
x_train_orig, x_test_orig, y_train, y_test = train_test_split(x, y_2, random_state=0)

In [110]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

x_train = scaler.fit_transform(x_train_orig)
x_test = scaler.transform(x_test_orig)

#### 2.2.1 KNN Regression

In [111]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor()
param_grid = {'n_neighbors':[1, 5, 7, 10, 15]}
grid_search = GridSearchCV(knn, param_grid)
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

{'n_neighbors': 7}
0.904699837697031


In [112]:
from sklearn.metrics import r2_score

knn = KNeighborsRegressor(n_neighbors=7).fit(x_train, y_train)
y_predict = knn.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

r2_score: 0.90959


#### 2.2.2 Linear Regression

In [113]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression().fit(x_train, y_train)
print("Training score: ",lr.score(x_train , y_train))
y_predict = lr.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

Training score:  0.8864209149755682
r2_score: 0.89100


#### 2.2.3 Gradient Boosting

In [114]:
from sklearn.ensemble import GradientBoostingRegressor

gb = GradientBoostingRegressor().fit(x_train, y_train)
print('Training Score', gb.score(x_train, y_train))
y_predict = gb.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

Training Score 0.981647084317214
r2_score: 0.97542


#### 2.2.4 ADABoost

In [115]:
from sklearn.ensemble import AdaBoostRegressor

ada_reg = AdaBoostRegressor(KNeighborsRegressor(n_neighbors=7), 
        n_estimators=200, learning_rate=0.5, random_state=42).fit(x_train, y_train)
print('Training Score', ada_reg.score(x_train, y_train))
y_predict = ada_reg.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))

Training Score 0.9508164546176986
r2_score: 0.92563


#### 2.2.5 Bagging with Decision Tree

In [116]:
from sklearn.ensemble import BaggingRegressor

bag_reg = BaggingRegressor().fit(x_train, y_train)
print('Training Score', bag_reg.score(x_train, y_train))
y_predict = bag_reg.predict(x_test)
print("r2_score: {:.5f}".format(r2_score(y_test, y_predict)))


Training Score 0.9941123486352204
r2_score: 0.96382


#### 2.2.6 Deep Learning Regression

In [117]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(13, input_dim=8, kernel_initializer='normal', activation='relu'))
model.add(Dense(3, kernel_initializer = 'normal', activation = 'linear'))
model.add(Dense(1, kernel_initializer='normal'))

model.compile(loss='mse', optimizer='adam' , metrics = ['mse'])
model.fit(x_train, y_train, epochs = 100, batch_size = 20)

y_train_predict = model.predict(x_train)
y_test_predict = model.predict(x_test)
print('Train r2: {:.2f}'.format(r2_score(y_train, y_train_predict)))
print('Test r2: {:.2f}'.format(r2_score(y_test, y_test_predict)))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## 3) Part 2: Running classification model for High/Meduim/Low load efficiency

### 3.1 Creating dataset and required variables

In [118]:
raw_data_2 = raw_data

In [119]:
raw_data_2['Y'] = raw_data_2['Y1'] + raw_data_2['Y2']

In [120]:
raw_data_2.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2,Y
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33,36.88
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33,36.88
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33,36.88
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33,36.88
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28,49.12


In [121]:
raw_data_2.describe()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2,Y
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.764167,671.708333,318.5,176.604167,5.25,3.5,0.234375,2.8125,22.307201,24.58776,46.894961
std,0.105777,88.086116,43.626481,45.16595,1.75114,1.118763,0.133221,1.55096,10.090196,9.513306,19.484947
min,0.62,514.5,245.0,110.25,3.5,2.0,0.0,0.0,6.01,10.9,16.95
25%,0.6825,606.375,294.0,140.875,3.5,2.75,0.1,1.75,12.9925,15.62,28.75
50%,0.75,673.75,318.5,183.75,5.25,3.5,0.25,3.0,18.95,22.08,40.97
75%,0.83,741.125,343.0,220.5,7.0,4.25,0.4,4.0,31.6675,33.1325,64.335
max,0.98,808.5,416.5,220.5,7.0,5.0,0.4,5.0,43.1,48.03,89.95


In [122]:
def f(row):
    if row['Y'] > 64.335:
        val = 2
    elif row['Y'] < 28.75:
        val = 0
    else:
        val = 1
    return val

raw_data_2['Y_label'] = raw_data_2.apply(f, axis=1)
raw_data_2.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2,Y,Y_label
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33,36.88,1
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33,36.88,1
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33,36.88,1
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33,36.88,1
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28,49.12,1


In [123]:
y_label = raw_data['Y_label']

In [124]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

encoder = LabelEncoder()
encoder.fit(y_label)
encoded_y = encoder.transform(y_label)
y_label_new = np_utils.to_categorical(encoded_y)
y_label_new

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

In [125]:
from sklearn.model_selection import train_test_split

x_train_orig, x_test_orig, y_train, y_test = train_test_split(x, y_label_new, random_state=0)

In [126]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

x_train = scaler.fit_transform(x_train_orig)
x_test = scaler.transform(x_test_orig)

### 3.2 Creating, compiling and fitting the Deep Learning model

In [127]:
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu')) #hidden layer
model.add(Dense(3, activation='sigmoid')) #output layer

In [128]:
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [129]:
# Fit the model
model.fit(x_train, y_train, epochs=100, batch_size=20)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x2c218fdd710>

### 3.3 Model evaluation

In [130]:
# evaluate the model
scores = model.evaluate(x_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


acc: 69.27%
