<h2>Building a Regression Model in Keras</h2>

In [22]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense

## Download and read data into a pandas dataframe.

In [23]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# $PART A:$ $BASELINE$ $MODEL$

### Seperate the Data into Predictors and Target

In [24]:
predictors = concrete_data.drop(columns=['Strength'])
target = concrete_data['Strength']

### Setting up the number of columns

In [25]:
n_cols = predictors.shape[1]

### Setting up the Neural Network model with one hidden layer and Adam optimizer

In [26]:
def regression_model():
    # creating model
    model = Sequential()
    
    # one hidden layer
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Repeating the training and evaluation 50 times

In [27]:
n_repeats = 50
mse_list = []

for i in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size= 0.3, random_state= 42)
    
    model = regression_model()
    
    # fitting the model and predicting
    model.fit(X_train, y_train, epochs=50, verbose=0)
    y_p = model.predict(X_test)
    
    mean_squared_error = sklearn.metrics.mean_squared_error(y_test, y_p)
    mse_list.append(mean_squared_error)

mean_mse = np.mean(mse_list)
std_mse= np.std(mse_list)



### Reporting the mean and standard deviation of Mean Squared Errors

In [28]:
Report = {"Metrics":["MEAN_MSE","STD_MSE"],"Result": 
    [mean_mse,std_mse]}
pd.DataFrame(Report)

Unnamed: 0,Metrics,Result
0,MEAN_MSE,285.589836
1,STD_MSE,293.512829


# $PART B:$ $Normalize$ $the$ $data $

### Normalising the predictors

In [29]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


### Setting up the number of predictors

In [30]:
n_cols2 = predictors_norm.shape[1]

### Setting up the Neural Network model with one hidden layer and Adam optimizer

In [31]:
def regression_model():
    # creating model
    model = Sequential()
    
    # one hidden layer
    model.add(Dense(10, activation='relu', input_shape=(n_cols2,)))
    model.add(Dense(1))
    
    # compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Repeating the training and evaluation 50 times

In [32]:
n_repeats = 50
mse_list_norm = []

for i in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size= 0.3, random_state= 42)
    
    model = regression_model()
    
    # fitting the model and predicting
    model.fit(X_train, y_train, epochs=50, verbose=0)
    y_p = model.predict(X_test)
    
    mean_squared_error_n = sklearn.metrics.mean_squared_error(y_test, y_p)
    mse_list_norm.append(mean_squared_error_n)

mean_mse_n = np.mean(mse_list_norm)
std_mse_n= np.std(mse_list_norm)




### Reporting the mean and standard deviation of Mean Squared Errors

In [33]:
Report = {"Metrics":["MEAN_MSE_normalised)","STD_MSE_normalised"],"Result": 
    [mean_mse_n,std_mse_n]}
pd.DataFrame(Report)

Unnamed: 0,Metrics,Result
0,MEAN_MSE_normalised),368.316776
1,STD_MSE_normalised,90.742066


### Comparison of the Mean of MSE

In [34]:
print(f'Original Data: {mean_mse}')
print(f'Normalized Data: {mean_mse_n}')

Original Data: 285.5898358078848
Normalized Data: 368.31677643308984


# $PART C:$ $Increate$ $the$ $number$ $of$ $epochs$

### Repeating Part B but with 100 epochs

In [35]:
n_repeats = 50
mse_list_norm_100Ep = []

for i in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size= 0.3, random_state= 42)
    
    model = regression_model()
    
    # fitting the model and predicting
    model.fit(X_train, y_train, epochs=100, verbose=0)
    y_p = model.predict(X_test)
    
    mean_squared_error_n_100EP = sklearn.metrics.mean_squared_error(y_test, y_p)
    mse_list_norm_100Ep.append(mean_squared_error_n_100EP)

mean_mse_n_100Ep = np.mean(mse_list_norm_100Ep)
std_mse_n_100Ep= np.std(mse_list_norm_100Ep)



### Reporting the mean and standard deviation of Mean Squared Errors

In [36]:
Report = {"Metrics":["MEAN_MSE_normalised_100Ep","STD_MSE_normalised_100Ep"],"Result": 
    [mean_mse_n_100Ep,std_mse_n_100Ep]}
pd.DataFrame(Report)

Unnamed: 0,Metrics,Result
0,MEAN_MSE_normalised_100Ep,159.763715
1,STD_MSE_normalised_100Ep,25.198426


### Comparison of the Mean of MSE

In [37]:
print(f'Normalized Data, 50 Epochs: {mean_mse_n}')
print(f'Normalized Data, 100 Epochs: {mean_mse_n_100Ep}')

Normalized Data, 50 Epochs: 368.31677643308984
Normalized Data, 100 Epochs: 159.76371530318423


# $Part D:$ $Increase$ $the$ $number$ $of$ $hidden$ $layers$ 

### Setting up the Neural Network model with three hidden layers, Adam optimizer, and ReLu

In [38]:
def regression_model():
    # creating model
    model = Sequential()
    
    # three hidden layer
    model.add(Dense(10, activation='relu', input_shape=(n_cols2,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Repeating Training and Evaluation 50 Times with Normalized Data and the Three Hidden Layers

In [39]:
n_repeats = 50
mse_list_norm_3Layers = []

for i in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size= 0.3, random_state= 42)
    
    model = regression_model()
    
    # fitting the model and predicting
    model.fit(X_train, y_train, epochs=50, verbose=0)
    y_p = model.predict(X_test)
    
    mean_squared_error_n_3Layers = sklearn.metrics.mean_squared_error(y_test, y_p)
    mse_list_norm_3Layers.append(mean_squared_error_n_3Layers)

mean_mse_n_3L = np.mean(mse_list_norm_3Layers)
std_mse_n_3L= np.std(mse_list_norm_3Layers)



### Reporting the mean and standard deviation of Mean Squared Errors for 3 layers

In [40]:
Report = {"Metrics":["MEAN_MSE_normalised_3Layers","STD_MSE_normalised_3Layers"],"Result": 
    [mean_mse_n_3L,std_mse_n_3L]}
pd.DataFrame(Report)

Unnamed: 0,Metrics,Result
0,MEAN_MSE_normalised_3Layers,121.763647
1,STD_MSE_normalised_3Layers,14.609522


### Comparison of the Mean of MSE

In [42]:
print(f'Normalized Data, 1 Hidden Layer: {mean_mse_n}')
print(f'Normalized Data, 3 Hidden Layers: {mean_mse_n_3L}')

Normalized Data, 1 Hidden Layer: 368.31677643308984
Normalized Data, 3 Hidden Layers: 121.76364731259828
