In [1]:
'''
PART A

'''

'\nPART A\n\n'

In [2]:
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from random import seed 
from random import randint


Using TensorFlow backend.


In [3]:
#Load the concrete strength dataset from the course URL
file = r'https://cocl.us/concrete_data'
concrete_data = pd.read_csv(file)
df = concrete_data  #alias kept so any cell that refers to `df` still works
#Only the last expression of a cell is displayed, so the cell ends on the shape
concrete_data.shape


(1030, 9)

In [4]:
#Count the missing values in every column of the dataset
#(only the final expression of a cell is rendered, so this cell holds just the null counts)
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [5]:
#The target (y) is the 'Strength' column; the predictors (X) are every other column
concrete_data_columns = concrete_data.columns
predictor_mask = concrete_data_columns != 'Strength'
X = concrete_data[concrete_data_columns[predictor_mask]]
y = concrete_data['Strength']
#Only the last expression is displayed, so the cell ends on a preview of the target
y.head()


0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [6]:
#Number of predictor columns; the network builders read this as their input width
n_cols = len(X.columns)
n_cols

8

In [7]:
#Defining the Baseline Regression Neural Network
def Baseline_Regression_Network(n_features=None):
    """Build and compile the baseline regression network.

    Architecture: one hidden Dense layer with 10 ReLU units followed by a
    single-unit linear output layer, compiled with the Adam optimizer and
    mean squared error loss.

    Parameters
    ----------
    n_features : int, optional
        Number of input features. When omitted, the notebook-level
        ``n_cols`` is used, so existing calls without arguments behave
        exactly as before.

    Returns
    -------
    A compiled, untrained Keras ``Sequential`` model.
    """
    if n_features is None:
        #Fall back to the global computed from the predictor frame
        n_features = n_cols

    model = Sequential()

    #Hidden layer: 10 neurons, ReLU activation; the first layer also declares the input shape
    model.add(Dense(10, activation='relu', input_shape=(n_features,)))

    #Output layer: 1 neuron, no activation (regression output)
    model.add(Dense(1))

    #Adam optimizer, Mean Squared Error as the loss
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [8]:
#Collect the test-set Mean Squared Error of 50 independent train/evaluate runs
MeanSquaredErrorsList = []
for i in range(1, 51): #One run per random_state, i.e. 50 different 70/30 splits

    #The loop index is used as the seed of the split
    #(it fixes the split only; no seed is set here for the network weights)
    random_state = i
    print("Using random_state split: ", random_state)

    #Release the graph/session state of the previous iteration's model so that
    #50 models do not pile up in the same Keras session
    keras.backend.clear_session()

    #Splitting the dataset using 'i'
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_state)

    #Creating a fresh, untrained model
    model = Baseline_Regression_Network()

    #Training on X_train and y_train for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)

    #Predicting Strength for the test set and scoring it once with scikit-learn.
    #The model's loss is also MSE, so a separate model.evaluate() call would
    #only repeat this computation.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error "+str(i)+" = "+str(MSE))
    MeanSquaredErrorsList.append(MSE)

Using random_state split:  1
Mean Squared Error 1 = 731.5953487655491
Using random_state split:  2
Mean Squared Error 2 = 225.34628039116228
Using random_state split:  3
Mean Squared Error 3 = 1065.697603787419
Using random_state split:  4
Mean Squared Error 4 = 574.7925774534158
Using random_state split:  5
Mean Squared Error 5 = 156.0038158330331
Using random_state split:  6
Mean Squared Error 6 = 180.1203368350526
Using random_state split:  7
Mean Squared Error 7 = 769.7018263733503
Using random_state split:  8
Mean Squared Error 8 = 936.1649296337732
Using random_state split:  9
Mean Squared Error 9 = 280.0433878975779
Using random_state split:  10
Mean Squared Error 10 = 185.85428330504777
Using random_state split:  11
Mean Squared Error 11 = 185.1558908258827
Using random_state split:  12
Mean Squared Error 12 = 717.1332909050111
Using random_state split:  13
Mean Squared Error 13 = 181.32017126978408
Using random_state split:  14
Mean Squared Error 14 = 270.6450071859514
Using r

In [9]:
#Convert the per-split Mean Squared Errors into a numpy array for the summary statistics.
#The name is rebound from a list to an array here; the loop cell resets it to an empty list.
MeanSquaredErrorsList = np.array(MeanSquaredErrorsList)     
MeanSquaredErrorsList

array([ 731.59535373,  225.34627618, 1065.69760619,  574.79257658,
        156.00381534,  180.12033641,  769.70182163,  936.16491502,
        280.04338507,  185.85428276,  185.15588685,  717.13329422,
        181.32017925,  270.64499809,  105.35797928,  102.80556062,
        391.67632969, 1434.34859819,  217.15485905,  103.88814545,
        189.04855339,  225.35316062,  110.39507078,  242.26046534,
       1455.53451673,  343.19761385,  112.90645588,  226.13357277,
        179.59903553,  137.43296846,  262.72007791,  405.16989385,
        411.56700443,  191.46929064,  105.4351342 ,  230.32545349,
        123.30339766,  110.12661998, 1093.54121818, 1525.72666406,
        484.62392163,  255.60583014,  137.91081342,  117.0579381 ,
        204.66210384,  192.06412265, 2936.02134299,  176.14422066,
        102.34210605,  674.93543638])

In [10]:
#Summarize the 50 runs: mean and standard deviation of the Mean Squared Errors
#(np.std uses its default ddof=0, i.e. the population standard deviation)
Mean = np.mean(MeanSquaredErrorsList)
Standard_deviation = np.std(MeanSquaredErrorsList)
for label, value in (
    ("The Mean of the Mean Squared Errors = ", Mean),
    ("The Standard Deviation of the Mean Squared Errors = ", Standard_deviation),
):
    print(label, value)

The Mean of the Mean Squared Errors =  435.5484040641082
The Standard Deviation of the Mean Squared Errors =  514.6417299310801


In [11]:
#Done

'''

PART B - (Normalized Predictors)

'''

In [11]:
#Re-derive the predictor frame (every column except 'Strength') for Part B
X = concrete_data.loc[:, concrete_data_columns != 'Strength']

In [12]:
#Part B requires normalized predictors: subtract each column's mean and divide by its standard deviation
#NOTE(review): the statistics come from the full dataset, before any train/test split,
#so test rows contribute to the scaling - confirm this is acceptable for the exercise
column_means = X.mean()
column_stds = X.std()
X_normalized = (X - column_means) / column_stds
X_normalized.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [13]:
#Collect the test-set Mean Squared Error of 50 independent runs on the normalized predictors
MeanSquaredErrorsList = []
for i in range(1, 51): #One run per random_state, i.e. 50 different 70/30 splits

    #The loop index is used as the seed of the split
    #(it fixes the split only; no seed is set here for the network weights)
    random_state = i
    print("Using random_state split: ", random_state)

    #Release the graph/session state of the previous iteration's model so that
    #50 models do not pile up in the same Keras session
    keras.backend.clear_session()

    #Splitting the dataset using 'i' (We use the X_normalized set)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=random_state)

    #Creating a fresh, untrained model
    model = Baseline_Regression_Network()

    #Training on X_train and y_train for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)

    #Predicting Strength for the test set and scoring it once with scikit-learn.
    #The model's loss is also MSE, so a separate model.evaluate() call would
    #only repeat this computation.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error "+str(i)+" = "+str(MSE))
    MeanSquaredErrorsList.append(MSE)

Using random_state split:  1
Mean Squared Error 1 = 325.8589441984603
Using random_state split:  2
Mean Squared Error 2 = 768.5732552241353
Using random_state split:  3
Mean Squared Error 3 = 275.3453907395644
Using random_state split:  4
Mean Squared Error 4 = 324.54524309966945
Using random_state split:  5
Mean Squared Error 5 = 353.9187705030719
Using random_state split:  6
Mean Squared Error 6 = 388.2310172763071
Using random_state split:  7
Mean Squared Error 7 = 426.712941191343
Using random_state split:  8
Mean Squared Error 8 = 317.6143131194377
Using random_state split:  9
Mean Squared Error 9 = 255.19959962715222
Using random_state split:  10
Mean Squared Error 10 = 446.5638576865582
Using random_state split:  11
Mean Squared Error 11 = 291.7228201955653
Using random_state split:  12
Mean Squared Error 12 = 245.45743326230343
Using random_state split:  13
Mean Squared Error 13 = 649.5850474533526
Using random_state split:  14
Mean Squared Error 14 = 445.0232662460179
Using ra

In [14]:
#Convert the per-split Mean Squared Errors into a numpy array for the summary statistics.
#The name is rebound from a list to an array here; the loop cell resets it to an empty list.
MeanSquaredErrorsList = np.array(MeanSquaredErrorsList)     
MeanSquaredErrorsList

array([325.85894051, 768.57324644, 275.34538674, 324.54524618,
       353.91876179, 388.23100417, 426.71294458, 317.61431272,
       255.19959962, 446.56383499, 291.72281289, 245.45743279,
       649.58504967, 445.02324066, 579.63166779, 455.52464638,
       374.2102799 , 313.40431826, 394.72261594, 287.34946327,
       397.65566165, 455.81945217, 272.70702437, 352.09581358,
       372.06052857, 412.55740839, 355.8189588 , 477.86221794,
       290.52644328, 463.00395731, 271.22404949, 318.16959249,
       486.2072853 , 461.9350864 , 324.92686416, 583.96753372,
       343.88038133, 433.36169257, 305.80206162, 285.13463196,
       282.59888715, 452.16326095, 428.35563366, 325.59459946,
       324.48910867, 321.7999698 , 467.60207736, 383.47261555,
       263.41490423, 355.91192148])

In [16]:
#Summarize the 50 runs: mean and standard deviation of the Mean Squared Errors
#(np.std uses its default ddof=0, i.e. the population standard deviation)
Mean = np.mean(MeanSquaredErrorsList)
Standard_deviation = np.std(MeanSquaredErrorsList)
for label, value in (
    ("The Mean of the Mean Squared Errors = ", Mean),
    ("The Standard Deviation of the Mean Squared Errors = ", Standard_deviation),
):
    print(label, value)

The Mean of the Mean Squared Errors =  383.7862885744437
The Standard Deviation of the Mean Squared Errors =  104.44865253165882


'''
Question: How does the mean of the mean squared errors compare to that from Step A?

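Answer: Normalizing the predictors in Step B lowered the mean of the mean squared errors only slightly (about 435.5 in Step A versus about 383.8 in Step B). The larger effect is on the spread: the standard deviation of the errors across the 50 splits fell from about 514.6 to about 104.4, so the results are far more consistent from one random split to the next.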

'''


In [17]:
#Part C: same setup as Part B (normalized predictors, baseline network) but trained for 100 epochs
#Collect the test-set Mean Squared Error of 50 independent runs
MeanSquaredErrorsList = []
for i in range(1, 51): #One run per random_state, i.e. 50 different 70/30 splits

    #The loop index is used as the seed of the split
    #(it fixes the split only; no seed is set here for the network weights)
    random_state = i
    print("Using random_state split: ", random_state)

    #Release the graph/session state of the previous iteration's model so that
    #50 models do not pile up in the same Keras session
    keras.backend.clear_session()

    #Splitting the dataset using 'i' (We use the X_normalized set)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=random_state)

    #Creating a fresh, untrained model
    model = Baseline_Regression_Network()

    #Training on X_train and y_train for 100 epochs
    model.fit(X_train, y_train, epochs=100, verbose=0)

    #Predicting Strength for the test set and scoring it once with scikit-learn.
    #The model's loss is also MSE, so a separate model.evaluate() call would
    #only repeat this computation.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error "+str(i)+" = "+str(MSE))
    MeanSquaredErrorsList.append(MSE)

Using random_state split:  1
Mean Squared Error 1 = 206.17186591386024
Using random_state split:  2
Mean Squared Error 2 = 143.38587526981885
Using random_state split:  3
Mean Squared Error 3 = 164.0963389078776
Using random_state split:  4
Mean Squared Error 4 = 170.71517692491847
Using random_state split:  5
Mean Squared Error 5 = 147.5462155635303
Using random_state split:  6
Mean Squared Error 6 = 195.01918498906502
Using random_state split:  7
Mean Squared Error 7 = 140.47995721020746
Using random_state split:  8
Mean Squared Error 8 = 161.07381777470167
Using random_state split:  9
Mean Squared Error 9 = 172.8129839357049
Using random_state split:  10
Mean Squared Error 10 = 148.5251587802924
Using random_state split:  11
Mean Squared Error 11 = 142.9070613577142
Using random_state split:  12
Mean Squared Error 12 = 160.9277789662185
Using random_state split:  13
Mean Squared Error 13 = 204.22758794988243
Using random_state split:  14
Mean Squared Error 14 = 172.47166581138438
Us

In [18]:
#Convert the per-split Mean Squared Errors into a numpy array for the summary statistics.
#The name is rebound from a list to an array here; the loop cell resets it to an empty list.
MeanSquaredErrorsList = np.array(MeanSquaredErrorsList)     
MeanSquaredErrorsList

array([206.17186364, 143.3858709 , 164.09633768, 170.71518164,
       147.54621222, 195.01918102, 140.4799576 , 161.07381799,
       172.81298815, 148.52515458, 142.90706025, 160.92778036,
       204.22759003, 172.47166208, 147.32476651, 156.41143132,
       145.0340887 , 140.69039619, 180.52076784, 156.50028726,
       138.06293244, 138.03386665, 140.6524631 , 137.32532373,
       150.20972204, 166.06802385, 159.26946126, 166.00794221,
       192.48113972, 215.01552703, 147.65868598, 149.39098696,
       155.8938587 , 171.14130454, 163.9382898 , 157.96382561,
       158.32394133, 157.19667888, 162.43314317, 161.32011253,
       163.560983  , 153.95348836, 151.41462939, 176.47093187,
       154.8212581 , 174.71388105, 162.33365722, 149.53267887,
       161.70851999, 161.60091186])

In [19]:
#Summarize the 50 runs: mean and standard deviation of the Mean Squared Errors
#(np.std uses its default ddof=0, i.e. the population standard deviation)
Mean = np.mean(MeanSquaredErrorsList)
Standard_deviation = np.std(MeanSquaredErrorsList)
for label, value in (
    ("The Mean of the Mean Squared Errors = ", Mean),
    ("The Standard Deviation of the Mean Squared Errors = ", Standard_deviation),
):
    print(label, value)

The Mean of the Mean Squared Errors =  161.10681130375417
The Standard Deviation of the Mean Squared Errors =  17.579076396093768


'''

Question: How does the mean of the mean squared errors compare to that from Step B?
Answer: Increasing the number of epochs from 50 to 100 significantly decreased the mean of the mean squared errors (from about 383.8 to about 161.1)
and stabilized the results: the standard deviation across the random splits dropped from about 104.4 to about 17.6. It also increased the computation time.


'''

'''

PART D - (Normalized Predictors, 50 Epochs and Three Hidden Layers)


'''

In [20]:
def Deeper_Regression_Network(n_features=None):
    """Build and compile the three-hidden-layer regression network.

    Architecture: three hidden Dense layers of 10 ReLU units each followed
    by a single-unit linear output layer, compiled with the Adam optimizer
    and mean squared error loss.

    Parameters
    ----------
    n_features : int, optional
        Number of input features. When omitted, the notebook-level
        ``n_cols`` is used, so existing calls without arguments behave
        exactly as before.

    Returns
    -------
    A compiled, untrained Keras ``Sequential`` model.
    """
    if n_features is None:
        #Fall back to the global computed from the predictor frame
        n_features = n_cols

    model = Sequential()

    #First hidden layer: 10 neurons, ReLU activation; it also declares the input shape
    model.add(Dense(10, activation='relu', input_shape=(n_features,)))

    #Second hidden layer with 10 neurons
    model.add(Dense(10, activation='relu'))

    #Third hidden layer with 10 neurons
    model.add(Dense(10, activation='relu'))

    #Output layer: 1 neuron, no activation (regression output)
    model.add(Dense(1))

    #Adam optimizer, Mean Squared Error as the loss
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [21]:
#Collect the test-set Mean Squared Error of 50 independent runs of the deeper network
MeanSquaredErrorsList = []
for i in range(1, 51): #One run per random_state, i.e. 50 different 70/30 splits

    #The loop index is used as the seed of the split
    #(it fixes the split only; no seed is set here for the network weights)
    random_state = i
    print("Using random_state split: ", random_state)

    #Release the graph/session state of the previous iteration's model so that
    #50 models do not pile up in the same Keras session
    keras.backend.clear_session()

    #Splitting the dataset using 'i' (We use the X_normalized set)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=random_state)

    #Creating a fresh, untrained model with three hidden layers
    model = Deeper_Regression_Network()

    #Training on X_train and y_train for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)

    #Predicting Strength for the test set and scoring it once with scikit-learn.
    #The model's loss is also MSE, so a separate model.evaluate() call would
    #only repeat this computation.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error "+str(i)+" = "+str(MSE))
    MeanSquaredErrorsList.append(MSE)

Using random_state split:  1
Mean Squared Error 1 = 160.13178107499306
Using random_state split:  2
Mean Squared Error 2 = 111.23005565161844
Using random_state split:  3
Mean Squared Error 3 = 145.955608701243
Using random_state split:  4
Mean Squared Error 4 = 138.87388847869576
Using random_state split:  5
Mean Squared Error 5 = 141.9161035729072
Using random_state split:  6
Mean Squared Error 6 = 141.3855426380935
Using random_state split:  7
Mean Squared Error 7 = 129.5494770926565
Using random_state split:  8
Mean Squared Error 8 = 131.01288212155833
Using random_state split:  9
Mean Squared Error 9 = 141.39000003160396
Using random_state split:  10
Mean Squared Error 10 = 114.8949571874921
Using random_state split:  11
Mean Squared Error 11 = 117.37938109956512
Using random_state split:  12
Mean Squared Error 12 = 105.90313615768088
Using random_state split:  13
Mean Squared Error 13 = 136.06363576747066
Using random_state split:  14
Mean Squared Error 14 = 139.158318973282
Usin

In [22]:
#Convert the per-split Mean Squared Errors into a numpy array for the summary statistics.
#The name is rebound from a list to an array here; the loop cell resets it to an empty list.
MeanSquaredErrorsList = np.array(MeanSquaredErrorsList)     
MeanSquaredErrorsList

array([160.13178473, 111.23005123, 145.95560157, 138.87388754,
       141.91609997, 141.38553875, 129.54947739, 131.01287999,
       141.39000116, 114.89495463, 117.37938005, 105.90313721,
       136.06363274, 139.15831757, 113.09361599, 128.4500822 ,
       126.68184533, 119.38351787, 127.78285065, 139.22718087,
        97.26719899, 123.62547058, 130.36649504, 134.87078357,
       131.83637437, 151.86485238, 139.94772862, 137.38279411,
       137.38086138, 145.79711755, 137.92820714, 132.75597552,
       125.56542989, 135.90778288, 144.65855672, 138.80288854,
       102.15152689, 121.07290587, 142.26370818, 129.59465514,
       138.38679743, 117.75338295, 141.60002403, 144.51784834,
       141.69450187, 108.59745976,  97.12515603, 132.63438392,
       137.39786429, 129.49472818])

In [23]:
#Summarize the 50 runs: mean and standard deviation of the Mean Squared Errors
#(np.std uses its default ddof=0, i.e. the population standard deviation)
Mean = np.mean(MeanSquaredErrorsList)
Standard_deviation = np.std(MeanSquaredErrorsList)
for label, value in (
    ("The Mean of the Mean Squared Errors = ", Mean),
    ("The Standard Deviation of the Mean Squared Errors = ", Standard_deviation),
):
    print(label, value)

The Mean of the Mean Squared Errors =  130.79414595274227
The Standard Deviation of the Mean Squared Errors =  13.679588658175687


'''

Question: How does the mean of the mean squared errors compare to that from Step B?
Answer: Increasing the number of hidden layers from 1 to 3 significantly decreased the mean of the mean squared errors (from about 383.8 in Step B to about 130.8)
and stabilized the results: the standard deviation across the random splits dropped from about 104.4 to about 13.7. It also increased the computation time.


'''