In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


import pandas as pd
import numpy as np

# Part A (Build a Baseline Model)


In [None]:
# load the data
df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


## 1. Randomnly Split the data

In [None]:
# seperate the dependant and independant vars
X = df.drop('Strength', axis=1)
y = df['Strength']


In [None]:
def split_data(X, y, test_size=.3, random_state=40):
  """ split the data for training and testing """
  x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
  return x_train, x_test, y_train, y_test

In [None]:
x_train, x_test, y_train, y_test = split_data(X, y, .3, 101)

#### define create_model() method

In [None]:
# create model
def create_model():
  """ create the model """
  model = Sequential([
      # one hidden layer
      Dense(10, activation='relu'),
      # output layer
      Dense(1, activation='relu')
  ])
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

In [None]:
model = create_model()

## 2. Train the model

In [None]:
def train_nn(model, x_train, y_train, epochs=50):
  """ train NN """
  model.fit(x_train, y_train, epochs=epochs)


In [None]:
train_nn(model, x_train, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## 3. Evaluate the model

In [None]:
def evaluate_nn(model, x_test, y_test):
  # get predictions from model
  y_pred = model.predict(x_test)

  # evaluate using MSE
  mse = mean_squared_error(y_test, y_pred)
  return mse



In [None]:
print('Mean Squared Error on test data:\t' + str(evaluate_nn(model, x_test, y_test)))

Mean Squared Error on test data:	181.34456777582997


## 4. Repeat step 1-3, 50 times

In [None]:
def repeat_train_model(model, n, X, y, test_set, random_state, epochs=50):
  """ train NN, n times and generate list of n MSEs """
  mse_list = []
  for i in range(n):
    print('Round: '+ str(i+1) + '\n')
    # step 1
    x_train, x_test, y_train, y_test = split_data(X, y, test_set, random_state)
    # step 2
    train_nn(model, x_train, y_train, epochs)
    # step 3
    mse_list.append(evaluate_nn(model, x_test, y_test))
    print("MSE = " + str(mse_list[i]))

  return mse_list

In [None]:
mse_list_A = repeat_train_model(model, 50, X, y, .3, 101, 50)
mse_list_A

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE = 116.72316596515105
Round: 3

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE = 116.7987662658035
Round: 4

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21

[118.6562073339596,
 116.72316596515105,
 116.7987662658035,
 117.7465187771495,
 117.65146183647575,
 117.12738401337555,
 117.4497538997955,
 125.23174048029796,
 116.97262872392324,
 117.16816062408247,
 118.23008654976786,
 118.50918459203845,
 117.10438753510135,
 117.46920325986142,
 118.73721052549894,
 121.18571978734786,
 116.89172890774657,
 121.70127182750952,
 118.92100180113192,
 117.0492763215359,
 121.23851887980666,
 116.76968013655673,
 119.37579296848051,
 119.75866406506313,
 118.46018165792314,
 117.00806121992531,
 118.38376405300043,
 118.64721852591826,
 117.733625180011,
 121.72691200030772,
 119.15040220599532,
 117.11322258247323,
 117.66099241483116,
 117.53909103284222,
 117.6127721371898,
 118.58823396858433,
 118.13352719686408,
 117.97991116135668,
 117.97554601144094,
 117.61616489465287,
 117.64905489066132,
 119.17684565104629,
 118.7469054000891,
 121.14503599748777,
 117.54944136648834,
 118.46283304123389,
 116.86354612656295,
 117.42023593143756,
 

## 5. Report mean and standard deviation

In [None]:
# calculate
mean_A = np.mean(mse_list_A)
std_A = np.std(mse_list_A)

# report
print('Mean:\t', str(mean_A))
print('Standard Deviation:\t', str(std_A))

Mean:	 118.35641453868294
Standard Deviation:	 1.6218997037888356


# Part B (Normalize the Data)

In [None]:
# remove y (target) to normalize X (predictors) only
temp = df.drop('Strength', axis=1)

# preserve column names
columns = temp.columns

# convert to numpy
data_np = temp.to_numpy()

# normalize
mean = data_np.mean()
std = data_np.std()
norm_data = (data_np - mean) / std


# convert to dataframe
df_norm = pd.DataFrame(norm_data, columns=columns)
df_norm

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,0.684912,-0.847509,-0.847509,-0.387782,-0.840414,2.103820,1.070855,-0.768050
1,0.684912,-0.847509,-0.847509,-0.387782,-0.840414,2.146388,1.070855,-0.768050
2,0.096065,-0.443120,-0.847509,-0.200487,-0.847509,1.797336,0.838154,-0.081298
3,0.096065,-0.443120,-0.847509,-0.200487,-0.847509,1.797336,0.838154,0.188294
4,-0.283918,-0.471782,-0.847509,-0.302648,-0.847509,1.929011,1.495109,0.174105
...,...,...,...,...,...,...,...,...
1025,-0.063136,-0.518322,-0.591254,-0.337837,-0.822252,1.621675,1.332786,-0.768050
1026,0.066836,-0.847509,-0.519457,-0.291297,-0.817995,1.473541,1.460771,-0.768050
1027,-0.426093,-0.451917,-0.539322,-0.300662,-0.830198,1.684959,1.365988,-0.768050
1028,-0.396012,-0.317688,-0.847509,-0.349188,-0.815441,1.960794,1.391245,-0.768050


### Repeat part A

In [None]:
# normalized predictors
X = df_norm
# un-normalized target
y = df['Strength']


mse_list_B = repeat_train_model(model, 50, X, y, .3, 101, epochs=50)
mse_list_B

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE = 312.430324015006
Round: 3

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE = 299.448055361917
Round: 4

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50

[328.59951564830817,
 312.430324015006,
 299.448055361917,
 286.38539652604476,
 273.47189088204067,
 260.88781894514955,
 248.49077742443126,
 234.58444916410556,
 218.21422584662832,
 200.3458218302598,
 181.61805810176807,
 164.6313524372132,
 150.11425956294755,
 138.8650592704279,
 130.9869962019642,
 125.81362301572238,
 122.25861945886511,
 119.83612728868015,
 116.29165689355915,
 113.20953552224313,
 111.46831941847094,
 109.4071501752738,
 107.46398532534432,
 105.90186335422854,
 103.74234873365273,
 102.16038040229122,
 99.79687287429601,
 98.17526837262236,
 95.81873237222295,
 93.94199154128994,
 91.91985661264627,
 90.33210545918305,
 88.69869140905591,
 86.67361752095486,
 84.4887473093541,
 82.7328338362351,
 81.66728319282666,
 80.0657221267234,
 78.15195419316082,
 76.70562411243243,
 74.94279087596124,
 73.38394343651102,
 72.11430697507686,
 70.56381067188552,
 69.12703452614663,
 67.97917849216078,
 66.93680309530515,
 65.59198383117567,
 64.66492489920326,
 63.65

In [None]:
# calculate
mean_B = np.mean(mse_list_B)
std_B = np.std(mse_list_B)

# report
print('Mean:\t', str(mean_B))
print('Standard Deviation:\t', str(std_B))

Mean:	 131.69509762008173
Standard Deviation:	 73.84075745380129


### How does the mean of the MSE compare from the mean of the part A?

In [None]:
print('The mean of the MSE has reduced by: ' + str(mean_A-mean_B))

The mean of the MSE has reduced by: -13.338683081398784


# Part C (increase the number of epochs)

### Repeat part B and increase epochs to 100

In [None]:
# increase epochs to 100
mse_list_C = repeat_train_model(model, 50, X, y, .3, 101, epochs=100)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
MSE = 53.416889835174494
Round: 27

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/1

In [None]:
# calculate
mean_C = np.mean(mse_list_C)
std_C = np.std(mse_list_C)

# report
print('Mean of MSEs:\t', str(mean_C))
print('Standard Deviation of MSEs:\t', str(std_C))
print('The mean of the MSE from Part B to Part C has reduced by: ' + str(mean_B-mean_C))

Mean of MSEs:	 53.89121364090922
Standard Deviation of MSEs:	 1.937840749237217
The mean of the MSE from Part B to Part C has reduced by: 77.8038839791725


# Part D (increase the number of hidden layers)

- since 3 hidden layers are need, we need to change the architecture of the Neural Network
- we just need to redefine one method "create_model()" and the rest will remain the same and will work fine

In [None]:
def create_model():
  # create the model
  model = Sequential([
      # one hidden layer
      Dense(10, activation='relu'),
      # second hidden layer
      Dense(10, activation='relu'),
      # third hidden layer
      Dense(10, activation='relu'),
      # output layer
      Dense(1, activation='relu')
  ])
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

## Repeat part B

In [22]:
# recreate model with 3 hidden layers
model = create_model()

# train model 50 times
mse_list_D = repeat_train_model(model, 50, X, y, .3, 101, epochs=100)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
MSE = 40.727964364517334
Round: 27

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/1

In [23]:
# calculate mean and std
mean_D = np.mean(mse_list_D)
std_D = np.std(mse_list_D)

# report
print('Mean of MSEs:\t', str(mean_D))
print('Standard Deviation of MSEs:\t', str(std_D))
mean_red = mean_C-mean_D
print('The mean of the MSE from Part B to Part C has reduced by: ' + str(mean_red))

Mean of MSEs:	 49.69995960362882
Standard Deviation of MSEs:	 27.93806854503251
The mean of the MSE from Part B to Part C has reduced by: 4.191254037280402
