# A. Base line model

In [1]:
# import the required libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

In [2]:
# Loading the concrete data
data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Splitting the data into features and target
X = data[data.columns[data.columns != 'Strength']] # all columns except Strength 
y = data['Strength'] # Strength column

In [4]:
# no. of features columns = input shape
n_cols = X.shape[1]

In [5]:
'''Building the model with 1 hidden layers containing 10 nodes with 
ReLU activation and compile with adam optimizer and mean squared error loss function'''
model = tf.keras.models.Sequential([
        # 10 neuron hidden layer
        tf.keras.layers.Dense(10, activation='relu',input_shape=(n_cols,)),
        # Only 1 output neuron.
        tf.keras.layers.Dense(1)
    ])

model.compile(optimizer='adam', loss='mean_squared_error')

In [6]:
# Training the model with 70% of training data for 50 epochs
mean_squared_errors = []
i=0
while i<50: 
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # mean_squared_error = last value in the history
    mean_squared_error = hist.history['val_loss'][-1]
    mean_squared_errors.append(mean_squared_error)
    print(f"#{i+1} mean squared error is {mean_squared_errors[i]}")
    i+=1

#0 mean squared error is 549.144287109375
#1 mean squared error is 136.66424560546875
#2 mean squared error is 99.11436462402344
#3 mean squared error is 99.34284973144531
#4 mean squared error is 113.36961364746094
#5 mean squared error is 117.33661651611328
#6 mean squared error is 100.48281860351562
#7 mean squared error is 127.06002044677734
#8 mean squared error is 129.47813415527344
#9 mean squared error is 106.9997329711914
#10 mean squared error is 111.30514526367188
#11 mean squared error is 113.25389099121094
#12 mean squared error is 112.67878723144531
#13 mean squared error is 103.37322235107422
#14 mean squared error is 108.85252380371094
#15 mean squared error is 106.38521575927734
#16 mean squared error is 120.29869842529297
#17 mean squared error is 118.88227081298828
#18 mean squared error is 104.03933715820312
#19 mean squared error is 114.67874908447266
#20 mean squared error is 113.03777313232422
#21 mean squared error is 101.07972717285156
#22 mean squared error is

In [18]:
# Mean of mean_squared_error
print("Mean of mean_squared_error",np.mean(mean_squared_errors))

# standard daviation of mean_squared_error
print("Standard Deviation of mean_squared_error",np.std(mean_squared_errors))

Mean of mean_squared_error 53.849550552368164
Standard Deviation of mean_squared_error 19.144246150522008


# B.  Normalize the data

In [21]:
# Normalize the feature data by substracting the mean and dividing by the standard deviation
X_norm = (X - X.mean()) / X.std()
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [22]:
# no. of normalize features columns = input shape
n_cols = X_norm.shape[1]

In [23]:
'''Building the model with 1 hidden layers containing 10 nodes with 
ReLU activation and compile with adam optimizer and mean squared error loss function'''
model = tf.keras.models.Sequential([

    # 10 neuron hidden layer
    tf.keras.layers.Dense(10, activation='relu',input_shape=(n_cols,)),
    # Only 1 output neuron.
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

In [24]:
# Training the model with 70% of training data for 50 epochs
mean_squared_errors = []
i=0
while i<50: 
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test = train_test_split(X_norm,y,test_size=0.3,random_state=1)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # mean_squared_error = last value in the history
    mean_squared_error = hist.history['val_loss'][-1]
    mean_squared_errors.append(mean_squared_error)
    print(f"#{i+1} mean squared error is {mean_squared_errors[i]}")
    i+=1

#1 mean squared error is 352.0780334472656
#2 mean squared error is 188.66046142578125
#3 mean squared error is 142.20489501953125
#4 mean squared error is 106.27409362792969
#5 mean squared error is 83.33047485351562
#6 mean squared error is 70.05797576904297
#7 mean squared error is 63.00450134277344
#8 mean squared error is 59.0140266418457
#9 mean squared error is 56.065608978271484
#10 mean squared error is 53.67483901977539
#11 mean squared error is 51.8801383972168
#12 mean squared error is 50.173377990722656
#13 mean squared error is 48.885459899902344
#14 mean squared error is 47.93967056274414
#15 mean squared error is 46.94619369506836
#16 mean squared error is 45.79478454589844
#17 mean squared error is 45.05446243286133
#18 mean squared error is 44.65903854370117
#19 mean squared error is 43.85382080078125
#20 mean squared error is 43.46644973754883
#21 mean squared error is 43.19561767578125
#22 mean squared error is 43.362728118896484
#23 mean squared error is 43.1567573

In [25]:
# Mean of mean_squared_error
print("Mean of mean_squared_error",np.mean(mean_squared_errors))

# standard daviation of mean_squared_error
print("Standard Deviation of mean_squared_error",np.std(mean_squared_errors))

Mean of mean_squared_error 58.8994263458252
Standard Deviation of mean_squared_error 49.29113524702969


#### The mean squared error in case of B is greater than A. 

# C. Increase the number of epochs

In [26]:
#  Training the model with 70% of training data for 100 epochs
mean_squared_errors = []
i=0
while i<50: 
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=0)
    # mean_squared_error = last value in the history
    mean_squared_error = hist.history['val_loss'][-1]
    mean_squared_errors.append(mean_squared_error)
    print(f"#{i+1} mean squared error is {mean_squared_errors[i]}")
    i+=1

#1 mean squared error is 548871.25
#2 mean squared error is 44199.61328125
#3 mean squared error is 4667.89501953125
#4 mean squared error is 687.1221313476562
#5 mean squared error is 236.61538696289062
#6 mean squared error is 188.21157836914062
#7 mean squared error is 198.4850616455078
#8 mean squared error is 198.6468505859375
#9 mean squared error is 183.2135467529297
#10 mean squared error is 174.65869140625
#11 mean squared error is 164.7347869873047
#12 mean squared error is 173.63796997070312
#13 mean squared error is 170.30874633789062
#14 mean squared error is 169.2948455810547
#15 mean squared error is 157.70169067382812
#16 mean squared error is 206.5330047607422
#17 mean squared error is 158.2513885498047
#18 mean squared error is 286.57403564453125
#19 mean squared error is 159.1696319580078
#20 mean squared error is 167.07969665527344
#21 mean squared error is 182.7554473876953
#22 mean squared error is 164.34437561035156
#23 mean squared error is 159.13233947753906
#2

In [28]:
# Mean of mean_squared_error
print("Mean of mean_squared_error",np.mean(mean_squared_errors))

# standard daviation of mean_squared_error
print("Standard Deviation of mean_squared_error",np.std(mean_squared_errors))

Mean of mean_squared_error 12118.142562561035
Standard Deviation of mean_squared_error 76927.7986911319


#### The mean squared error in C is very large as compared to B . This implies that more epochs will not help without the additional hidden layers.

# D. Increase the number of hidden layers

In [29]:
'''Building the model with 3 hidden layers containing 10 nodes with 
ReLU activation and compile with adam optimizer and mean squared error loss function'''
model = tf.keras.models.Sequential([

    # 10 neuron hidden layer
    tf.keras.layers.Dense(10, activation='relu',input_shape=(n_cols,)),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    # Only 1 output neuron.
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

In [30]:
# Training the model with 70% of training data for 50 epochs
mean_squared_errors = []
i=0
while i<50: 
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # mean_squared_error = last value in the history
    mean_squared_error = hist.history['val_loss'][-1]
    mean_squared_errors.append(mean_squared_error)
    print(f"#{i+1} mean squared error is {mean_squared_errors[i]}")
    i+=1

#1 mean squared error is 116.83686065673828
#2 mean squared error is 87.96060943603516
#3 mean squared error is 68.04264068603516
#4 mean squared error is 62.599098205566406
#5 mean squared error is 59.56043243408203
#6 mean squared error is 55.86803436279297
#7 mean squared error is 54.970638275146484
#8 mean squared error is 52.91413116455078
#9 mean squared error is 54.48128890991211
#10 mean squared error is 52.76626205444336
#11 mean squared error is 53.61362075805664
#12 mean squared error is 53.14083480834961
#13 mean squared error is 54.251163482666016
#14 mean squared error is 52.33160400390625
#15 mean squared error is 52.99692153930664
#16 mean squared error is 54.72600555419922
#17 mean squared error is 53.22069549560547
#18 mean squared error is 52.07146072387695
#19 mean squared error is 51.5761604309082
#20 mean squared error is 52.03870391845703
#21 mean squared error is 50.868858337402344
#22 mean squared error is 51.590057373046875
#23 mean squared error is 51.4531822

In [32]:
# Mean of mean_squared_error
print("Mean of mean_squared_error",np.mean(mean_squared_errors))

# standard daviation of mean_squared_error
print("Standard Deviation of mean_squared_error",np.std(mean_squared_errors))

Mean of mean_squared_error 54.91884414672852
Standard Deviation of mean_squared_error 10.592987034100332


#### Compared to B it has less mean squared error. This proves the importance of more hidden layers as compared with other parameters.