**A) Build a baseline model**

In [0]:
#Necessary imports
import pandas as pd
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [0]:
#Load the concrete dataset straight from the hosted CSV
data = pd.read_csv(filepath_or_buffer='https://cocl.us/concrete_data')
#Preview the first five rows to confirm the columns loaded as expected
print(data.head(5))

   Cement  Blast Furnace Slag  Fly Ash  ...  Fine Aggregate  Age  Strength
0   540.0                 0.0      0.0  ...           676.0   28     79.99
1   540.0                 0.0      0.0  ...           676.0   28     61.89
2   332.5               142.5      0.0  ...           594.0  270     40.27
3   332.5               142.5      0.0  ...           594.0  365     41.05
4   198.6               132.4      0.0  ...           825.5  360     44.30

[5 rows x 9 columns]


**Separate data**

In [0]:
#Feature matrix: the first eight columns of the frame
feature_columns = data.columns[:8]
X = data[feature_columns]
print(X.head(5))

   Cement  Blast Furnace Slag  Fly Ash  ...  Coarse Aggregate  Fine Aggregate  Age
0   540.0                 0.0      0.0  ...            1040.0           676.0   28
1   540.0                 0.0      0.0  ...            1055.0           676.0   28
2   332.5               142.5      0.0  ...             932.0           594.0  270
3   332.5               142.5      0.0  ...             932.0           594.0  365
4   198.6               132.4      0.0  ...             978.4           825.5  360

[5 rows x 8 columns]


**Separate target labels**

In [0]:
#Regression target: the 'Strength' column
y = data.loc[:, 'Strength']
print(y.head(5))

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64


In [0]:
#Number of feature columns (this is the input width given to the first Dense layer)
len(X.columns)

8

**Building models**

In [0]:
#Number of independent train/evaluate repetitions and epochs per repetition
n_runs = 50
n_epochs = 50

#The split uses a fixed random_state, so it is identical on every repetition;
#compute it once instead of re-splitting inside the loop.
#NOTE(review): if each repetition is meant to draw a different random split,
#move this call back into the loop and vary random_state - confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#Empty list to store the mean squared error of each repetition
MSE = []

for run in range(n_runs):

    #Drop the backend state left by the previous repetition's model so that
    #it does not pile up across the repetitions
    keras.backend.clear_session()

    #define structure: one hidden layer of 10 ReLU units, single-unit output layer
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))#Hidden layer
    model.add(Dense(1))#last layer

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #Verbose is set to 0 to prevent excess printing
    model.fit(X_train, y_train, epochs=n_epochs, verbose=0)#Fit the model

    #Model evaluation on the held-out 30% of the rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    MSE.append(mse)
    print('{}) Mean Squared Error: {}.'.format(run, mse))













0) Mean Squared Error: 488.4575115100256.
1) Mean Squared Error: 739.3552825463751.
2) Mean Squared Error: 480.1242127348534.
3) Mean Squared Error: 1716.0368841140498.
4) Mean Squared Error: 673.414981030216.
5) Mean Squared Error: 830.7824065218681.
6) Mean Squared Error: 248.71325030391674.
7) Mean Squared Error: 224.5813283760283.
8) Mean Squared Error: 101.68019536801593.
9) Mean Squared Error: 160.81676336283894.
10) Mean Squared Error: 439.9788830001301.
11) Mean Squared Error: 544.3535929198509.
12) Mean Squared Error: 226.38085511038216.
13) Mean Squared Error: 371.87532472987806.
14) Mean Squared Error: 523.1551716736709.
15) Mean Squared Error: 116.81561296193269.
16) Mean Squared Error: 192.49382011197557.
17) Mean Squared Error: 743.7839541716701.
18) Mean Squared Error: 279.3142167102942.
19) Mean Squared Error: 271.241784161604.
20) Mean Squared Error: 247.35389832461522.
21) Mean Squared Error: 484.1682628757761.
22) Mean Squared Error: 144.80708265454962.
2

In [0]:
#Show every per-run error collected in MSE
print(str(MSE))

[488.4575115100256, 739.3552825463751, 480.1242127348534, 1716.0368841140498, 673.414981030216, 830.7824065218681, 248.71325030391674, 224.5813283760283, 101.68019536801593, 160.81676336283894, 439.9788830001301, 544.3535929198509, 226.38085511038216, 371.87532472987806, 523.1551716736709, 116.81561296193269, 192.49382011197557, 743.7839541716701, 279.3142167102942, 271.241784161604, 247.35389832461522, 484.1682628757761, 144.80708265454962, 480.84018958796855, 130.624679206568, 185.8239798221698, 596.0226637595281, 434.7688574066644, 121.90408930923483, 190.79681562324762, 451.4274968709368, 107.56795907297024, 143.11349023868206, 116.33112934178024, 386.2915984050053, 718.0948851532845, 365.1280412467294, 109.3211527556322, 139.37587174275023, 110.98622551323675, 110.6082721773025, 105.36545397886402, 122.45453961320852, 215.71845402341268, 114.9154746070428, 1033.880316333562, 563.9002063181326, 105.65317414362073, 253.1642721398341, 2874.29874946436]


**Mean and standard deviation of MSE**

In [0]:
#Summarise the repeated runs: average error and its spread
mse_mean = np.mean(MSE)
mse_std = np.std(MSE)
print('Mean of MSE: {}'.format(mse_mean))
print('Standard deviation of MSE: {}'.format(mse_std))

Mean of MSE: 410.7612662626049
Standard deviation of MSE: 461.15287074915716


**B) Normalize the data**

In [0]:
#Standardise every feature column: subtract its mean, then divide by its standard deviation
column_means = X.mean()
column_stds = X.std()
X_Norm = X.sub(column_means).div(column_stds)
print(X_Norm.head(5))

     Cement  Blast Furnace Slag  ...  Fine Aggregate       Age
0  2.476712           -0.856472  ...       -1.217079 -0.279597
1  2.476712           -0.856472  ...       -1.217079 -0.279597
2  0.491187            0.795140  ...       -2.239829  3.551340
3  0.491187            0.795140  ...       -2.239829  5.055221
4 -0.790075            0.678079  ...        0.647569  4.976069

[5 rows x 8 columns]


In [0]:
#Number of independent train/evaluate repetitions and epochs per repetition
n_runs = 50
n_epochs = 50

#The split uses a fixed random_state, so it is identical on every repetition;
#compute it once instead of re-splitting inside the loop.
#NOTE(review): if each repetition is meant to draw a different random split,
#move this call back into the loop and vary random_state - confirm intent.
#NOTE(review): X_Norm is scaled with the mean/std of the full dataset, so the
#held-out rows contribute to the scaling statistics; consider computing them
#from X_train only.
X_train, X_test, y_train, y_test = train_test_split(X_Norm, y, test_size=0.3, random_state=42)

#Empty list to store the mean squared error of each repetition
MSE = []

for run in range(n_runs):

    #Drop the backend state left by the previous repetition's model so that
    #it does not pile up across the repetitions
    keras.backend.clear_session()

    #define structure: one hidden layer of 10 ReLU units, single-unit output layer
    model = Sequential()
    #Input width is read from the data actually used for training
    model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))#Hidden layer
    model.add(Dense(1))#last layer

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #Verbose is set to 0 to prevent excess printing
    model.fit(X_train, y_train, epochs=n_epochs, verbose=0)#Fit the model

    #Model evaluation on the held-out 30% of the rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    MSE.append(mse)
    print('{}) Mean Squared Error: {}.'.format(run, mse))













0) Mean Squared Error: 300.8467514704805.
1) Mean Squared Error: 290.081480438938.
2) Mean Squared Error: 256.37060490191317.
3) Mean Squared Error: 423.3747494680695.
4) Mean Squared Error: 432.97336227873933.
5) Mean Squared Error: 341.4521567349626.
6) Mean Squared Error: 301.8119457941616.
7) Mean Squared Error: 265.316572186965.
8) Mean Squared Error: 302.3289378005663.
9) Mean Squared Error: 365.42452484148845.
10) Mean Squared Error: 535.3777459281438.
11) Mean Squared Error: 323.38475904886474.
12) Mean Squared Error: 210.72450058581387.
13) Mean Squared Error: 565.9626206063575.
14) Mean Squared Error: 353.4555031255745.
15) Mean Squared Error: 263.0897190756655.
16) Mean Squared Error: 368.14554226480635.
17) Mean Squared Error: 249.49393882901092.
18) Mean Squared Error: 254.64910685742842.
19) Mean Squared Error: 364.4933690810529.
20) Mean Squared Error: 340.22674178908346.
21) Mean Squared Error: 570.1221279730943.
22) Mean Squared Error: 364.97315356741643.
2

In [0]:
#Summarise the repeated runs: average error and its spread
mse_mean = np.mean(MSE)
mse_std = np.std(MSE)
print('Mean of MSE: {}'.format(mse_mean))
print('Standard deviation of MSE: {}'.format(mse_std))

Mean of MSE: 340.7533048873136
Standard deviation of MSE: 86.11274215455198


Raw data part A:
Mean of MSE: 410.7612662626049
Standard deviation of MSE: 461.15287074915716

---
Normalized data part B:
Mean of MSE: 340.7533048873136
Standard deviation of MSE: 86.11274215455198

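Normalizing the data had a major impact on MSE, as can be seen from the mean and standard deviation of part B: the mean MSE dropped from about 410.8 to 340.8, and the standard deviation dropped from about 461.2 to 86.1, so the results are far more consistent from run to run.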

**C) Increase the number of epochs**

In [0]:
#Number of independent train/evaluate repetitions and epochs per repetition
n_runs = 50
n_epochs = 100

#The split uses a fixed random_state, so it is identical on every repetition;
#compute it once instead of re-splitting inside the loop.
#NOTE(review): if each repetition is meant to draw a different random split,
#move this call back into the loop and vary random_state - confirm intent.
#NOTE(review): X_Norm is scaled with the mean/std of the full dataset, so the
#held-out rows contribute to the scaling statistics; consider computing them
#from X_train only.
X_train, X_test, y_train, y_test = train_test_split(X_Norm, y, test_size=0.3, random_state=42)

#Empty list to store the mean squared error of each repetition
MSE = []

for run in range(n_runs):

    #Drop the backend state left by the previous repetition's model so that
    #it does not pile up across the repetitions
    keras.backend.clear_session()

    #define structure: one hidden layer of 10 ReLU units, single-unit output layer
    model = Sequential()
    #Input width is read from the data actually used for training
    model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))#Hidden layer
    model.add(Dense(1))#last layer

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #Verbose is set to 0 to prevent excess printing
    model.fit(X_train, y_train, epochs=n_epochs, verbose=0)#Fit the model

    #Model evaluation on the held-out 30% of the rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    MSE.append(mse)
    print('{}) Mean Squared Error: {}.'.format(run, mse))

0) Mean Squared Error: 151.76837172210043.
1) Mean Squared Error: 151.28236460628955.
2) Mean Squared Error: 152.07590344350808.
3) Mean Squared Error: 137.3851226874244.
4) Mean Squared Error: 150.17505842954802.
5) Mean Squared Error: 158.1134961766837.
6) Mean Squared Error: 176.53653018356783.
7) Mean Squared Error: 141.24305885685996.
8) Mean Squared Error: 184.90496659202375.
9) Mean Squared Error: 156.08951937963235.
10) Mean Squared Error: 168.6540191259447.
11) Mean Squared Error: 143.68485203931382.
12) Mean Squared Error: 159.2122552209865.
13) Mean Squared Error: 152.93942252133024.
14) Mean Squared Error: 159.86736117470366.
15) Mean Squared Error: 155.4491736139565.
16) Mean Squared Error: 150.6868911759727.
17) Mean Squared Error: 140.3681875001285.
18) Mean Squared Error: 150.7176609720258.
19) Mean Squared Error: 156.19185388216758.
20) Mean Squared Error: 171.99836068873358.
21) Mean Squared Error: 168.52107771544246.
22) Mean Squared Error: 141.9256028398914.
23) Mea

In [0]:
#Summarise the repeated runs: average error and its spread
mse_mean = np.mean(MSE)
mse_std = np.std(MSE)
print('Mean of MSE: {}'.format(mse_mean))
print('Standard deviation of MSE: {}'.format(mse_std))

Mean of MSE: 156.8822236736143
Standard deviation of MSE: 14.876999573250623


**Normalized data on 50 epochs**
Mean of MSE: 340.7533048873136
Standard deviation of MSE: 86.11274215455198

---
**Normalized data on 100 epochs**
Mean of MSE: 156.8822236736143
Standard deviation of MSE: 14.876999573250623

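Increasing the number of epochs from 50 to 100 in part C on normalized data reduced the MSE a lot: the mean fell from about 340.8 to 156.9 and the standard deviation from about 86.1 to 14.9. It means the model made smaller prediction errors, and did so more consistently across runs.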

**D) Increase the number of hidden layers**

In [0]:
#Number of independent train/evaluate repetitions and epochs per repetition
n_runs = 50
n_epochs = 100

#The split uses a fixed random_state, so it is identical on every repetition;
#compute it once instead of re-splitting inside the loop.
#NOTE(review): if each repetition is meant to draw a different random split,
#move this call back into the loop and vary random_state - confirm intent.
#NOTE(review): X_Norm is scaled with the mean/std of the full dataset, so the
#held-out rows contribute to the scaling statistics; consider computing them
#from X_train only.
X_train, X_test, y_train, y_test = train_test_split(X_Norm, y, test_size=0.3, random_state=42)

#Empty list to store the mean squared error of each repetition
MSE = []

for run in range(n_runs):

    #Drop the backend state left by the previous repetition's model so that
    #it does not pile up across the repetitions
    keras.backend.clear_session()

    #define structure: three hidden layers of 10 ReLU units, single-unit output layer
    model = Sequential()
    #Input width is read from the data actually used for training
    model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))#Hidden layer
    model.add(Dense(10, activation='relu'))#Hidden layer
    model.add(Dense(10, activation='relu'))#Hidden layer
    model.add(Dense(1))#last layer

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #Verbose is set to 0 to prevent excess printing
    model.fit(X_train, y_train, epochs=n_epochs, verbose=0)#Fit the model

    #Model evaluation on the held-out 30% of the rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    MSE.append(mse)
    print('{}) Mean Squared Error: {}.'.format(run, mse))

0) Mean Squared Error: 54.84852170977952.
1) Mean Squared Error: 56.69453649388759.
2) Mean Squared Error: 84.3842197905414.
3) Mean Squared Error: 67.95222161025592.
4) Mean Squared Error: 106.64498971562149.
5) Mean Squared Error: 100.97250208351407.
6) Mean Squared Error: 53.316228809131886.
7) Mean Squared Error: 115.1475458648279.
8) Mean Squared Error: 65.60977507456857.
9) Mean Squared Error: 78.04866256288742.
10) Mean Squared Error: 77.33922547397755.
11) Mean Squared Error: 73.27694332998743.
12) Mean Squared Error: 76.54031714546154.
13) Mean Squared Error: 116.80736798026318.
14) Mean Squared Error: 57.138712547466845.
15) Mean Squared Error: 114.28027674829893.
16) Mean Squared Error: 88.02151751634256.
17) Mean Squared Error: 93.4887710740757.
18) Mean Squared Error: 81.44674365047315.
19) Mean Squared Error: 118.69207504446271.
20) Mean Squared Error: 115.0033813218328.
21) Mean Squared Error: 72.83664690928525.
22) Mean Squared Error: 74.56057883507486.
23) Mean Squared

In [0]:
#Summarise the repeated runs: average error and its spread
mse_mean = np.mean(MSE)
mse_std = np.std(MSE)
print('Mean of MSE: {}'.format(mse_mean))
print('Standard deviation of MSE: {}'.format(mse_std))

Mean of MSE: 86.41497713579125
Standard deviation of MSE: 22.2545782834827


**Part D) Normalized data on 100 epochs on a network with 3 dense hidden layers**

Mean of MSE: 86.41497713579125

Standard deviation of MSE: 22.2545782834827

---

**Part B)**

Mean of MSE: 340.7533048873136

Standard deviation of MSE: 86.11274215455198

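Increasing the number of layers in the network in part D on normalized data reduced the MSE. It means the model's prediction quality has improved. Note that part D also trains for 100 epochs instead of the 50 used in part B, so this comparison mixes both changes. Compared with part B, the standard deviation decreased as well (about 86.1 to 22.3); it has slightly increased only relative to part C (about 14.9), which used the same 100 epochs with a single hidden layer.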