### Set up the libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.


### Read data from URL

In [2]:
# Download the concrete-strength CSV straight from the course object store into a DataFrame,
# then preview the first rows to confirm the columns parsed as expected.
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


### Analyse the data

In [3]:
# Dimensions of the loaded frame as (rows, columns).
concrete_data.shape

(1030, 9)

In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) to see how
# differently the columns are scaled.
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [5]:
# Count of missing values in each column.
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

### Split the input and output columns

In [6]:
# Keep the full column index available, then separate the label from the input features.
concrete_data_columns = concrete_data.columns

target = concrete_data['Strength']  # the value the network learns to predict
predictors = concrete_data.drop(columns='Strength')  # every remaining column is an input feature

In [7]:
# Preview the feature table to confirm the Strength column is no longer present.
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [8]:
# Preview the label series (the Strength column only).
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

### Normalize the input columns

In [9]:
# Z-score each feature column: subtract the column mean, then divide by the column
# standard deviation.
# NOTE: the mean and standard deviation are computed over every row of `predictors`,
# i.e. before any train/test split is drawn from this table.
predictors_norm = (
    predictors
    .sub(predictors.mean(), axis='columns')
    .div(predictors.std(), axis='columns')
)
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [10]:
# Width of the feature table; the model-building functions read this global to size
# the input layer (input_shape=(n_cols,)).
n_cols = predictors_norm.shape[1] # number of predictors
n_cols

8

### Define the models and functions used

#### Define the base model with one hidden layer
The model contains one hidden layer of 10 nodes with a ReLU activation function, the Adam optimiser, and a mean squared error loss function.

In [11]:
# define regression model
def regression_model():
    """Build and compile the baseline regression network.

    The network has a single hidden layer of 10 ReLU units followed by one
    linear output unit. The input width is read from the module-level
    ``n_cols``.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer and
        mean squared error loss.
    """
    network = Sequential([
        Dense(10, activation='relu', input_shape=(n_cols,)),  # hidden layer
        Dense(1),                                             # single regression output
    ])

    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

#### Define the model with three hidden layers
The model contains three hidden layers of 10 nodes each with a ReLU activation function, the Adam optimiser, and a mean squared error loss function.

In [12]:
# define regression model with three hidden layers
def regression_model_3():
    """Build and compile the deeper regression network.

    The network stacks three hidden layers of 10 ReLU units each, followed by
    one linear output unit. The input width is read from the module-level
    ``n_cols``.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer and
        mean squared error loss.
    """
    network = Sequential()

    # First hidden layer also declares the input shape.
    network.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    # Remaining two hidden layers are identical.
    for _ in range(2):
        network.add(Dense(10, activation='relu'))
    # Single regression output.
    network.add(Dense(1))

    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

#### Define the function to display report

In [36]:
def print_scores(s,scorelist):
    """Print a list of scores with their mean and standard deviation.

    Args:
        s: Label printed after ``'Scores for '`` to identify the experiment.
        scorelist: Sequence of numeric scores (here, mean squared errors).

    Returns:
        A ``(mean, standard_deviation)`` tuple computed with ``np.mean`` and
        ``np.std`` (NumPy's default, i.e. the population standard deviation).
    """
    # Echo the raw scores first so they stay visible next to the summary.
    print('Mean squared errors')
    print(scorelist)

    summary = (np.mean(scorelist), np.std(scorelist))

    print('\nScores for ',s)
    for caption, value in zip(('Mean =', 'standard deviation='), summary):
        print(caption, value)
    return summary

#### Number of loops to execute and evaluate the model

In [14]:
# Number of train/evaluate repetitions per experiment; read as a global by runmodel().
iterations=50

#### Build and run the model 50 times and get the scores

In [15]:
def runmodel(predictors,epoch,hiddenlayers,n_iterations=None):
    """Train and score a freshly built network on repeated random splits.

    For every repetition a new 70/30 train/test split is drawn (seeded with
    the repetition index), a new model is built and fitted, and the mean
    squared error of its predictions on the held-out 30% is recorded.

    Relies on names defined elsewhere in the notebook: ``target``,
    ``iterations``, ``regression_model`` and ``regression_model_3``.

    Args:
        predictors: Feature table, split together with the module-level
            ``target`` by ``train_test_split``.
        epoch: Number of training epochs passed to ``model.fit``.
        hiddenlayers: Truthy selects ``regression_model_3`` (three hidden
            layers); falsy selects ``regression_model`` (one hidden layer).
        n_iterations: Number of repetitions. Defaults to the module-level
            ``iterations`` when omitted.

    Returns:
        List with one test-set mean squared error per repetition.
    """
    if n_iterations is None:
        n_iterations = iterations

    # Choose the model factory once instead of re-testing the flag on every pass.
    build_model = regression_model_3 if hiddenlayers else regression_model

    print('Started running the model....')
    scores=[]
    for i in range(n_iterations):
        # Release the graph/state left behind by previously built models; without
        # this, every repetition adds another model to the same backend session.
        # NOTE: this also invalidates any other Keras model still held by the caller.
        keras.backend.clear_session()

        # Get a new set of test/train values with 30% split for test data
        (x_train,x_test, y_train,y_test) = train_test_split(predictors,target,test_size=0.30,random_state=i)

        # build the model
        model = build_model()

        # Only the first repetition logs per-epoch progress, to keep the output short.
        verbose=int(i==0)
        # Train for `epoch` epochs. The test split is passed as validation data only
        # so its loss is reported; no callback acts on it.
        model.fit(x_train,y_train,validation_data=(x_test,y_test), epochs=epoch,verbose=verbose)

        # Evaluate the mean squared error on the held-out split
        y_pred=model.predict(x_test)
        score=mean_squared_error(y_test,y_pred)

        scores.append(score)
        print(i+1,'--',score,end=', ')
    print('\nDone evaluating ',n_iterations, ' times')
    return scores

### Part A

#### Run and evaluate the model with one hidden layer,with columns not normalized and epoch=50 looping 50 times

In [16]:
# Part A: raw (un-normalized) predictors, 50 epochs, single-hidden-layer model.
scores_A=runmodel(predictors,50,False)

Started running the model....
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 721 samples, validate on 309 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1 -- 97.19827809899006, 2 -- 195.3393812136963, 3 -- 170.65669642529267, 4 -- 3901.5368068435773, 5 -- 185.15170299759612, 6 -- 129.52755082825422, 7 -- 152.24452058505378, 8 -- 2008.1202754270942, 9 -- 229.081123

#### Result for Part A

In [37]:
# Print the per-run errors for Part A and keep their mean / standard deviation for the comparisons.
mean_A,sd_A=print_scores('Part A',scores_A)

Mean squared errors
[97.19827809899006, 195.3393812136963, 170.65669642529267, 3901.5368068435773, 185.15170299759612, 129.52755082825422, 152.24452058505378, 2008.1202754270942, 229.08112342287293, 372.91740993880666, 610.2605446756127, 411.89858563510376, 118.34912272461611, 140.6601332952274, 136.5338655294431, 421.06554478067005, 140.00105411976796, 100.9353058783886, 107.7450916616914, 93.44861396708039, 192.2785874114268, 115.06602039099961, 126.07198244990684, 316.13259144081496, 649.265900386334, 108.90544692175197, 137.03753625168093, 590.022147171265, 360.8735215222758, 488.5975657313072, 106.11586534750955, 118.06289316951332, 128.36252485994464, 110.79051154309786, 749.0358843398553, 1027.6409975694137, 115.3865645836732, 119.17121741259365, 197.73906948842486, 246.30621064297557, 532.3454728707567, 118.57780136271651, 187.43341153871674, 230.93446083630138, 295.5744237208724, 4094.733003747336, 418.3414709030016, 541.7970891591913, 82.96090127030362, 108.33816075721899]

S

### Part B

#### Run and evaluate the model with one hidden layer,with columns normalized and epoch=50 looping 50 times

In [18]:
# Part B: normalized predictors, 50 epochs, single-hidden-layer model.
scores_B=runmodel(predictors_norm,50,False)

Started running the model....
Train on 721 samples, validate on 309 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1 -- 276.91408400498057, 2 -- 291.8272995518561, 3 -- 308.7479769291459, 4 -- 416.541989425058, 5 -- 428.64699782198966, 6 -- 406.56883429311137, 7 -- 510.6347746889398, 8 -- 351.932008345743, 9 -- 532.8469911920499, 10 -- 373.24270530695316, 11 -- 310.9289481997141, 12 -- 352.3745596948254, 13 -- 412.00783507193825, 14 -- 287.7658

#### Result for Part B

In [38]:
# Print the per-run errors for Part B and keep their mean / standard deviation for the comparisons.
mean_B,sd_B=print_scores('Part B',scores_B)

Mean squared errors
[276.91408400498057, 291.8272995518561, 308.7479769291459, 416.541989425058, 428.64699782198966, 406.56883429311137, 510.6347746889398, 351.932008345743, 532.8469911920499, 373.24270530695316, 310.9289481997141, 352.3745596948254, 412.00783507193825, 287.76587933219264, 391.4482700297803, 467.7557481774523, 213.2308869082539, 448.27656324615276, 391.3040344267676, 484.37127506196515, 302.0186953808421, 534.4157060384945, 616.7881873252293, 602.7150257528883, 279.19374088526575, 344.7638701698413, 347.0175657797908, 356.58563979301346, 259.543232979604, 292.1894182368466, 287.2612579324277, 566.0116202497262, 449.85227237897453, 450.62590134221927, 605.6388830909577, 261.27356440717085, 705.0917329294978, 367.56828882758214, 306.2954751242048, 368.24614909191286, 439.3912617516961, 302.6680666119842, 255.69936545790614, 315.78908068916894, 279.85546129600726, 313.7261364852315, 352.2384274254742, 584.7150296752629, 285.7974191838663, 253.62019543739885]

Scores for  

### Compare B with A

In [39]:
# Side-by-side summary of Part A (raw inputs) and Part B (normalized inputs).
comparison_rows = (
    ('Mean                  ', mean_A, mean_B),
    ('Standard deviation    ', sd_A, sd_B),
)
print('                            A                   B')
for row in comparison_rows:
    print(*row)

                            A                   B
Mean                   446.7314168570003 386.8792866687871
Standard deviation     791.2202896418884 113.53794021335572


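We can see that normalizing the inputs lowers the mean of the mean squared errors (about 446.7 for A versus 386.9 for B) and greatly reduces their standard deviation (about 791.2 versus 113.5), so the results are far more consistent from run to run.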

### Part C

#### Run and evaluate the model with one hidden layer,with columns normalized and epoch=100 looping 50 times

In [21]:
# Part C: normalized predictors, 100 epochs, single-hidden-layer model.
scores_C=runmodel(predictors_norm,100,False)

Started running the model....
Train on 721 samples, validate on 309 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/1

#### Result for Part C

In [40]:
# Print the per-run errors for Part C and keep their mean / standard deviation for the comparison.
mean_C,sd_C=print_scores('Part C',scores_C)

Mean squared errors
[141.92854335919762, 186.94989806782453, 149.04301506993428, 164.1829099491324, 178.26965016277856, 203.1328681701529, 218.41841642669056, 162.0099850883699, 181.01797951499233, 174.7660642084563, 146.5034296146788, 191.651399552885, 156.79607307594262, 194.7565481100017, 221.0854292984087, 162.60987773503106, 143.73498803263706, 144.73352270197373, 126.77528205524843, 177.90584922715948, 158.59745203902668, 186.21192389254588, 147.09449775102547, 158.25657189477963, 136.11760524823518, 160.12828222862612, 156.84991708754453, 141.39982761618117, 178.32103858883144, 160.70189927130147, 190.88149118444346, 154.8269412819327, 163.4323131091946, 155.95681957264654, 171.26439480128937, 159.71703518485543, 157.54484329455792, 158.38797224850455, 147.76391754359247, 148.4311502826942, 183.4718781491018, 153.93963410875386, 150.47915074979844, 190.7430839211624, 162.3678821434916, 179.38248796593135, 156.05414388145272, 172.5633058957641, 139.38509060830194, 171.95805857816

### Compare C with B

In [41]:
# Side-by-side summary of Part B (50 epochs) and Part C (100 epochs).
comparison_rows = (
    ('Mean                  ', mean_B, mean_C),
    ('Standard deviation    ', sd_B, sd_C),
)
print('                            B                   C')
for row in comparison_rows:
    print(*row)

                            B                   C
Mean                   386.8792866687871 165.57004679090457
Standard deviation     113.53794021335572 20.133254919572703


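We can see that training for 100 epochs instead of 50 lowers both the mean (about 386.9 for B versus 165.6 for C) and the standard deviation (about 113.5 versus 20.1) of the mean squared errors.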

### Part D

#### Run and evaluate the model with three hidden layers,with columns normalized and epoch=50 looping 50 times

In [24]:
# Part D: normalized predictors, 50 epochs, three-hidden-layer model.
scores_D=runmodel(predictors_norm,50,True)

Started running the model....
Train on 721 samples, validate on 309 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1 -- 123.10693319748826, 2 -- 152.04253616124487, 3 -- 106.04086946708526, 4 -- 147.4061917842086, 5 -- 144.27608557692463, 6 -- 133.00701946755072, 7 -- 161.56936873787498, 8 -- 114.97703520160086, 9 -- 120.59745522312005, 10 -- 117.8075237900726, 11 -- 132.83368500923885, 12 -- 104.74177855788005, 13 -- 134.04214689641236, 14 -- 

#### Result for Part D

In [42]:
# Print the per-run errors for Part D and keep their mean / standard deviation for the comparison.
mean_D,sd_D=print_scores('Part D',scores_D)

Mean squared errors
[123.10693319748826, 152.04253616124487, 106.04086946708526, 147.4061917842086, 144.27608557692463, 133.00701946755072, 161.56936873787498, 114.97703520160086, 120.59745522312005, 117.8075237900726, 132.83368500923885, 104.74177855788005, 134.04214689641236, 136.62278960004346, 118.67346226409961, 125.92231613751702, 119.66882697487375, 129.59395736456815, 118.9614218417401, 140.69716544154033, 126.91294703234902, 100.79095915087959, 127.306516865522, 88.29362528636628, 122.65176116512181, 123.56615992384711, 140.10618515950833, 120.69207427855349, 125.76263178341999, 140.5822632354284, 169.16413153685286, 138.19007823162596, 122.11263129415698, 102.80310105762968, 142.86684640838448, 143.26547366668345, 146.6349838472493, 145.29507077606428, 139.6528576064576, 138.52952856821773, 93.67417385833039, 115.49863055516454, 120.88050837516154, 87.79544005214098, 141.83584636356724, 127.15051372175633, 119.21644534822309, 137.7986817389667, 114.37261404918553, 135.8552573

### Compare D with B

In [43]:
# Side-by-side summary of Part B (one hidden layer) and Part D (three hidden layers).
comparison_rows = (
    ('Mean                  ', mean_B, mean_D),
    ('Standard deviation    ', sd_B, sd_D),
)
print('                            B                   D')
for row in comparison_rows:
    print(*row)

                            B                   D
Mean                   386.8792866687871 127.63693013923032
Standard deviation     113.53794021335572 16.99421784371839


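We can see that using three hidden layers instead of one lowers both the mean (about 386.9 for B versus 127.6 for D) and the standard deviation (about 113.5 versus 17.0) of the mean squared errors.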