## Import Keras and Packages

In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

from keras.layers.convolutional import Conv2D # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Download concrete data

In [2]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


## A. Build a baseline model (5 marks)

## Build Neural Network Model

In [3]:
def regression_model(n_cols):
    # create model
    model = Sequential()
    
    # One hidden layer of 10 nodes, and a ReLU activation function
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    
    model.add(Dense(1))
    
    # Use the adam optimizer and the mean squared error as the loss function.
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model

In [4]:
def trail_and_test(concrete_data, modeldef, n_epochs=50):
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing
    train_data, test_data = train_test_split(concrete_data, test_size=0.3)

    ## Split data into predictors and target
    predictors_train = train_data[train_data.columns[train_data.columns != 'Strength']] # all columns except Strength
    target_train = train_data['Strength'] # Strength column

    predictors_test = test_data[test_data.columns[test_data.columns != 'Strength']] # all columns except Strength
    target_test = test_data['Strength'] # Strength column

    n_cols = predictors_train.shape[1] # number of predictors

    # 2. Train the model on the training data using 50 epochs.
    model = modeldef(n_cols=n_cols)
    model.fit(predictors_train, target_train, validation_split=0.3, epochs=n_epochs, verbose=0)

    # 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
    # You can use the mean_squared_error function from Scikit-learn.
    predictions = model.predict(predictors_test, batch_size=None, verbose=1, steps=None)
    mse = mean_squared_error(target_test, predictions)
    #model.evaluate(predictors_test, target_test, verbose=0)

    return mse

In [5]:
# 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.
mses = np.array([trail_and_test(concrete_data, modeldef=regression_model, n_epochs=50) for i in range(50)])
# 5. Report the mean and the standard deviation of the mean squared errors
print('Mean: {mean}, Stddev: {std}'.format(mean=mses.mean(), std=mses.std()))

Mean: 539.2714937496307, Stddev: 660.4743586716176


## B. Normalize the data 

In [6]:
def train_and_test_normalised(concrete_data, modeldef, n_epochs):
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing
    train_data, test_data = train_test_split(concrete_data, test_size=0.3)

    ## Split data into predictors and target
    predictors_train = train_data[train_data.columns[train_data.columns != 'Strength']] # all columns except Strength
    target_train = train_data['Strength'] # Strength column

    predictors_test = test_data[test_data.columns[test_data.columns != 'Strength']] # all columns except Strength
    target_test = test_data['Strength'] # Strength column

    # normalise predictors
    predictors_train_norm = (predictors_train - predictors_train.mean()) / predictors_train.std()
    predictors_test_norm = (predictors_test - predictors_test.mean()) / predictors_test.std()

    n_cols = predictors_train_norm.shape[1] # number of predictors

    # 2. Train the model on the training data using 50 epochs.
    model = modeldef(n_cols=n_cols)
    # Train the model on the training data using 50 epochs.
    model.fit(predictors_train_norm, target_train, validation_split=0.3, epochs=n_epochs, verbose=0)

    # 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.
    predictions = model.predict(predictors_test_norm, batch_size=None, verbose=1, steps=None)
    mse = mean_squared_error(target_test, predictions)
    print(mse)
    #model.evaluate(predictors_test, target_test, verbose=0)

    return mse

In [7]:
mses = np.array([train_and_test_normalised(concrete_data, modeldef=regression_model, n_epochs=50) for i in range(50)])
print('Mean: {mean}, Stddev: {std}'.format(mean=mses.mean(), std=mses.std()))

895.8366768559019
538.8349123503775
537.6522596509329
718.2132841960835
554.2272068624947
688.2612701336302
630.2078953375573
570.0388945656377
553.0614280542312
573.9188361694559
515.2981959631069
613.4661391813387
974.4034190455617
616.3032867403359
760.9647654819323
617.7627110817731
865.4788951120621
702.5712501664436
806.1935364950012
644.466692149121
605.9526166150196
709.2213573595377
629.098663761657
660.1426338191311
488.9845426263882
686.2913867205162
730.6064014332522
591.6938573122541
732.3091724372127
609.7222085590635
549.360016934034
1016.3516083374506
616.5504204674895
596.9292485302745
549.556820153158
844.2119893611182
557.2660750708391
635.2520690051538
719.8172720522449
957.680689743131
622.9847286661791
1061.4911852101775
526.7092435305942
730.0483210206464
481.9919725872778
733.4981436994185
494.7805452331136
983.1717735984989
746.6283677945105
618.009658677717
Mean: 677.2694909182007, Stddev: 143.2904362488982


### How does the mean of the mean squared errors compare to that from Step A?

#### Step A(Baseline Model)
    - mean: 539.2714937496307
    - std: 660.4743586716176
#### Step B(Normalised feature):
    - mean: 677.2694909182007
    - std: 143.2904362488982

### Result
The mean of the MSE of B is a bit worse than A, but the standard deviation of B is much less than A

## C. Increate the number of epochs (5 marks)

In [8]:
# Repeat Part B but use 100 epochs this time for training.
mses = np.array([train_and_test_normalised(concrete_data, modeldef=regression_model, n_epochs=100) for i in range(50)])
print('Mean: {mean}, Stddev: {std}'.format(mean=mses.mean(), std=mses.std()))

223.85398452039425
517.0112666824906
273.9694154504999
192.82145966930696
210.51179032381697
203.76187841575143
275.7925823897395
248.05057504896035
197.05622773425705
406.27738068568686
225.62654009719517
266.88727204063923
217.397101004695
196.07659873182126
225.6887580436478
219.66798500869552
208.0678900311614
164.07400523585625
185.30048518729134
174.86220887238449
201.65090344579735
255.85863949522448
216.93556424267325
231.12417280880118
218.68054735479228
177.3554266554538
236.39954624102558
171.72589147405694
282.82231217965216
243.52100874970293
234.570346607437
237.2361090737491
201.55175548851577
241.4706696683715
245.4376122436043
230.38018871474776
251.01166881550506
186.15597688970462
224.73719531832234
179.49625466998492
174.67780751457244
192.17686078065307
195.76743850888556
259.42776889111997
182.9980898029085
258.09291555575516
199.05006328710505
270.42549994293506
Mean: 228.31948220629343, Stddev: 57.71126727954665


### How does the mean of the mean squared errors compare to that from Step B?

#### Step B(Normalised feature):
    - mean: 670.1222133203134
    - std: 139.2909256592924
#### Step C(Train w/ double epoch):
    - mean: 228.31948220629343
    - std: 57.71126727954665

### Result
Both of the mean and the standard deviation of C is much better than B

## D. Increase the number of hidden layers (5 marks)

In [9]:
# - Three hidden layers, each of 10 nodes and ReLU activation function.
def regression_model_2(n_cols):
    # create model
    model = Sequential()
    
    # One hidden layer of 10 nodes, and a ReLU activation function
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    
    model.add(Dense(1))
    
    # Use the adam optimizer and the mean squared error as the loss function.
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model

In [10]:
mses = np.array([train_and_test_normalised(concrete_data, modeldef=regression_model_2, n_epochs=50) for i in range(100)])
print('Mean: {mean}, Stddev: {std}'.format(mean=mses.mean(), std=mses.std()))

168.76705515794248
152.37805802615483
138.0862583685326
157.90373219208885
161.08597016054026
150.01149373859212
162.11951288517483
139.1288333203001
134.91886798127697
152.15621183209117
150.97036108938943
140.8121730021355
145.9784960636683
162.02162292561107
153.05217317600633
134.73216695145402
156.16500996431918
168.383752710478
148.44856691489062
136.6344734060594
109.80177418286205
153.11641534341095
154.85225235898724
171.5508089630442
130.81446250241856
160.29521997008908
176.73897479257823
170.04782895007418
152.3746076926652
144.72297767681104
156.49220426032284
155.04916180653356
158.86459121007186
132.2005601721169
160.47720303982177
161.19217174096693
150.59410384474592
143.87387494483778
145.65354379258113
100.89176283856784
145.82646933290005
143.6245864026016
146.15976388336637
159.81794577219415
168.28040202590807
138.4935780092432
155.87470391924612
153.28323843241031
140.29935291001897
152.9753433930656
143.63661852289457
140.55413972206102
147.37991709952468
148.89

### How does the mean of the mean squared errors compare to that from Step B?

#### Step B(Normalised feature):
    - mean: 670.1222133203134
    - std: 139.2909256592924
#### Step D(Enhanced hidden layer):
    - mean: 148.9130410346697
    - std: 14.453772413834463

### Reuslt
Both of the mean and the standart deviation of MSE of D is significant better than B