## Introduction to deep learning - Part A

<b>Project planning</b>

- Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error  as the loss function.

1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_splithelper function from Scikit-learn.

2. Train the model on the training data using 50 epochs.

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

5. Report the mean and the standard deviation of the mean squared errors.

In [4]:
#importing libraries
import keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [5]:
from keras.layers.convolutional import Conv2D # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers

In [6]:
#Read dataset into pandas dataframe
#importing pandas and numpy
import pandas as pd
import numpy as np
concrete_data=pd.read_csv('concrete_data.csv')
concrete_data.head(5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [7]:
# explore data
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [8]:
#checking for missing values
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [9]:
#Split data into predictors and target
concrete_data_columns = concrete_data.columns
predictors=concrete_data[concrete_data_columns[concrete_data_columns!='Strength']]
target=concrete_data['Strength'] # Strength column
n_cols = predictors.shape[1] # number of predictors

In [10]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [11]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [12]:
#determining the predictores_norm part
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [13]:
n_cols = predictors_norm.shape[1] # number of predictors

In [14]:
#define regression model
def regression_model():
  model=Sequential()
  model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
  model.add(Dense(1))
   # Compile model
  model.compile(optimizer='adam', loss='mean_squared_error')
  return model

In [15]:
# import scikit-learn in order to randomly split the data into a training and test sets
from sklearn.model_selection import train_test_split
#Splitting the data into a training and test sets by holding 30% of the data for testing
X_train,X_test,y_train,y_test = train_test_split(predictors,target,test_size=0.3,random_state = 42)

In [16]:
# build the model
model = regression_model()

In [17]:
# train the model for 50 epochs.
epochs = 50
model.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x276e3ebb5b0>

In [18]:
#evaluate the model on the test data.
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



148.01588439941406

In [19]:
# import the mean_squared_error function from Scikit-learn.
from sklearn.metrics import mean_squared_error
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

148.01588293475905 0.0


In [20]:
#Creating a list of 50 mean squared errors and report mean and the standard deviation of the mean squared errors.
total_mean_squared_errors = 50
epochs = 50
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)



MSE 1: 79.12895965576172
MSE 2: 89.05271911621094
MSE 3: 57.64963912963867
MSE 4: 61.7132568359375
MSE 5: 57.019378662109375
MSE 6: 53.28114700317383
MSE 7: 64.41323852539062
MSE 8: 44.86552047729492
MSE 9: 47.97789001464844
MSE 10: 52.201412200927734
MSE 11: 46.95940017700195
MSE 12: 46.18745422363281
MSE 13: 51.12187576293945
MSE 14: 48.903621673583984
MSE 15: 47.17317581176758
MSE 16: 39.05597686767578
MSE 17: 43.33134460449219
MSE 18: 43.84421157836914
MSE 19: 49.6553840637207
MSE 20: 45.00239181518555
MSE 21: 40.6460075378418
MSE 22: 45.94688034057617
MSE 23: 41.720516204833984
MSE 24: 44.83909225463867
MSE 25: 43.39255905151367
MSE 26: 50.29678726196289
MSE 27: 43.56221389770508
MSE 28: 41.2295036315918
MSE 29: 50.44488525390625
MSE 30: 46.447147369384766
MSE 31: 45.34296798706055
MSE 32: 38.561031341552734
MSE 33: 46.196044921875
MSE 34: 41.556060791015625
MSE 35: 43.89261245727539
MSE 36: 49.516300201416016
MSE 37: 53.124412536621094
MSE 38: 46.51634979248047
MSE 39: 41.5524787

In [21]:
print('\n')
print("this is the mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors without normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))



this is the mean and standard deviation of 50 mean squared errors without normalized data. Total number of epochs for each training is: 50

Mean: 49.191756696931506
Standard Deviation: 9.022722509628675
