# Initialing and importing data

In [77]:
import keras

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [78]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [79]:
data = pd.read_csv('https://cocl.us/concrete_data')

concrete_data_columns = data.columns
predictors = data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = data['Strength'] # Strength column
n_cols = predictors.shape[1] #counting the number of columns which will be used in the model definition
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# Question A: Building a baseline model

In [80]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,))) #add one hidden layer with 10 nodes and reLU activation
    model.add(Dense(1)) #add output layer with one node
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [81]:
model = regression_model()

In [96]:
X = predictors
y = target
X

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360
...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28


In [83]:
mse = []
i = 1
while i < 51:
    #Split data into train set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
    #Apply the neural network model to the train data
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #Evaluate the model by comparing with test data set.
    mse.append(model.evaluate(X_test, y_test, verbose=0)[1])
    i = i+1

In [89]:
mse = np.array(mse)
mean_mse = mse.mean()
std_mse = mse.std()
print('Mean of Mean Squared Error: {} \nStandard Deviation: {}'.format(mean_mse, std_mse))

Mean of Mean Squared Error: 54.95284156799316 
Standard Deviation: 15.363763205050617


# Question B: Normalized data

In [95]:
#Normalizing data
predictors_norm = (predictors - predictors.mean()) / predictors.std()
X = predictors_norm
y = data['Strength']
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [90]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,))) #add one hidden layer with 10 nodes and reLU activation
    model.add(Dense(1)) #add output layer with one node
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [91]:
model = regression_model()

In [93]:
mse = []
i = 1
while i < 51:
    #Split data into train set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
    #Apply the neural network model to the train data
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #Evaluate the model by comparing with test data set.
    mse.append(model.evaluate(X_test, y_test, verbose=0)[1])
    i = i+1

In [94]:
mse = np.array(mse)
mean_mse = mse.mean()
std_mse = mse.std()
print('Mean of Mean Squared Error (data normalized): {} \nStandard Deviation (data normalized): {}'.format(mean_mse, std_mse))

Mean of Mean Squared Error (data normalized): 70.6209267425537 
Standard Deviation (data normalized): 100.65327202144452


# Question C: Increasing the number of epochs

In [97]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,))) #add one hidden layer with 10 nodes and reLU activation
    model.add(Dense(1)) #add output layer with one node
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model
model = regression_model()
mse = []
i = 1
while i < 51:
    #Split data into train set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
    #Apply the neural network model to the train data
    model.fit(X_train, y_train, validation_split=0.3, epochs=100, verbose=0)
    #Evaluate the model by comparing with test data set.
    mse.append(model.evaluate(X_test, y_test, verbose=0)[1])
    i = i+1

mse = np.array(mse)
mean_mse = mse.mean()
std_mse = mse.std()
print('Mean of Mean Squared Error (data normalized): {} \nStandard Deviation (data normalized): {}'.format(mean_mse, std_mse))

Mean of Mean Squared Error (data normalized): 74.91678840637206 
Standard Deviation (data normalized): 30.296960447385455


# Question D: Increase the number of hidden layer

In [98]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,))) #add one hidden layer with 10 nodes and reLU activation
    model.add(Dense(10,activation='relu'))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(1)) #add output layer with one node
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model
model = regression_model()
mse = []
i = 1
while i < 51:
    #Split data into train set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
    #Apply the neural network model to the train data
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #Evaluate the model by comparing with test data set.
    mse.append(model.evaluate(X_test, y_test, verbose=0)[1])
    i = i+1

mse = np.array(mse)
mean_mse = mse.mean()
std_mse = mse.std()
print('Mean of Mean Squared Error (data normalized): {} \nStandard Deviation (data normalized): {}'.format(mean_mse, std_mse))

Mean of Mean Squared Error (data normalized): 49.08640609741211 
Standard Deviation (data normalized): 10.826617327673896


# Conclusion
### Increasing the number of layers seems to be more accurate than increasing the number of epoch for these data set