In [2]:
#import libraries needed
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
#fetch the concrete strength dataset straight from the course's object storage
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')

#keep the column index under a short name for convenience
concrete_data_columns = concrete_data.columns

#target is the 'Strength' column
target = concrete_data['Strength']

#predictors are every column whose name is not 'Strength'
predictors = concrete_data.loc[:, concrete_data_columns != 'Strength']

#normalize predictors: subtract each column's mean, then divide by its standard deviation
predictors = predictors.sub(predictors.mean()).div(predictors.std())

#number of predictor columns, used as the network's input_shape (should be 8!)
n_cols = len(predictors.columns)

#peek at the first rows of the normalized predictors
#(the updated dataset has 8 predictors rather than 7)
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [4]:
def regression(n_hidden_layers = 3, n_units = 10):
    """Build and compile a fully connected regression network.

    The network consists of `n_hidden_layers` hidden Dense layers, each with
    `n_units` units and ReLU activation, followed by a single linear output
    unit. With the default arguments this is the original architecture:
    three hidden layers of 10 units each.

    The input width is read from the module-level `n_cols`, so `n_cols` must
    be defined before this function is called.

    Parameters
    ----------
    n_hidden_layers : int, default 3
        Number of hidden layers; must be at least 1.
    n_units : int, default 10
        Number of units in every hidden layer.

    Returns
    -------
    The compiled Sequential model (adam optimizer, mean squared error loss).

    Raises
    ------
    ValueError
        If `n_hidden_layers` is smaller than 1.
    """
    if n_hidden_layers < 1:
        raise ValueError('n_hidden_layers must be at least 1')

    model = Sequential()

    #the first hidden layer also declares the input shape of the network
    model.add(Dense(n_units, activation = 'relu', input_shape = (n_cols, )))

    #remaining hidden layers infer their input size from the previous layer
    for _ in range(n_hidden_layers - 1):
        model.add(Dense(n_units, activation = 'relu'))

    #single output unit without activation, i.e. a plain linear regression head
    model.add(Dense(1))

    #compile model using the adam optimizer and mean squared error loss function
    model.compile(optimizer = 'adam', loss = 'mean_squared_error')
    return model

The model is ready for training. In the following lines I split the data randomly, passing a different `random_state` to `train_test_split` on each iteration. I then calculate the mean squared error on the test set for each iteration and store it in the `MSE` list.

In [8]:
#experiment settings, named so they can be tuned in one place
N_SPLITS = 50      #number of independent random train/test splits
N_EPOCHS = 50      #training epochs per model
TEST_SIZE = 0.3    #fraction of rows held out for testing

#list for storing the mean squared error of every split
mse_per_split = []

for i in range(N_SPLITS):
    #release the global Keras state left behind by the previous iteration's model;
    #otherwise every freshly built model adds to it and memory use keeps growing
    keras.backend.clear_session()

    #random split of data, using i as the random state so every split is different
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size = TEST_SIZE, random_state = i)

    #predictors were standardized earlier with the mean/std of the whole dataset,
    #so test rows influenced the scaling. Re-standardize with training-row
    #statistics only; standardization is unaffected by the earlier linear
    #rescaling, so this equals fitting the scaler on the raw training rows.
    train_mean = X_train.mean()
    train_std = X_train.std()
    X_train = (X_train - train_mean) / train_std
    X_test = (X_test - train_mean) / train_std

    #build a fresh model and fit it silently (verbose=0)
    model = regression()
    model.fit(X_train, y_train, epochs = N_EPOCHS, verbose = 0)

    #make predictions for the test set
    yhat = model.predict(X_test, verbose = 0)

    #calculate mean squared error and append it to the list
    mse = mean_squared_error(y_test, yhat)
    mse_per_split.append(mse)

    #inform user about progress
    print('trained model #' + str(i+1) + ' with MSE = ', mse)

#expose the results as a numpy array under the name the following cells use
MSE = np.array(mse_per_split)

trained model #1 with MSE =  107.56352701591608
trained model #2 with MSE =  143.4195414370879
trained model #3 with MSE =  109.12804667430271
trained model #4 with MSE =  143.97418287841123
trained model #5 with MSE =  134.6407103185249
trained model #6 with MSE =  139.1606195405877
trained model #7 with MSE =  152.20061399218332
trained model #8 with MSE =  91.43746177649265
trained model #9 with MSE =  151.95139060250648
trained model #10 with MSE =  135.4941412972062
trained model #11 with MSE =  121.32136911712587
trained model #12 with MSE =  124.7537354388398
trained model #13 with MSE =  120.60448232293847
trained model #14 with MSE =  124.71891757470138
trained model #15 with MSE =  141.30997288588122
trained model #16 with MSE =  106.6044617757528
trained model #17 with MSE =  116.27684147682308
trained model #18 with MSE =  127.94117208910775
trained model #19 with MSE =  131.96939138219767
trained model #20 with MSE =  111.15860093812377
trained model #21 with MSE =  126.76

In [9]:
#summarize the test-set errors collected over all train/test splits
mse_mean, mse_std = np.mean(MSE), np.std(MSE)

print('The mean of the mean squared errors is:', mse_mean)
print('The standard deviation of the mean squared errors is:', mse_std)

The mean of the mean squared errors is: 128.65414190809983
The standard deviation of the mean squared errors is: 14.403805930660551
