#  1. Download Dataset

In [40]:
import pandas as pd
import numpy as np

In [2]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


### data is clean, target is strength

In [3]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [4]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [5]:
n_cols = predictors.shape[1]

###  Normalized data is below

In [6]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [7]:
n_cols_norm = predictors_norm.shape[1]

### A. Make network

In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [9]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [10]:
# build the model
modelR = regression_model()

Instructions for updating:
Colocations handled automatically by placer.


In [17]:
modelR.fit(predictors, target, validation_split=0.3, epochs=50, verbose=2)

Train on 721 samples, validate on 309 samples
Epoch 1/50
 - 0s - loss: 8908858.9015 - val_loss: 3970153.9895
Epoch 2/50
 - 0s - loss: 2335602.6991 - val_loss: 1008958.8380
Epoch 3/50
 - 0s - loss: 632883.0952 - val_loss: 284055.0090
Epoch 4/50
 - 0s - loss: 255247.3945 - val_loss: 145949.3133
Epoch 5/50
 - 0s - loss: 189709.3084 - val_loss: 125629.1930
Epoch 6/50
 - 0s - loss: 178161.0569 - val_loss: 120459.2879
Epoch 7/50
 - 0s - loss: 172016.0800 - val_loss: 117476.7386
Epoch 8/50
 - 0s - loss: 166275.9613 - val_loss: 115012.0314
Epoch 9/50
 - 0s - loss: 160893.8090 - val_loss: 112717.6320
Epoch 10/50
 - 0s - loss: 155899.3087 - val_loss: 110439.8323
Epoch 11/50
 - 0s - loss: 151062.8831 - val_loss: 108153.4971
Epoch 12/50
 - 0s - loss: 146478.9977 - val_loss: 106360.9989
Epoch 13/50
 - 0s - loss: 142011.7223 - val_loss: 103987.6687
Epoch 14/50
 - 0s - loss: 137679.7070 - val_loss: 102306.4324
Epoch 15/50
 - 0s - loss: 133804.3758 - val_loss: 100610.0978
Epoch 16/50
 - 0s - loss: 129

<keras.callbacks.History at 0x7f5be828ed68>

In [35]:
a=0
scoreset=[]
while a<50:
    modelR = regression_model()
    modelR.fit(predictors, target, validation_split=0.3, epochs=50, verbose=0)
    score = modelR.evaluate(predictors, target)
    scoreset.append(score)
    a+=1



## Mean and Std of mse of normal data

In [37]:
np.mean(scoreset, axis=0)

455.8533856334501

In [38]:
np.std(scoreset, axis=0)

379.21916340745463

## B. Using Normalized data

In [39]:
# define regression_norm model
def regression_norm_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols_norm,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [47]:
a=0
scoreset_norm=[]
while a<50:
    modelRn = regression_norm_model()
    modelRn.fit(predictors_norm, target, validation_split=0.3, epochs=50, verbose=0)
    score = modelRn.evaluate(predictors_norm, target)
    scoreset_norm.append(score)
    a+=1



### Mean and std

In [48]:
np.mean(scoreset_norm, axis=0)

381.83245288856284

In [49]:
np.std(scoreset_norm, axis=0)

101.87322259834369

all of them are smaller than non normalized things. 

##  C. more epoch; 50 to 100

In [50]:
a=0
scoreset_norm2=[]
while a<50:
    modelRn2 = regression_norm_model()
    modelRn2.fit(predictors_norm, target, validation_split=0.3, epochs=100, verbose=0)
    score = modelRn2.evaluate(predictors_norm, target)
    scoreset_norm2.append(score)
    a+=1



In [51]:
np.mean(scoreset_norm2, axis=0)

178.816101504724

In [52]:
np.std(scoreset_norm2, axis=0)

15.885861224829098

Less mean and std than B's

## D. more hidden layer; 1 to 3

In [53]:
def regression_norm_model2():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols_norm,)))
    model.add(Dense(10, activation='relu', input_shape=(n_cols_norm,)))
    model.add(Dense(10, activation='relu', input_shape=(n_cols_norm,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [54]:
a=0
scoreset_norm3=[]
while a<50:
    modelRn3 = regression_norm_model2()
    modelR.fit(predictors_norm, target, validation_split=0.3, epochs=50, verbose=0)
    score = modelR.evaluate(predictors_norm, target)
    scoreset_norm3.append(score)
    a+=1



In [55]:
np.mean(scoreset_norm3, axis=0)

70.28848352139667

In [56]:
np.std(scoreset_norm3, axis=0)

20.90357418045991

Much less mean and std than B's