In [2]:
import pandas as pd
import numpy as np

In [3]:
concrete_data = pd.read_csv('https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
concrete_data.shape

(1030, 9)

In [5]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [6]:
#Checking for missing values in each columns
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

###### Split data into predictors and target

In [11]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [8]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [9]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64


#### Splitting dataset into train and test set

In [29]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.20, random_state=42)

In [30]:
print("Size of Training X {}".format(X_train.shape))
print("Size of Training Y {}".format(y_train.shape))
print("Size of Testing X {}".format(X_test.shape))
print("Size of Testing Y {}".format(y_test.shape))

Size of Training X (824, 8)
Size of Training Y (824,)
Size of Testing X (206, 8)
Size of Testing Y (206,)




#### Normalizing training data

In [31]:
predictors_norm = (X_train - X_train.mean()) / X_train.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
995,-1.160173,0.856867,0.981853,-0.314094,1.480754,-0.257305,-0.647127,-0.275506
507,1.307829,-0.602126,1.23185,-0.178084,0.391477,-1.92578,-0.272982,-0.275506
334,-0.076819,-0.855064,1.066227,-1.045736,0.626091,1.017242,0.066588,-0.688895
848,-0.29163,0.260163,0.356859,0.572318,0.307686,-1.760584,0.603726,-0.275506
294,-1.064391,-0.369883,1.111539,-1.102016,0.776914,1.362882,0.297496,-0.688895


In [32]:
n_cols = predictors_norm.shape[1] # number of predictors

In [33]:
import keras
from keras.models import Sequential
from keras.layers import Dense


#### Build a Neural Network

In [80]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['acc'])
    return model


#### Train and Test the Network

In [81]:
# build the model
model = regression_model()

# fit the model
model.fit(predictors_norm, y_train, validation_split=0.3, epochs=100, verbose=2)

Train on 576 samples, validate on 248 samples
Epoch 1/100
 - 2s - loss: 1522.9844 - acc: 0.0000e+00 - val_loss: 1393.5922 - val_acc: 0.0000e+00
Epoch 2/100
 - 0s - loss: 1402.2127 - acc: 0.0000e+00 - val_loss: 1258.6528 - val_acc: 0.0000e+00
Epoch 3/100
 - 0s - loss: 1228.8946 - acc: 0.0000e+00 - val_loss: 1061.6570 - val_acc: 0.0000e+00
Epoch 4/100
 - 0s - loss: 990.1650 - acc: 0.0000e+00 - val_loss: 803.9561 - val_acc: 0.0000e+00
Epoch 5/100
 - 0s - loss: 708.1515 - acc: 0.0000e+00 - val_loss: 536.5194 - val_acc: 0.0000e+00
Epoch 6/100
 - 0s - loss: 460.2860 - acc: 0.0000e+00 - val_loss: 324.2595 - val_acc: 0.0000e+00
Epoch 7/100
 - 0s - loss: 300.3662 - acc: 0.0000e+00 - val_loss: 220.9331 - val_acc: 0.0000e+00
Epoch 8/100
 - 0s - loss: 242.6840 - acc: 0.0000e+00 - val_loss: 187.1967 - val_acc: 0.0000e+00
Epoch 9/100
 - 0s - loss: 223.3920 - acc: 0.0000e+00 - val_loss: 178.5332 - val_acc: 0.0000e+00
Epoch 10/100
 - 0s - loss: 209.4030 - acc: 0.0000e+00 - val_loss: 178.1062 - val_acc

Epoch 86/100
 - 0s - loss: 53.7554 - acc: 0.0000e+00 - val_loss: 59.5807 - val_acc: 0.0000e+00
Epoch 87/100
 - 0s - loss: 52.5346 - acc: 0.0000e+00 - val_loss: 58.1118 - val_acc: 0.0000e+00
Epoch 88/100
 - 0s - loss: 51.6940 - acc: 0.0000e+00 - val_loss: 57.1182 - val_acc: 0.0000e+00
Epoch 89/100
 - 0s - loss: 50.7454 - acc: 0.0000e+00 - val_loss: 56.3000 - val_acc: 0.0000e+00
Epoch 90/100
 - 0s - loss: 49.9121 - acc: 0.0000e+00 - val_loss: 56.4007 - val_acc: 0.0000e+00
Epoch 91/100
 - 0s - loss: 49.3622 - acc: 0.0000e+00 - val_loss: 55.1993 - val_acc: 0.0000e+00
Epoch 92/100
 - 0s - loss: 48.9870 - acc: 0.0000e+00 - val_loss: 54.2128 - val_acc: 0.0000e+00
Epoch 93/100
 - 0s - loss: 48.2789 - acc: 0.0000e+00 - val_loss: 53.7283 - val_acc: 0.0000e+00
Epoch 94/100
 - 0s - loss: 47.4259 - acc: 0.0000e+00 - val_loss: 52.9690 - val_acc: 0.0000e+00
Epoch 95/100
 - 0s - loss: 46.9980 - acc: 0.0000e+00 - val_loss: 52.1907 - val_acc: 0.0000e+00
Epoch 96/100
 - 0s - loss: 47.0747 - acc: 0.0000e+

<keras.callbacks.History at 0x7f75ad177f60>




#### Normalizing test data and make prediction

In [82]:
norm_test_data = (X_test - X_test.mean()) / X_test.std()
predictions = model.predict(norm_test_data)

In [83]:
scores = model.evaluate(predictors_norm, y_train, verbose=2)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 0.00%


In [84]:
print(predictions)

[[46.316586 ]
 [52.555626 ]
 [72.67157  ]
 [50.419662 ]
 [17.105669 ]
 [40.365246 ]
 [20.737621 ]
 [53.952675 ]
 [29.930285 ]
 [43.135197 ]
 [30.991367 ]
 [11.4231615]
 [57.19987  ]
 [46.13007  ]
 [22.262629 ]
 [33.37185  ]
 [35.28948  ]
 [20.005932 ]
 [32.984253 ]
 [26.27133  ]
 [35.563873 ]
 [48.871098 ]
 [44.000317 ]
 [16.275751 ]
 [26.059313 ]
 [33.485355 ]
 [14.248464 ]
 [40.923462 ]
 [48.18218  ]
 [21.11002  ]
 [46.972393 ]
 [39.732178 ]
 [47.863873 ]
 [55.79192  ]
 [20.550034 ]
 [39.26076  ]
 [29.554567 ]
 [38.61986  ]
 [11.265071 ]
 [43.099247 ]
 [17.706861 ]
 [ 7.2193346]
 [37.94495  ]
 [49.786602 ]
 [13.870807 ]
 [73.66626  ]
 [47.98937  ]
 [38.166187 ]
 [25.696089 ]
 [11.219438 ]
 [48.29107  ]
 [41.707867 ]
 [32.36243  ]
 [21.857939 ]
 [53.925434 ]
 [50.718708 ]
 [24.177162 ]
 [13.884843 ]
 [38.318638 ]
 [22.726732 ]
 [38.58359  ]
 [21.810839 ]
 [35.278812 ]
 [54.90929  ]
 [22.609324 ]
 [15.666196 ]
 [33.46592  ]
 [12.660592 ]
 [31.655367 ]
 [21.138693 ]
 [16.813559 ]
 [19.4

In [75]:
print(y_test)

31     52.91
109    55.90
136    74.50
88     35.30
918    10.54
       ...  
482    56.14
545    18.75
110    38.00
514    74.36
602    35.17
Name: Strength, Length: 206, dtype: float64
