**Importing the libraries**

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam

SEED = 2017

Using TensorFlow backend.


**Loading the dataset, and defining the variables:**

In [0]:
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep = ';')

y = data['quality']
X = data.drop(['quality'], axis = 1)

**Inspecting the dataset:**

In [3]:
y.head()

0    5
1    5
2    5
3    6
4    5
Name: quality, dtype: int64

In [4]:
X.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


**Splitting the data for training and testing:**

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = 0.2,
                                                    random_state = SEED)

**Print average quality and first rows of training set:**

In [6]:
print('Average quality training set: {: .4f}'.format(y_train.mean()))

X_train.head()

Average quality training set:  5.6231


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
1140,7.3,0.4,0.3,1.7,0.08,33.0,79.0,0.9969,3.41,0.65,9.5
920,9.6,0.41,0.37,2.3,0.091,10.0,23.0,0.99786,3.24,0.56,10.5
1198,7.7,0.26,0.26,2.0,0.052,19.0,77.0,0.9951,3.15,0.79,10.9
423,10.5,0.24,0.47,2.1,0.066,6.0,24.0,0.9978,3.15,0.9,11.0
601,13.2,0.46,0.52,2.2,0.071,12.0,35.0,1.0006,3.1,0.56,9.0


**Normalizing the input data:**

In [0]:
scaler = StandardScaler().fit(X_train)

X_train = pd.DataFrame(scaler.transform(X_train))
X_test = pd.DataFrame(scaler.transform(X_test))

**Determine the baseline predictions:**

In [8]:
# Predict the mean quality of the training data for each validation input

print('MSE: {:.4f}'.format(np.mean((y_test - ([y_train.mean()] * y_test.shape[0])) ** 2)))

MSE: 0.5940


**Build the neural network by defining the network architecture:**

In [9]:
model = Sequential()

# First hidden layer
model.add(Dense(200, input_dim = X_train.shape[1], 
                activation = 'relu'))

# Second hidden layer
model.add(Dense(25, activation = 'relu'))

# Output layer
model.add(Dense(1, activation = 'linear'))

# Set optimizer
opt = Adam()

# Compile model
model.compile(loss = 'mse', optimizer = opt, metrics = ['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


**Let's define the callback for early stopping and saving the best model:**

In [0]:
callbacks = [
    EarlyStopping(monitor = 'val_acc', patience = 20, verbose = 2),
    ModelCheckpoint('multi_layer_best_model.h5',
                    monitor = 'val_acc',
                    save_best_only = True,
                    verbose = 0)
]

**Defining the batch size to be 64, and the number of epochs to be 5000:**

In [0]:
batch_size = 64
n_epochs = 5000

**Running the model:**

In [12]:
model.fit(X_train.values, y_train, batch_size = 64, 
          epochs = n_epochs, validation_split = 0.2,
          verbose = 2,
          validation_data = (X_test.values, y_test),
          callbacks = callbacks)

Instructions for updating:
Use tf.cast instead.
Train on 1279 samples, validate on 320 samples
Epoch 1/5000
 - 3s - loss: 20.4111 - acc: 0.0055 - val_loss: 12.5023 - val_acc: 0.0187
Epoch 2/5000
 - 0s - loss: 6.3885 - acc: 0.0977 - val_loss: 3.8108 - val_acc: 0.1656
Epoch 3/5000
 - 0s - loss: 3.1007 - acc: 0.2158 - val_loss: 2.6438 - val_acc: 0.2344
Epoch 4/5000
 - 0s - loss: 2.1791 - acc: 0.2682 - val_loss: 2.2836 - val_acc: 0.2625
Epoch 5/5000
 - 0s - loss: 1.8768 - acc: 0.2744 - val_loss: 1.9635 - val_acc: 0.2938
Epoch 6/5000
 - 0s - loss: 1.6709 - acc: 0.2924 - val_loss: 1.8119 - val_acc: 0.3063
Epoch 7/5000
 - 0s - loss: 1.5317 - acc: 0.3159 - val_loss: 1.6576 - val_acc: 0.2938
Epoch 8/5000
 - 0s - loss: 1.3947 - acc: 0.3307 - val_loss: 1.5535 - val_acc: 0.3312
Epoch 9/5000
 - 0s - loss: 1.2799 - acc: 0.3487 - val_loss: 1.4428 - val_acc: 0.3469
Epoch 10/5000
 - 0s - loss: 1.1796 - acc: 0.3683 - val_loss: 1.3455 - val_acc: 0.3656
Epoch 11/5000
 - 0s - loss: 1.0862 - acc: 0.3792 - v

<keras.callbacks.History at 0x7f0f29091668>

**Printing the performance on the test set after loading the optimal weights:**

In [13]:
best_model = model

best_model.load_weights('multi_layer_best_model.h5')
best_model.compile(loss = 'mse', optimizer = 'adam',
                   metrics = ['accuracy'])

# Evaluate on test set
score = best_model.evaluate(X_test.values, y_test, verbose = 0)

print('Test accuracy: %.2f%%' % (score[1] * 100))

Test accuracy: 63.44%
