In [8]:
import numpy as np
import os
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

def reset_graph(seed = 42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
# 8a) Build a DNN with five hidden layers of 100 neurons with He initialization and the ELU activation function

he_init = tf.contrib.layers.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers = 5, n_neurons = 100, name = None, activation = tf.nn.elu, initializer = he_init):
    with tf.name_scope(name, 'dnn'):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs, n_neurons, activation = activation, kernel_initializer = initializer, name = "hidden%d" % (layer + 1))
        return inputs

In [10]:
n_inputs = 28 * 28
n_outputs = 5

reset_graph()

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int64, shape = (None), name = "y")

dnn_outputs = dnn(X)

logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer = he_init, name = 'logits')
Y_proba = tf.nn.softmax(logits, name = 'Y_proba')

In [11]:
# 8b) Use Adam optimization and early stopping to trian MNIST for digits 0 to 3

learning_rate = 0.01

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = 'loss')

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss, name = 'training_op')
    
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name = 'accuracy')
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [12]:
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

In [36]:
n_epochs = 100
batch_size = 20
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict = {X: X_valid1, y:y_valid1})
        
        if loss_val < best_loss:
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, loss_val, best_loss, acc_val * 100))
        
        
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict = {X: X_test1, y: y_test1})
    print("Final test accuracy {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.227003	Best loss: 0.227003	Accuracy: 95.04%
1	Validation loss: 0.235784	Best loss: 0.227003	Accuracy: 95.78%
2	Validation loss: 0.122533	Best loss: 0.122533	Accuracy: 97.38%
3	Validation loss: 0.163384	Best loss: 0.122533	Accuracy: 95.62%
4	Validation loss: 0.108590	Best loss: 0.108590	Accuracy: 97.85%
5	Validation loss: 0.093206	Best loss: 0.093206	Accuracy: 98.05%
6	Validation loss: 0.161963	Best loss: 0.093206	Accuracy: 97.07%
7	Validation loss: 0.114235	Best loss: 0.093206	Accuracy: 97.69%
8	Validation loss: 0.733225	Best loss: 0.093206	Accuracy: 90.11%
9	Validation loss: 0.399503	Best loss: 0.093206	Accuracy: 96.01%
10	Validation loss: 0.332553	Best loss: 0.093206	Accuracy: 95.35%
11	Validation loss: 0.258545	Best loss: 0.093206	Accuracy: 96.13%
12	Validation loss: 0.385276	Best loss: 0.093206	Accuracy: 96.99%
13	Validation loss: 0.220626	Best loss: 0.093206	Accuracy: 96.91%
14	Validation loss: 0.203410	Best loss: 0.093206	Accuracy: 97.38%
15	Validation loss: 

In [36]:
# 8c) Tuning the hyperparameters using cross-validation

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers = 5, n_neurons = 100, optimizer_class = tf.train.AdamOptimizer, learning_rate = 0.01, batch_size = 20, activation = tf.nn.elu, initializer = he_init, batch_norm_momentum = None, dropout_rate = None, random_state = None):
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None
        
    def _dnn(self, inputs):
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training = self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons, kernel_initializer = self.initializer, name = "hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum = self.batch_norm_momentum, training = self.training)
                inputs = self.activation(inputs, name = "hidden%d_out" % (layer + 1))
        return inputs
    
    def _build_graph(self, n_inputs, n_outputs):
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)
            
        X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
        y = tf.placeholder(tf.int32, shape = (None), name = 'y')
        
        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape = (), name = 'training')
        else:
            self._training = None
            
        dnn_outputs = self._dnn(X)
        
        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer = he_init, name = 'logits')
        
        Y_proba = tf.nn.softmax(logits, name = 'Y_proba')
        
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
        
        loss = tf.reduce_mean(xentropy, name = 'loss')
        
        optimzier = self.optimizer_class(learning_rate = self.learning_rate)
        training_op = optimizer.minimize(loss)
        
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name = 'accuracy')
        
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        
        self._X = self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver
        
    def close_session(self):
        if self._session:
            self._session.close()
            
    def _get_model_params(self):
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}
    
    def _restore_model_params(self, model_params):
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign") for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict = feed_dict)
        
    def fit(self, X, y, n_epochs = 100, X_valid = None, y_valid = None):
    
        self.close_session()
        
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        self.class_to_index_ = {label: index
                               for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label] for label in y], dtype = np.int32)
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        self._session = tf.Session(graph = self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict = feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy], feed_dict = {self._X: X_valid, self._y : y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress = 0
                    print("{}\tValidation: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch. loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss. self._accuracy], feed_dict = {self._X: X_batch, self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, loss_train, acc_train * 100))
        
            if best_params:
                self._restore_model_params(best_params)
            return self
            
        def predict_proba(self, X):
            if not self._session:
                raise NotFittedError("This %s instance is not fitted yet" % self._class_._name__)
            
            with self._session_as_default() as sess:
                return self._Y_proba.eval(feed_dict = {self._X:X})
            
        def predict(self, X):
            class_indices = np.argmax(self.predict_proba(X), axis = 1)
            return np.array([[self.classes_[class_index]] for class_index in class_indices], np.int32)
        
        def save(self, path):
            self._saver.save(self._session, path)
            


In [37]:
dnn_clf = DNNClassifier(random_state = 42)
dnn_clf.fit(X_train1, y_train1, n_epochs = 1000, X_valid = X_valid1, y_valid = y_valid1)

ValueError: Could not flatten dictionary. Key had 2 elements, but value had 1 elements. Key: [<tf.Tensor 'X:0' shape=(?, 784) dtype=float32>, <tf.Tensor 'y:0' shape=<unknown> dtype=int32>], value: [array([0, 2, 0, 2, 1, 1, 4, 1, 4, 1, 3, 1, 1, 4, 3, 0, 4, 3, 1, 1, 1], dtype=int32)].

In [25]:
from sklearn.metrics import accuracy_score

y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9793734189531037

In [38]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [44]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha = 0.01):
    def parametrized_leaky_relu(z, name = None):
        return tf.maximum(alpha*z, z, name = name)
    return parametrized_leaky_relu

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha = 0.01), leaky_relu(alpha = 0.1)]
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state = 42), param_distribs, n_iter = 50, fit_params = {"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000}, random_state = 42, verbose = 2)
rnd_search.fit(X_train1, y_train1)




Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=10, learning_rate=0.05, batch_size=100, activation=<function elu at 0x1099692f0> 
0	Validation loss: 0.133020	Best loss: 0.133020	Accuracy: 96.40%
1	Validation loss: 0.153023	Best loss: 0.133020	Accuracy: 95.78%
2	Validation loss: 0.137756	Best loss: 0.133020	Accuracy: 96.44%
3	Validation loss: 0.113295	Best loss: 0.113295	Accuracy: 96.60%
4	Validation loss: 0.134245	Best loss: 0.113295	Accuracy: 96.68%
5	Validation loss: 0.160136	Best loss: 0.113295	Accuracy: 96.48%
6	Validation loss: 1.205526	Best loss: 0.113295	Accuracy: 58.29%
7	Validation loss: 0.854387	Best loss: 0.113295	Accuracy: 58.44%
8	Validation loss: 1.618615	Best loss: 0.113295	Accuracy: 19.27%
9	Validation loss: 1.610767	Best loss: 0.113295	Accuracy: 22.01%
10	Validation loss: 1.613220	Best loss: 0.113295	Accuracy: 19.27%
11	Validation loss: 1.611311	Best loss: 0.113295	Accuracy: 22.01%
12	Validation loss: 1.627571	Best loss: 0.113295	Accuracy: 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.7s remaining:    0.0s


0	Validation loss: 0.151885	Best loss: 0.151885	Accuracy: 95.62%
1	Validation loss: 0.108809	Best loss: 0.108809	Accuracy: 96.83%
2	Validation loss: 0.117152	Best loss: 0.108809	Accuracy: 96.99%
3	Validation loss: 0.147208	Best loss: 0.108809	Accuracy: 96.52%
4	Validation loss: 0.140152	Best loss: 0.108809	Accuracy: 96.36%
5	Validation loss: 0.141784	Best loss: 0.108809	Accuracy: 96.44%
6	Validation loss: 1.561300	Best loss: 0.108809	Accuracy: 39.44%
7	Validation loss: 1.164626	Best loss: 0.108809	Accuracy: 39.80%
8	Validation loss: 1.150888	Best loss: 0.108809	Accuracy: 40.15%
9	Validation loss: 1.150942	Best loss: 0.108809	Accuracy: 40.15%
10	Validation loss: 1.135206	Best loss: 0.108809	Accuracy: 42.49%
11	Validation loss: 1.158850	Best loss: 0.108809	Accuracy: 40.23%
12	Validation loss: 1.142637	Best loss: 0.108809	Accuracy: 42.34%
13	Validation loss: 1.173786	Best loss: 0.108809	Accuracy: 40.27%
14	Validation loss: 1.146071	Best loss: 0.108809	Accuracy: 42.30%
15	Validation loss: 

31	Validation loss: 0.078109	Best loss: 0.056953	Accuracy: 98.40%
32	Validation loss: 0.100915	Best loss: 0.056953	Accuracy: 98.36%
33	Validation loss: 0.077558	Best loss: 0.056953	Accuracy: 98.36%
34	Validation loss: 0.100731	Best loss: 0.056953	Accuracy: 98.20%
Early stopping!
[CV]  n_neurons=30, learning_rate=0.02, batch_size=500, activation=<function relu at 0x10998d488>, total=   8.7s
[CV] n_neurons=30, learning_rate=0.02, batch_size=500, activation=<function relu at 0x10998d488> 
0	Validation loss: 0.121159	Best loss: 0.121159	Accuracy: 96.68%
1	Validation loss: 0.093012	Best loss: 0.093012	Accuracy: 97.11%
2	Validation loss: 0.084517	Best loss: 0.084517	Accuracy: 97.50%
3	Validation loss: 0.070080	Best loss: 0.070080	Accuracy: 97.58%
4	Validation loss: 0.073499	Best loss: 0.070080	Accuracy: 97.69%
5	Validation loss: 0.077686	Best loss: 0.070080	Accuracy: 97.93%
6	Validation loss: 0.063263	Best loss: 0.063263	Accuracy: 98.20%
7	Validation loss: 0.066538	Best loss: 0.063263	Accura

18	Validation loss: 4.937276	Best loss: 0.328711	Accuracy: 69.16%
19	Validation loss: 6.343340	Best loss: 0.328711	Accuracy: 60.36%
20	Validation loss: 2.815907	Best loss: 0.328711	Accuracy: 66.54%
21	Validation loss: 61.898945	Best loss: 0.328711	Accuracy: 30.61%
22	Validation loss: 2.002695	Best loss: 0.328711	Accuracy: 74.35%
23	Validation loss: 1.484913	Best loss: 0.328711	Accuracy: 76.43%
24	Validation loss: 1.078764	Best loss: 0.328711	Accuracy: 82.53%
25	Validation loss: 1.479109	Best loss: 0.328711	Accuracy: 85.89%
26	Validation loss: 35.164253	Best loss: 0.328711	Accuracy: 41.87%
27	Validation loss: 15440.137695	Best loss: 0.328711	Accuracy: 18.73%
28	Validation loss: 339.888580	Best loss: 0.328711	Accuracy: 51.13%
29	Validation loss: 317.065582	Best loss: 0.328711	Accuracy: 58.48%
30	Validation loss: 297.735199	Best loss: 0.328711	Accuracy: 56.88%
31	Validation loss: 473.699951	Best loss: 0.328711	Accuracy: 56.45%
32	Validation loss: 245.989059	Best loss: 0.328711	Accuracy: 5

0	Validation loss: 0.109893	Best loss: 0.109893	Accuracy: 96.76%
1	Validation loss: 0.069199	Best loss: 0.069199	Accuracy: 98.12%
2	Validation loss: 0.069521	Best loss: 0.069199	Accuracy: 97.93%
3	Validation loss: 0.052694	Best loss: 0.052694	Accuracy: 98.32%
4	Validation loss: 0.059152	Best loss: 0.052694	Accuracy: 98.20%
5	Validation loss: 0.051254	Best loss: 0.051254	Accuracy: 98.51%
6	Validation loss: 0.055154	Best loss: 0.051254	Accuracy: 98.59%
7	Validation loss: 0.080139	Best loss: 0.051254	Accuracy: 98.05%
8	Validation loss: 0.069264	Best loss: 0.051254	Accuracy: 98.24%
9	Validation loss: 0.055425	Best loss: 0.051254	Accuracy: 98.67%
10	Validation loss: 0.060814	Best loss: 0.051254	Accuracy: 98.51%
11	Validation loss: 0.047053	Best loss: 0.047053	Accuracy: 98.87%
12	Validation loss: 0.065596	Best loss: 0.047053	Accuracy: 98.67%
13	Validation loss: 0.091969	Best loss: 0.047053	Accuracy: 97.93%
14	Validation loss: 0.067328	Best loss: 0.047053	Accuracy: 98.51%
15	Validation loss: 

11	Validation loss: 0.070314	Best loss: 0.043865	Accuracy: 98.32%
12	Validation loss: 0.055357	Best loss: 0.043865	Accuracy: 98.63%
13	Validation loss: 0.042599	Best loss: 0.042599	Accuracy: 99.06%
14	Validation loss: 0.064410	Best loss: 0.042599	Accuracy: 98.75%
15	Validation loss: 0.051728	Best loss: 0.042599	Accuracy: 98.79%
16	Validation loss: 0.053933	Best loss: 0.042599	Accuracy: 98.59%
17	Validation loss: 0.062257	Best loss: 0.042599	Accuracy: 98.63%
18	Validation loss: 0.052563	Best loss: 0.042599	Accuracy: 98.83%
19	Validation loss: 0.070491	Best loss: 0.042599	Accuracy: 98.51%
20	Validation loss: 0.060236	Best loss: 0.042599	Accuracy: 98.83%
21	Validation loss: 0.059361	Best loss: 0.042599	Accuracy: 98.94%
22	Validation loss: 0.076154	Best loss: 0.042599	Accuracy: 98.48%
23	Validation loss: 0.029685	Best loss: 0.029685	Accuracy: 99.14%
24	Validation loss: 0.039523	Best loss: 0.029685	Accuracy: 99.10%
25	Validation loss: 0.051737	Best loss: 0.029685	Accuracy: 98.63%
26	Validat

15	Validation loss: 0.074958	Best loss: 0.055234	Accuracy: 98.51%
16	Validation loss: 0.061288	Best loss: 0.055234	Accuracy: 98.79%
17	Validation loss: 0.078039	Best loss: 0.055234	Accuracy: 98.32%
18	Validation loss: 0.071433	Best loss: 0.055234	Accuracy: 98.48%
19	Validation loss: 0.074246	Best loss: 0.055234	Accuracy: 98.40%
20	Validation loss: 0.068495	Best loss: 0.055234	Accuracy: 98.71%
21	Validation loss: 0.054553	Best loss: 0.054553	Accuracy: 98.59%
22	Validation loss: 0.065967	Best loss: 0.054553	Accuracy: 98.55%
23	Validation loss: 0.062534	Best loss: 0.054553	Accuracy: 98.79%
24	Validation loss: 0.059071	Best loss: 0.054553	Accuracy: 98.67%
25	Validation loss: 0.078951	Best loss: 0.054553	Accuracy: 98.20%
26	Validation loss: 0.070349	Best loss: 0.054553	Accuracy: 98.28%
27	Validation loss: 0.060198	Best loss: 0.054553	Accuracy: 98.75%
28	Validation loss: 0.068352	Best loss: 0.054553	Accuracy: 98.75%
29	Validation loss: 0.069762	Best loss: 0.054553	Accuracy: 98.71%
30	Validat

28	Validation loss: 1.656264	Best loss: 1.608595	Accuracy: 22.01%
29	Validation loss: 1.617747	Best loss: 1.608595	Accuracy: 18.73%
30	Validation loss: 1.641874	Best loss: 1.608595	Accuracy: 19.27%
31	Validation loss: 1.631482	Best loss: 1.608595	Accuracy: 18.73%
32	Validation loss: 1.641183	Best loss: 1.608595	Accuracy: 18.73%
33	Validation loss: 1.647446	Best loss: 1.608595	Accuracy: 19.08%
34	Validation loss: 1.610284	Best loss: 1.608595	Accuracy: 22.01%
35	Validation loss: 1.624692	Best loss: 1.608595	Accuracy: 19.08%
36	Validation loss: 1.630852	Best loss: 1.608595	Accuracy: 18.73%
37	Validation loss: 1.632281	Best loss: 1.608595	Accuracy: 18.73%
38	Validation loss: 1.631021	Best loss: 1.608595	Accuracy: 22.01%
Early stopping!
[CV]  n_neurons=50, learning_rate=0.1, batch_size=10, activation=<function relu at 0x10998d488>, total= 2.3min
[CV] n_neurons=50, learning_rate=0.1, batch_size=10, activation=<function relu at 0x10998d488> 
0	Validation loss: 1.631920	Best loss: 1.631920	Acc

KeyboardInterrupt: 