In [1]:
""" Repeating MNIST first experiment of swish paper. """

import gc # Garbage collector
import logging
import numpy as np
import matplotlib.pyplot as plt

# Record settings
LOG_FORMAT = "%(levelname)s %(asctime)s - %(message)s"
logging.basicConfig(filename="swish_first_exp_log.txt",format = LOG_FORMAT, level = logging.DEBUG, filemode = "a")
logs = logging.getLogger()

%matplotlib notebook

np.random.seed(2)

from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

# For adding new activation function
from keras import backend as K
from keras.datasets import mnist
from keras.utils.generic_utils import get_custom_objects
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print("X_train original shape", X_train.shape)
print("y_train original shape", Y_train.shape)

X_train original shape (60000, 28, 28)
y_train original shape (60000,)


In [3]:
# Normalization
nb_classes = 10
# Normalize the data
X_train = X_train / 255.0
X_test = X_test / 255.0

In [4]:
# Encode labels to one hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

In [5]:
# Set the random seed
random_seed = 2

In [6]:
# Split the train and the validation set for the fitting
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=random_seed)

In [7]:
# Reshape image in 3 dimensions (height = 28px, width = 28px , canal = 1)
X_train = X_train.reshape(-1,784)
X_val = X_val.reshape(-1,784)
X_test = X_test.reshape(-1,784)
# test = test.values.reshape(-1,28,28,1)
print(X_train.shape, X_val.shape, X_test.shape)

(54000, 784) (6000, 784) (10000, 784)


In [8]:
def swish(x):
    return x*K.sigmoid(x)

def e_swish_2(x):
    sigmoid = K.sigmoid(x)
    return K.maximum(x*sigmoid, x*(2-sigmoid))

In [9]:
# Set the CNN model 
# my CNN architechture is In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 -> Flatten -> Dense -> Dropout -> Out
def create(act, n):
    model = Sequential()
    # First conv block
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation(act))
    for i in range(n-1):
        if i%2 == 0:
            model.add(BatchNormalization())
        model.add(Dense(512))
        model.add(Activation(act))
        model.add(Dropout(0.3))
        
    model.add(Dense(nb_classes))
    model.add(Activation("softmax"))
    
    return model

In [10]:
def plot(arr, names = ["relu", "e_swish_2", "swish"]):
    fig, ax = plt.subplots()
    for item in arr:
        ax.plot(np.array(item))
    
    plt.grid()
    plt.legend(names[:len(arr)], loc='upper right')
    ax.set_ylim(0,1)
    plt.show()

In [11]:
def accuracy(y_pred, y_test):
    y_hat = np.argmax(y_pred, axis=1)
    y = np.argmax(y_test, axis=1)

    good = np.sum(np.equal(y, y_hat))
    return float(good/len(y_test))

In [None]:
act = "relu"

logs_relu = []
record_relu = []
for n in range(23,42,3):
    ensembler = 0
    logger = [n]
    print("\n \n Starting round with {0} layers".format(n))
    for i in range(3):
        # Garbage collector
        gc.collect()
        # Set optimizer
        opt = SGD(lr=0.01, momentum=0.9)
        # Set callbacks (learning rate reducer and early stopping)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.35, min_lr=0.00001)
        early_stop = EarlyStopping(monitor='val_acc', patience=5, verbose = 1)
        # Common params 
        epochs = 15
        batch_size = 128
        # Create and compile the model
        model = create(act, n)
        # Compile the model
        model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
        # Train the model
        history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                            verbose = 1 , callbacks=[learning_rate_reduction, early_stop])
        
        # Record accuracy of each model and save it
        logger.append(model.evaluate(X_test, Y_test)[1])
        # Calculate probabilities of test data and sum them toghether
        ensembler += model.predict_proba(X_test)
        # Clear session (GPU MEMORY)
        K.get_session().close()
        K.set_session(K.tf.Session())
        del model, history, learning_rate_reduction, early_stop, opt
     
    # Calculate the median accuracy
    ensembled = accuracy(ensembler, Y_test)
    print(ensembled)
    # Save the ensembled accuracy and the three models accuracy
    record_relu.append([n, ensembled])
    logs_relu.append(logger)
    del ensembler, ensembled
    
print("\n \n \n")
print("Logs Relu: ", logs_relu)
print("Record Relu: ", record_relu)

    
plot([[x[1] for x in logs_relu]])


 
 Starting round with 23 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.9717

 
 Starting round with 26 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samp

Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.8639

 
 Starting round with 32 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15


Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.6807

 
 Starting round with 35 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.5834

 
 Starting round with 38 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch

In [None]:
act = "relu"
logs_relu = []
record_relu = [[23, 0.9717], [26, 0.9253], [29, 0.8639], [32, 0.6807], [35, 0.5834]]
logs.info("Relu 38-42")

for n in range(38,42,3):
    ensembler = 0
    logger = [n]
    print("\n \n Starting round with {0} layers".format(n))
    logs.info("\n \n Starting round with {0} layers".format(n))
    for i in range(3):
        # Garbage collector
        gc.collect()
        # Set optimizer
        opt = SGD(lr=0.01, momentum=0.9)
        # Set callbacks (learning rate reducer and early stopping)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.35, min_lr=0.00001)
        early_stop = EarlyStopping(monitor='val_acc', patience=5, verbose = 1)
        # Common params 
        epochs = 15
        batch_size = 128
        # Create and compile the model
        model = create(act, n)
        # Compile the model
        model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
        # Train the model
        history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                            verbose = 1 , callbacks=[learning_rate_reduction, early_stop])
        
        # Record accuracy of each model and save it
        logger.append(model.evaluate(X_test, Y_test)[1])
        logs.info("Accuracy "+str(i)+": "+str(logger[-1]))
        # Calculate probabilities of test data and sum them toghether
        ensembler += model.predict_proba(X_test)
        # Clear session (GPU MEMORY)
        K.get_session().close()
        K.set_session(K.tf.Session())
        del model, history, learning_rate_reduction, early_stop, opt
     
    # Calculate the median accuracy
    ensembled = accuracy(ensembler, Y_test)
    print(ensembled)
    # Save the ensembled accuracy and the three models accuracy
    record_relu.append([n, ensembled])
    logs_relu.append(logger)
    del ensembler, ensembled
    logs.info("Ensembled accuracy: "+str(record_relu[-1]))
    logs.info("Logs: "+str(logs_relu[-1]))
    
logs.info("\n \n \n")
logs.info("Logs e_swish_2: "+str(logs_relu))
logs.info("Record e_swish_2: "+str(record_relu))
print("\n \n \n")
print("Logs Relu: ", logs_relu)
print("Record Relu: ", record_relu)

    
plot([[x[1] for x in logs_relu]])


 
 Starting round with 38 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 00006: reducing learning rate to 0.0034999999217689036.
Epoch 7/15
Epoch 8/15
Epoch 00008: reducing learning rate to 0.0012249999563209713.
Epoch 00008: early stopping
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: reducing learning rate to 0.0034999999217689036.
Epoch 5/15
Epoch 6/15
Epoch 00006: reducing learning rate to 0.0012249999563209713.
Epoch 00006: early stopping
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: reducing learning rate to 0.0034999999217689036.
Epoch 5/15
Epoch 6/15
Epoch 00006: reducing learning rate to 0.0012249999563209713.
Epoch 00006: early stopping
0.1135

 
 Starting round with 41 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/1

Epoch 5/15
Epoch 6/15
Epoch 00006: reducing learning rate to 0.0012249999563209713.
Epoch 00006: early stopping
0.1572

 
 

Logs Relu:  [[23, 0.9717], [26, 0.9253], [29, 0.8639], [32, 0.6807], [35, 0.5834], [38, 0.1135, 0.1135, 0.1135], [41, 0.15679999999999999, 0.1135, 0.11360000000000001]]
Record Relu:  [[38, 0.1135], [41, 0.1572]]


<IPython.core.display.Javascript object>

In [None]:
act = e_swish_2
logs.info("\n \n \n"+"\n \n \n")
logs.info("ESWISH 23-42")

logs_e_swish_2 = []
record_e_swish_2 = []
for n in range(23,42,3):
    ensembler = 0
    logger = [n]
    logs.info("\n \n Starting round with {0} layers".format(n))
    print("\n \n Starting round with {0} layers".format(n))
    for i in range(3):
        # Garbage collector
        gc.collect()
        # Set optimizer
        opt = SGD(lr=0.01, momentum=0.9)
        # Set callbacks (learning rate reducer and early stopping)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.35, min_lr=0.00001)
        early_stop = EarlyStopping(monitor='val_acc', patience=5, verbose = 1)
        # Common params 
        epochs = 15
        batch_size = 128
        # Create and compile the model
        model = create(act, n)
        # Compile the model
        model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
        # Train the model
        history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                            verbose = 1 , callbacks=[learning_rate_reduction, early_stop])
        
        # Record accuracy of each model and save it
        logger.append(model.evaluate(X_test, Y_test)[1])
        logs.info("Accuracy "+str(i)+": "+str(logger[-1]))
        # Calculate probabilities of test data and sum them toghether
        ensembler += model.predict_proba(X_test)
        # Clear session (GPU MEMORY)
        K.get_session().close()
        K.set_session(K.tf.Session())
        del model, history, learning_rate_reduction, early_stop, opt
     
    # Calculate the median accuracy
    ensembled = accuracy(ensembler, Y_test)
    print(ensembled)
    # Save the ensembled accuracy and the three models accuracy
    record_e_swish_2.append([n, ensembled])
    logs_e_swish_2.append(logger)
    del ensembler, ensembled
    logs.info("Ensembled accuracy: "+str(record_e_swish_2[-1]))
    logs.info("Logs: +"+str(logs_e_swish_2[-1]))
    
logs.info("\n \n \n")
logs.info("Logs e_swish_2: "+str(logs_e_swish_2))
logs.info("Record e_swish_2: "+str(record_e_swish_2))
    
print("\n \n \n")
print("Logs e_swish_2: ", logs_e_swish_2)
print("Record e_swish_2: ", record_e_swish_2)
    
plot([[x[1] for x in logs_relu], [x[1] for x in logs_e_swish_2]])


 
 Starting round with 23 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 00014: reducing learning rate to 0.0034999999217689036.
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.9753

 
 Starting round with 26 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 0

Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.9487

 
 Starting round with 32 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
E

Epoch 14/15
Epoch 15/15
0.9047

 
 Starting round with 38 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 00009: reducing learning rate to 0.0034999999217689036.
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.7936

 
 Starting round with 41 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15


In [12]:
act = e_swish_2
# logs.info("\n \n \n"+"\n \n \n")
# logs.info("ESWISH 41-42")

logs_e_swish_2 = []
record_e_swish_2 = []
for n in range(41,45,3):
    ensembler = 0
    logger = [n]
    logs.info("\n \n Starting round with {0} layers".format(n))
    print("\n \n Starting round with {0} layers".format(n))
    for i in range(3):
        # Garbage collector
        gc.collect()
        # Set optimizer
        opt = SGD(lr=0.01, momentum=0.9)
        # Set callbacks (learning rate reducer and early stopping)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.35, min_lr=0.00001)
        early_stop = EarlyStopping(monitor='val_acc', patience=5, verbose = 1)
        # Common params 
        epochs = 15
        batch_size = 128
        # Create and compile the model
        model = create(act, n)
        # Compile the model
        model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
        # Train the model
        history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                            verbose = 1 , callbacks=[learning_rate_reduction, early_stop])
        
        # Record accuracy of each model and save it
        logger.append(model.evaluate(X_test, Y_test)[1])
        logs.info("Accuracy "+str(i)+": "+str(logger[-1]))
        # Calculate probabilities of test data and sum them toghether
        ensembler += model.predict_proba(X_test)
        # Clear session (GPU MEMORY)
        K.get_session().close()
        K.set_session(K.tf.Session())
        del model, history, learning_rate_reduction, early_stop, opt
     
    # Calculate the median accuracy
    ensembled = accuracy(ensembler, Y_test)
    print(ensembled)
    # Save the ensembled accuracy and the three models accuracy
    record_e_swish_2.append([n, ensembled])
    logs_e_swish_2.append(logger)
    del ensembler, ensembled
    logs.info("Ensembled accuracy: "+str(record_e_swish_2[-1]))
    logs.info("Logs: +"+str(logs_e_swish_2[-1]))
    
# logs.info("\n \n \n")
# logs.info("Logs e_swish_2: "+str(logs_e_swish_2))
# logs.info("Record e_swish_2: "+str(record_e_swish_2))
    
# print("\n \n \n")
# print("Logs e_swish_2: ", logs_e_swish_2)
# print("Record e_swish_2: ", record_e_swish_2)


 
 Starting round with 41 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.679

 
 Starting round with 44 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15


Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Train on 54000 samples, validate on 6000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 00014: reducing learning rate to 0.0034999999217689036.
Epoch 15/15
0.6189

 
 Starting round with 47 layers
Train on 54000 samples, validate on 6000 samples
Epoch 1/15


KeyboardInterrupt: 

In [13]:
logs.info("\n \n \n")
logs.info("Logs e_swish_2: "+str(logs_e_swish_2))
logs.info("Record e_swish_2: "+str(record_e_swish_2))
    
print("\n \n \n")
print("Logs e_swish_2: ", logs_e_swish_2)
print("Record e_swish_2: ", record_e_swish_2)


 
 

Logs e_swish_2:  [[41, 0.6018, 0.55249999999999999, 0.48520000000000002], [44, 0.35160000000000002, 0.51910000000000001, 0.3891]]
Record e_swish_2:  [[41, 0.679], [44, 0.6189]]


In [None]:
act = swish
logs.info("\n \n \n"+"\n \n \n")
logs.info("SWISH 23-42")

logs_swish = []
record_swish = []
for n in range(23,42,3):
    ensembler = 0
    logger = [n]
    logs.info("\n \n Starting round with {0} layers".format(n))
    print("\n \n Starting round with {0} layers".format(n))
    for i in range(3):
        # Garbage collector
        gc.collect()
        # Set optimizer
        opt = SGD(lr=0.01, momentum=0.9)
        # Set callbacks (learning rate reducer and early stopping)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.35, min_lr=0.00001)
        early_stop = EarlyStopping(monitor='val_acc', patience=5, verbose = 1)
        # Common params 
        epochs = 15
        batch_size = 128
        # Create and compile the model
        model = create(act, n)
        # Compile the model
        model.compile(optimizer = opt , loss = "categorical_crossentropy", metrics=["accuracy"])
        # Train the model
        history = model.fit(X_train,Y_train, epochs = epochs, validation_data = (X_val,Y_val),
                            verbose = 1 , callbacks=[learning_rate_reduction, early_stop])
        
        # Record accuracy of each model and save it
        logger.append(model.evaluate(X_test, Y_test)[1])
        logs.info("Accuracy "+str(i)+": "+str(logger[-1]))
        # Calculate probabilities of test data and sum them toghether
        ensembler += model.predict_proba(X_test)
        # Clear session (GPU MEMORY)
        K.get_session().close()
        K.set_session(K.tf.Session())
        del model, history, learning_rate_reduction, early_stop, opt
     
    # Calculate the median accuracy
    ensembled = accuracy(ensembler, Y_test)
    print(ensembled)
    # Save the ensembled accuracy and the three models accuracy
    record_swish.append([n, ensembled])
    logs_swish.append(logger)
    del ensembler, ensembled
    logs.info("Ensembled accuracy: "+str(record_swish[-1]))
    logs.info("Logs: +"+str(logs_swish[-1]))
    
logs.info("\n \n \n")
logs.info("Logs swish: "+str(logs_swish))
logs.info("Record swish: "+str(record_swish))
    
print("\n \n \n")
print("Logs swish: ", logs_swish)
print("Record swish: ", record_swish)

In [None]:
print("RELU: ", logs_relu)
print("E-SWISH: ", logs_e_swish_2)
print("SWISH: ", logs_swish)