# ResNet-Competition

__Author:Marcus P

In [5]:
import numpy as np;
from keras.utils import np_utils
from keras.callbacks import EarlyStopping
from keras.datasets import mnist
from keras.layers import Dense, Flatten,Input
from keras.models import Model
from keras.layers.merge import add
from keras.utils import np_utils
from keras.callbacks import EarlyStopping
import tensorflow as tf;


def build_res_net(n_layers=3,activation_fun=tf.nn.swish,max_number_of_weights=10**4, w=28,h=28, n_out=10):
    #(w*h * size +  n_layers *size^2 + size*n_out) < max_number_of_weights
    #((w*h + n_out) * size/n_layers +  size^2 -max_number_of_weights/n_layers  = 0;
    pdiv2 =  (w*h + n_out)/(2*n_layers);
    res_net_size = int(np.floor( -pdiv2 + np.sqrt(pdiv2**2 + max_number_of_weights/n_layers)));
    
    # The model
    input_tensor = Input(shape=(1,w,h))
    x = Flatten()(input_tensor)
    x = Dense(res_net_size, activation=activation_fun)(x)
    layer_sizes = []; 
    layer_sizes.append(w*h);
    for i in range(n_layers):
        layer_sizes.append(res_net_size);

        x = res_block(res_net_size,activation_fun)(x)
    x = Dense(units=n_out,activation='softmax')(x)
    layer_sizes.append(n_out);

    model = Model(inputs=input_tensor, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model,layer_sizes,'RES_NET';
def build_mlp(n_layers=3,activation_fun=tf.nn.swish,max_number_of_weights=10**4, w=28,h=28, n_out=10):
    # exp choose sizes
    # choose appropiate  slope
    # e^(-(k*x-s)) + m = input_size, if x = 0
    # e^(-(k*x-s)) + m = output_size, if x = n_layers +1;
    # ---- > input_size - e^(s) = m --> e^(-(k*(n_layers +1)-s)) + input_size - e^(s) = output_size
    # ----> e^(s)(1/e^(k*(n_layers +1))  - 1) = output_size - input_size
    # ---> s = log((output_size - input_size)/(1/e^(k*(n_layers +1))  - 1))  
    num_weights = 100000000;
    # a stupid way to get the number of weights for each layer
    k = 0.5; #could be a bad initial guess
    k_step = 0.01;
    n_in = w*h;
    visited_k  = set();
    layer_sizes = [];
    while True:
        s = np.log((n_out - n_in)/(1/np.exp(k*(n_layers +1))  - 1));
        m = n_in - np.exp(s);
    
        layer_sizes = [];
        for i in range(n_layers+2):
            layer_sizes.append(int(round(np.exp(-(k*i -s))+ m)));
        num_weights = 0;
        for i in range(len(layer_sizes) -1):
            num_weights += layer_sizes[i+1]*layer_sizes[i];
   
        if(num_weights > max_number_of_weights):
            k += k_step;
        else:
            visited_k.add(k);
            k -= k_step;
        if k in visited_k:
            break;
    
    # The model
    input_tensor = Input(shape=(1,w,h))
    x = Flatten()(input_tensor)
    for i in range(len(layer_sizes)-1):
        x = Dense(layer_sizes[i], activation=activation_fun)(x)
    x = Dense(units=n_out,activation='softmax')(x)
    model = Model(inputs=input_tensor, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model,layer_sizes,'MLP';
def res_block(n_output,activation_fun):

    def f(x):
        
        # H_l(x):
        # first pre-activation
        h =Dense(n_output, activation=activation_fun)(x)
        return add([x, h])
    return f



# Game rules
max_number_of_weights=10**4

#Some conf.
n_layers_interv = [3,6,9,12];


#load data 
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

train_x =X_train / 255
test_x =X_test / 255
train_y = np_utils.to_categorical(y_train)
test_y = np_utils.to_categorical(y_test)


# training
for n_layers in n_layers_interv:
    models = [];
    models.append(build_mlp(n_layers=n_layers,max_number_of_weights=max_number_of_weights));
    models.append(build_res_net(n_layers=n_layers));
    
    for i in range(len(models)):
        model = models[i][0];
        max_epochs = 300
        
        #Store training stats
        early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto', baseline=None, restore_best_weights=True)
        
        
        history = model.fit(train_x,train_y, epochs=max_epochs,
                           validation_split=0.2, verbose=0,shuffle=False,
                           callbacks=[early_stop],batch_size=100) 
        
        
        [loss, acc] = model.evaluate(test_x, test_y, verbose=0)
        
        print("Name: "+ models[i][2] + " Conf: " +  str(models[i][1]) + " test acc "  + str(acc) +" Stopped at: " + str(len(history.history['val_loss'])) );


Name: MLP Conf: [784, 13, 10, 10, 10] test acc 0.9726 Stopped at: 11
Name: RES_NET Conf: [784, 12, 12, 12, 10] test acc 0.9483 Stopped at: 21
Name: MLP Conf: [784, 12, 10, 10, 10, 10, 10, 10] test acc 0.9752 Stopped at: 16
Name: RES_NET Conf: [784, 11, 11, 11, 11, 11, 11, 10] test acc 0.9436 Stopped at: 21
Name: MLP Conf: [784, 12, 10, 10, 10, 10, 10, 10, 10, 10, 10] test acc 0.9725 Stopped at: 22
Name: RES_NET Conf: [784, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10] test acc 0.9477 Stopped at: 20
Name: MLP Conf: [784, 12, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10] test acc 0.9712 Stopped at: 26
Name: RES_NET Conf: [784, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10] test acc 0.9364 Stopped at: 23
