# Experimental: progressively growing network
This architecture is based on an idea I had about "growing" a network -- one that first learns the low-level features and then "grows" deeper in the hope of better separating on the distinguishing features.  This file needs a lot of updating (dataset, augmentation methods - switch to tf) and internal reorganization, BUT the skeleton is present.

Eventually, I would love to include more ideas and have the network become more "automatic" in that it chooses which layers should be added next -- I'd like to do this by having it evaluate itself after each iteration -- maybe on metrics like the slope of the previous layer's loss / performance metrics?

In [1]:
# NOTE: this is a custom cell that contains the common imports I personally
# use; these may or may not be necessary for the following examples

# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os # handling file i/o
import sys
import math
import time # timing epochs

# for ordered dict when building layer components
import collections

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib import colors # making colors consistent
from mpl_toolkits.axes_grid1 import make_axes_locatable # colorbar helper

# read image
from imageio import imread
# + data augmentation
from scipy import ndimage
from scipy import misc

# used for manually saving best params
import pickle

# for shuffling data batches
from sklearn.utils import shuffle

# const
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    :seed: value passed to both `np.random.seed` and `tf.set_random_seed`
    """
    # the NumPy seed is independent of the graph, so it can be set first
    np.random.seed(seed)
    # the TF seed is set after the reset so it applies to the fresh default graph
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Create `dir_path` (including parents) unless it exists; report the outcome.

    :dir_path: path of the directory to create
    """
    if os.path.exists(dir_path):
        print("{} already exists".format(dir_path))
        return
    os.makedirs(dir_path)
    print("{} created".format(dir_path))
    
def make_standard_dirs(saver=True, best_params=True, tf_logs=True):
    """Create the standard working directories, one per enabled flag.

    :saver: create `saver/`, which will hold tf saver files
    :best_params: create `best_params/`, which will hold a serialized
        version of the best params (kept as a backup in case of issues
        with the saver files)
    :tf_logs: create `tf_logs/`, which will hold the logs that will be
        visible in tensorboard

    All flags default to True, so calling with no arguments creates all
    three directories.
    """
    if saver:
        maybe_create_dir("saver")
    if best_params:
        maybe_create_dir("best_params")
    if tf_logs:
        maybe_create_dir("tf_logs")

    
# set tf log level to suppress messages, unless an error
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Report the default GPU device when one is found, otherwise say so
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print('No GPU')

# start from a clean, seeded default graph
reset_graph()

Python: (3, 5, 4, 'final', 0)
TensorFlow: 1.6.0-dev20180105
No GPU


In [None]:
# make sure the working directories (saver/, best_params/, tf_logs/) exist
make_standard_dirs()

### Helpers for saving/loading model params to disk

In [None]:
# these two functions (get_model_params and restore_model_params) are 
# ad[a|o]pted from; 
# https://github.com/ageron/handson-ml/blob/master/11_deep_learning.ipynb
def get_model_params():
    """Snapshot every global variable of the default graph.

    The values are evaluated in the default session, so this must be called
    while a session is active.

    Returns a dict mapping each variable's op name to its evaluated value.
    """
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    values = tf.get_default_session().run(variables)
    params = {}
    for variable, value in zip(variables, values):
        params[variable.op.name] = value
    return params

def restore_model_params(model_params, g, sess):
    """Write previously captured variable values back into graph `g`.

    :model_params: dict of {variable op name: value}, e.g. from get_model_params
    :g: graph that owns an "<name>/Assign" operation for every key
    :sess: session in which the assign operations are run
    """
    assign_ops = {}
    feed = {}
    for var_name in list(model_params.keys()):
        assign_op = g.get_operation_by_name(var_name + "/Assign")
        assign_ops[var_name] = assign_op
        # the second input of the Assign op is the tensor holding the value to
        # assign; feeding it overrides that value with the saved one
        feed[assign_op.inputs[1]] = model_params[var_name]
    sess.run(assign_ops, feed_dict=feed)

# these two functions are used to manually save the best
# model params to disk
def save_obj(obj, name):
    """Pickle `obj` to best_params/<name>.pkl using the highest protocol.

    The `best_params/` directory must already exist.
    """
    target = 'best_params/' + name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load and return the object pickled at best_params/<name>.pkl.

    NOTE: unpickling can execute arbitrary code; only load files this
    notebook wrote itself.
    """
    source = 'best_params/' + name + '.pkl'
    with open(source, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

## TODO: update - dataset

In [None]:
# 672
SQUARE_DIM = 224
if SQUARE_DIM:
    # square inputs: width and height share the same side length
    IMG_WIDTH = IMG_HEIGHT = SQUARE_DIM

# number of colour channels per image
CHANNELS = 3

# utility plotting function
def plot_color_image(image):
    """Show `image` (cast to uint8) without axes, titled "<width>x<height>".

    :image: array whose first two dimensions are (rows, columns)
    """
    as_uint8 = image.astype(np.uint8)
    plt.imshow(as_uint8, interpolation="nearest")
    n_rows, n_cols = image.shape[0], image.shape[1]
    plt.title("{}x{}".format(n_cols, n_rows))
    plt.axis("off")
    
# load dataset into memory
# all arrays are read from ../numpy/sigmoid/<SQUARE_DIM>/
# training split (loaded directly as X_tr / y_tr)
X_tr = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/X_train.npy')
y_tr = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/y_train.npy')

# validation split; images are kept under a raw_ name at this point
raw_X_val = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/X_val.npy')
y_val = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/y_val.npy')

# test split; images are kept under a raw_ name at this point
raw_X_test = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/X_test.npy')
y_test = np.load('../numpy/sigmoid/' + str(SQUARE_DIM) + '/y_test.npy')

In [None]:
## - vgg stats
# meanR = 123.68
# meanG = 116.779
# meanB = 103.939


# lesion information
R_std = 40.8277369711
G_std = 45.2704252815
B_std = 48.3190131682

meanR = 183.305
meanG = 149.097
meanB = 135.272


# # scale from [0, 1]
# X_tr = np.zeros((raw_X_tr.shape[0], IMG_WIDTH, IMG_HEIGHT, 3), dtype='float32')
# ind = 0
# for img in raw_X_tr:
#     X_tr[ind][:,:,0] = np.divide((img[:,:,0] - meanR), 255)
#     X_tr[ind][:,:,1] = np.divide((img[:,:,1] - meanG), 255)
#     X_tr[ind][:,:,2] = np.divide((img[:,:,2] - meanB), 255)
#     ind += 1
# del raw_X_tr


# LESION NORMALIZATION
def _lesion_normalize(raw_images):
    """Standardize each channel of every image with the lesion mean/std.

    Channel 0 uses (meanR, R_std), channel 1 (meanG, G_std) and channel 2
    (meanB, B_std).  Sharing one helper for validation and test keeps the
    two splits from drifting apart (the test split previously used the R
    statistics for all three channels).

    :raw_images: array indexed as [image, row, col, channel]
    Returns a float32 array of shape (n_images, IMG_WIDTH, IMG_HEIGHT, 3).
    """
    normalized = np.zeros((raw_images.shape[0], IMG_WIDTH, IMG_HEIGHT, 3), dtype='float32')
    channel_stats = ((meanR, R_std), (meanG, G_std), (meanB, B_std))
    for ind, img in enumerate(raw_images):
        for channel, (c_mean, c_std) in enumerate(channel_stats):
            # VGG ex: only subtract the mean, i.e. img[:,:,channel] - c_mean
            normalized[ind][:, :, channel] = np.divide(img[:, :, channel] - c_mean, c_std)
        # IF VGG; convert to BGR
        # normalized[ind] = normalized[ind][:,:,::-1]
    return normalized


# # validation
X_val = _lesion_normalize(raw_X_val)
# the raw copy is no longer needed once normalized; free the memory
del raw_X_val


# # test
X_test = _lesion_normalize(raw_X_test)
del raw_X_test

In [None]:
# this function is adapted from https://github.com/ageron/handson-ml/blob/master/13_convolutional_neural_networks.ipynb
def val_augment_image(image, lrf_p=0.0, upf_p=0.0, roll_num=0):
    """Randomly flip and roll `image`; return the result as float32.

    :image: array indexed as [row, col, ...]
    :lrf_p: probability of a left/right flip (np.fliplr)
    :upf_p: probability of an up/down flip (np.flipud)
    :roll_num: upper bound (exclusive) on the number of positions rolled

    Each axis is rolled with 50% probability, in a random direction.
    No normalization is applied here.
    """

    def _random_roll(arr, axis):
        # random draws happen in this order: apply?, magnitude, direction
        if np.random.rand() >= 0.5:
            return arr
        shift = int(np.random.rand() * roll_num)
        if np.random.rand() >= 0.5:
            shift = -shift
        return np.roll(arr, shift, axis=axis)

    result = image

    # left/right flip is governed by lrf_p
    if np.random.rand() < lrf_p:
        result = np.fliplr(result)

    # up/down flip is governed by upf_p
    if np.random.rand() < upf_p:
        result = np.flipud(result)

    # roll along the columns first (axis 1), then along the rows (axis 0)
    result = _random_roll(result, axis=1)
    result = _random_roll(result, axis=0)

    # ensure float32
    return result.astype(np.float32)

In [None]:
# this function is adapted from https://github.com/ageron/handson-ml/blob/master/13_convolutional_neural_networks.ipynb
def augment_image(image, lrf_p=0.0, upf_p=0.0, r_degree=0.0, max_zoom=0.0, roll_num=0):
    """Randomly flip, roll, normalize and (optionally) rotate a training image.

    :image: array indexed as [row, col, channel] with at least 3 channels;
        it is not modified (the float32 conversion makes a copy)
    :lrf_p: probability of a left/right flip (np.fliplr)
    :upf_p: probability of an up/down flip (np.flipud)
    :r_degree: maximum rotation in degrees; 0 disables rotation
    :max_zoom: currently unused, accepted for interface compatibility
    :roll_num: upper bound (exclusive) on the number of positions rolled

    Returns a float32 array of the same shape whose first three channels
    are standardized with the lesion dataset mean/std.
    """
    n_image = image

    # left/right flip is governed by lrf_p
    if np.random.rand() < lrf_p:
        n_image = np.fliplr(n_image)

    # up/down flip is governed by upf_p
    if np.random.rand() < upf_p:
        n_image = np.flipud(n_image)

    # 50% chance roll image horiz
    if np.random.rand() < 0.5:
        val = int(np.random.rand() * roll_num)
        # 50% left or right
        if np.random.rand() < 0.5:
            n_image = np.roll(n_image, val, axis=1)
        else:
            n_image = np.roll(n_image, -val, axis=1)

    # 50% chance roll image vert
    if np.random.rand() < 0.5:
        val = int(np.random.rand() * roll_num)
        # 50% up or down
        if np.random.rand() < 0.5:
            n_image = np.roll(n_image, val, axis=0)
        else:
            n_image = np.roll(n_image, -val, axis=0)

    # normalize on the fly
    # this is inefficient, but the conversion to float32 is needed here
    # (it also copies, so the caller's array is left untouched)
    n_image = n_image.astype(np.float32)

    # lesion dataset statistics, per channel: (mean, std)
    # (VGG means would be 123.68 / 116.779 / 103.939)
    channel_stats = (
        (183.305, 40.8277369711),
        (149.097, 45.2704252815),
        (135.272, 48.3190131682),
    )
    # if VGG, only subtract mean, don't scale
    for channel, (c_mean, c_std) in enumerate(channel_stats):
        n_image[:, :, channel] -= c_mean
        n_image[:, :, channel] /= c_std

    # IF VGG, convert to BGR
    # n_image = n_image[:,:,::-1]

    if r_degree > 0.0:
        # rotate by a random fraction of the maximum angle
        r_degree *= np.random.rand()
        # randomly choose rotation direction
        r_dir = (1 if np.random.rand() > 0.5 else -1)
        # `ndimage.rotate` is the public entry point; the
        # `ndimage.interpolation` namespace is deprecated in newer SciPy
        n_image = ndimage.rotate(n_image, r_dir*r_degree, reshape=False)

    return n_image

## TODO: update hyperparams use/organization

In [None]:
# batch size used by run_that_model (read there via `global batch_size`)
batch_size = 16
# width of the label placeholder and of the final dense layer in build_custom
n_outputs = 1

# L2 kernel regularizer (scale 0.01) shared by the dense layers in build_custom
l2_regularizer = tf.contrib.layers.l2_regularizer(0.01)
# kernel initializer shared by the dense layers in build_custom
he_init = tf.contrib.layers.variance_scaling_initializer()

In [None]:
def __build_kernel_var(NAME=None, SHAPE=None, KERNEL_INIT=None):
    """Create (or fetch) the float32 kernel variable "<NAME>_kernel".

    :NAME: prefix of the variable name
    :SHAPE: shape of the kernel variable
    :KERNEL_INIT: initializer to use; Xavier initialization when not given
    """
    # fall back to Xavier only when no initializer was supplied
    initializer = KERNEL_INIT or tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name=str(NAME + "_kernel"),
                           shape=SHAPE,
                           initializer=initializer,
                           dtype=tf.float32)

In [None]:
def __build_bias_var(NAME=None, FILTER_NUM=None, BIAS_INIT=None):
    """Create (or fetch) the 1-D bias variable "<NAME>_bias".

    :NAME: prefix of the variable name
    :FILTER_NUM: length of the bias vector
    :BIAS_INIT: initializer to use; zeros when not given
    """
    # fall back to zeros only when no initializer was supplied
    initializer = BIAS_INIT or tf.zeros_initializer()
    return tf.get_variable(name=str(NAME+"_bias"),
                           shape=[FILTER_NUM],
                           initializer=initializer)

In [None]:
def __build_out(name=None, kernel=None, bias=None, activation_fn=None):
    """Add `bias` to `kernel` and apply an activation.

    :name: prefix for the default activation op name ("<name>_out")
    :kernel: tensor the bias is added to (despite the name, callers pass
        convolution outputs / concatenated feature maps here)
    :bias: 1-D bias tensor
    :activation_fn: optional callable applied instead of the default SELU
    """
    pre_activation = tf.nn.bias_add(kernel, bias)
    if not activation_fn:
        # default path: SELU, named after the component
        return tf.nn.selu(pre_activation, name=str(name+"_out"))
    # handle special cases of activations, if requested
    print("NOTE: custom activation function requested")
    return activation_fn(pre_activation)

In [None]:
def _build_conv_layer(X_in=None, name=None, kern_shape=None, stride_len=None):
    """Build a SAME-padded 2-D convolution with a freshly created kernel.

    :X_in: input tensor
    :name: base name; the op is called "<name>_conv2d" and the kernel
        variable "<name>_conv2d_kernel"
    :kern_shape: kernel shape passed to the variable builder
    :stride_len: stride used for both spatial dimensions

    No bias or activation is applied here.
    """
    op_name = str(name + "_conv2d")
    strides = [1, stride_len, stride_len, 1]
    weights = __build_kernel_var(NAME=op_name, SHAPE=kern_shape)
    return tf.nn.conv2d(input=X_in, filter=weights, strides=strides,
                        padding="SAME", name=op_name)

In [None]:
# used to create stacked, split, conv layers
def _build_split_conv_layer(X_in=None, name=None, kern_size=None, 
                            H_first=True, stride_len=None, 
                            filter_num_in=None, filter_num_out=None):
    """Build two stacked convolutions with complementary 1-D kernels.

    :X_in: input tensor
    :name: base name for the ops/variables
    :kern_size: size of the kernel (long dimension), single value
    :H_first: True -> [long, 1] then [1, long]; False -> the reverse
    :stride_len: stride applied by BOTH convolutions
    :filter_num_in: channel count of `X_in`
    :filter_num_out: channel count produced by each convolution

    Bias + activation are applied after the first convolution only; the
    raw output of the second convolution is returned.
    """
    if H_first:
        # [long, 1] --> [1, long]
        tag, first_hw, second_hw = "_split_HV_", [kern_size, 1], [1, kern_size]
    else:
        # [1, long] --> [long, 1]
        tag, first_hw, second_hw = "_split_VH_", [1, kern_size], [kern_size, 1]

    block_name = str(name + tag + str(kern_size))
    shape_a = first_hw + [filter_num_in, filter_num_out]
    shape_b = second_hw + [filter_num_out, filter_num_out]
    c_a_name = str(block_name + "_a")
    c_b_name = str(block_name + "_b")

    # NOTE(review): these two variables are not passed to the convolutions
    # below (_build_conv_layer creates its own "<name>_conv2d_kernel"
    # variables).  They are still created so the graph's variable set stays
    # the same; confirm whether they can be dropped.
    split_kernel_a = __build_kernel_var(NAME=c_a_name, SHAPE=shape_a)
    split_kernel_b = __build_kernel_var(NAME=c_b_name, SHAPE=shape_b)

    # first block: conv -> bias -> activation
    first_conv = _build_conv_layer(X_in=X_in, name=c_a_name,
                                   kern_shape=shape_a, stride_len=stride_len)
    first_bias = __build_bias_var(NAME=c_a_name, FILTER_NUM=int(filter_num_out))
    first_out = __build_out(kernel=first_conv, bias=first_bias, name=c_a_name)

    # second block: conv only
    return _build_conv_layer(X_in=first_out, name=c_b_name,
                             kern_shape=shape_b, stride_len=stride_len)

In [None]:
# used to create single stacked layers
def _build_nested_conv_layer(X_in=None, name=None, kern_shape=None, stride_len=None, depth=None):
    """Stack `depth` convolutions, each fed by the previous one.

    :X_in: input tensor of the first convolution
    :name: base name; layers are suffixed "_conv2d_nested_a" .. "_f"
        (so at most six layers are supported)
    :kern_shape: kernel shape of the first convolution
    :stride_len: stride used by every convolution
    :depth: number of stacked convolutions

    From the second convolution on, the spatial kernel size is kept and
    the output channel count is 1.5x the incoming channel count.  Bias and
    activation are applied between convolutions but not after the last one.
    """
    growth = 1.5
    suffixes = "abcdef"
    for idx in range(depth):
        layer_name = str(name + "_conv2d_nested_" + suffixes[idx])

        if idx == 0:
            source = X_in
        else:
            source = endpoint
            # keep the kernel size the same; only the filter counts change
            channels_in = int(source.get_shape()[-1])
            channels_out = int(channels_in * growth)
            side = kern_shape[0]
            kern_shape = [side, side, channels_in, channels_out]

        endpoint = _build_conv_layer(X_in=source, name=layer_name,
                                     kern_shape=kern_shape, stride_len=stride_len)

        # apply bias and activation if not end
        if idx != (depth - 1):
            print("appplying mid bias and activation")
            mid_bias = __build_bias_var(NAME=layer_name,
                                        FILTER_NUM=int(endpoint.get_shape()[-1]))
            endpoint = __build_out(kernel=endpoint, bias=mid_bias, name=layer_name)

    return endpoint

In [None]:
def _concatenate_layers_out(name=None, layer_list=None, full_len=None):
    """Concatenate tensors along axis 3, then add a bias and activate.

    :name: base name for the bias variable and the activation op
    :layer_list: tensors to concatenate
    :full_len: length of the bias vector that is created
    """
    merged = tf.concat(layer_list, axis=3)
    merged_bias = __build_bias_var(NAME=name, FILTER_NUM=full_len)
    return __build_out(kernel=merged, bias=merged_bias, name=name)

In [None]:
def __build_feature_components_dict(component_list=None):
    """Map each component to a unique, ordered, lowercase letter id.

    The id is used to produce a unique name for each component within a
    layer and to prevent variable name reuse.  Ids run "a".."z" and then
    continue "aa", "ab", ... so any number of components is supported
    (the first twelve ids are unchanged: "a".."l").

    :component_list: iterable of component descriptions; None is treated
        as an empty list
    Returns a collections.OrderedDict of {id: component} in input order.
    """
    def _component_id(index):
        # bijective base-26 numbering: 0 -> "a", 25 -> "z", 26 -> "aa"
        letters = ""
        index += 1
        while index > 0:
            index, remainder = divmod(index - 1, 26)
            letters = chr(ord("a") + remainder) + letters
        return letters

    if component_list is None:
        component_list = []

    ordered_feat_desc = collections.OrderedDict()
    for i, component in enumerate(component_list):
        ordered_feat_desc[_component_id(i)] = component
    return ordered_feat_desc

In [None]:
def _build_layer_extraction(prev_endpoint=None, layer_num=None, comp_dict=None, in_filter_num=None, out_filter_num=None, name_str=None):    
    """Build every component of one layer and concatenate their outputs.

    :prev_endpoint: tensor fed to every component of this layer
    :layer_num: index of the layer (currently unused)
    :comp_dict: ordered mapping {component id: description}; a description
        has a "type" ("conv", "avgpool", "maxpool" or "nested") and, for
        "conv"/"nested", a "params" dict ("size", "stride", plus "shape"
        for "conv" or "depth" for "nested")
    :in_filter_num: channel count of `prev_endpoint`
    :out_filter_num: channel count produced by each conv component
    :name_str: base name of the layer

    Returns the concatenation (axis 3) of all component outputs with a
    bias and activation applied.

    Raises ValueError for an unknown component type or conv shape.
    """
    component_list = []
    num_conv = 0
    num_pool = 0
    for comp_key in comp_dict:
        base_name = name_str + "_" + comp_key
        spec = comp_dict[comp_key]
        comp_type = spec["type"]
        if comp_type == "conv":
            c_size = spec["params"]["size"]
            stride_len = spec["params"]["stride"]
            conv_shape = spec["params"]["shape"]
            if conv_shape == "square":
                print("square")
                component = _build_conv_layer(X_in=prev_endpoint, name=base_name, 
                                              kern_shape=[c_size,c_size,in_filter_num, out_filter_num], 
                                              stride_len=stride_len)

            elif conv_shape == "split":
                print("split")
                component = _build_split_conv_layer(X_in=prev_endpoint, name=base_name, 
                                                    kern_size=c_size, 
                                                    H_first=True, stride_len=stride_len, 
                                                    filter_num_in=in_filter_num, filter_num_out=out_filter_num)
            else:
                # previously fell through and re-used a stale `component`
                raise ValueError("unknown conv shape {!r} for component {!r}".format(conv_shape, comp_key))
            num_conv += 1

        elif comp_type == "avgpool":
            print("avg pool:", spec)
            component = tf.nn.avg_pool(value=prev_endpoint, 
                                       ksize=[1,3,3,1], strides=[1,1,1,1], 
                                       padding="SAME", name=str(name_str+"_avg_pool"))
            num_pool += 1

        elif comp_type == "maxpool":
            print("max pool:", spec)
            component = tf.nn.max_pool(value=prev_endpoint, 
                                       ksize=[1,2,2,1], strides=[1,2,2,1], 
                                       padding="SAME", name=str(name_str+"_max_pool"))
            num_pool += 1

        elif comp_type == "nested":
            c_size = spec["params"]["size"]
            stride_len = spec["params"]["stride"]
            nested_depth = spec["params"]["depth"]
            # TODO: there is an issue here in that we only increase the filter depth
            # during the first iteration.  This likely should be iteratively increased
            # and the filter_out_num will need to be returned and accounted for.  This
            # strengthens the idea that when building the layers, the filter num
            # should be calculated from the previous output and not hard coded.
            component = _build_nested_conv_layer(X_in=prev_endpoint, name=base_name, 
                                     kern_shape=[c_size,c_size,in_filter_num, out_filter_num], 
                                     stride_len=stride_len, depth=nested_depth)
            num_conv += 1

        else:
            # previously fell through and re-used a stale `component`
            raise ValueError("unknown component type {!r} for component {!r}".format(comp_type, comp_key))

        component_list.append(component)

    # =================== concatenate output together
    final_len = 0
    final_bias = 0
    for compx in component_list:
        compx_bias = int(np.prod(compx.get_shape()[1:]))
        final_bias += compx_bias
        # the components are concatenated along the channel axis, so the
        # bias added afterwards needs one entry per channel of EVERY component
        final_len += int(compx.get_shape()[-1])

    feature_len = int(out_filter_num*(num_conv) + (in_filter_num*num_pool))
    print("final_len: ", final_len)
    print("final_bias: ", final_bias)
    print("feature len: ", feature_len)
    endpoint = _concatenate_layers_out(name=str(name_str+"_concat"), layer_list=component_list, full_len=final_len)

    return endpoint

In [None]:
def build_custom(ARCH_ID, P_LEVEL, FINAL, REG, MODEL_DEFINITION):
    """Build the progressive-growth graph for one training stage.

    :ARCH_ID: identifier of the run (currently unused inside this function)
    :P_LEVEL: current growth level; when not FINAL, only variables of the
        "cat_0<P_LEVEL>" layer (plus everything outside "cat_0*") are trained
    :FINAL: when True, every "cat_0*" variable is frozen and a stack of
        dense layers (fc1 -> dropout -> fc2 -> fc3 -> output) is trained
    :REG: requested pre-fc dropout flag.  NOTE(review): it is overridden to
        False below, so the argument currently has no effect
    :MODEL_DEFINITION: mapping whose keys ("E", "R", "C", "N") are iterated
        in order; each value is the component list of one layer

    Returns (graph, saver).  Placeholders, ops and metrics are exposed
    through the graph collections "main_ops", "write_ops", "save_init",
    "acc_metrics", "loss_metrics", "test_metrics" and "label_nodes".
    """

    reset_graph()
    g = tf.Graph()
    with g.as_default():

        with tf.name_scope("hyper_params"):
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        with tf.name_scope("inputs"):
            # data
            X = tf.placeholder(tf.float32, shape=[None, IMG_HEIGHT, IMG_WIDTH, CHANNELS], name="X")

            # labels
            y_raw = tf.placeholder(tf.int64, shape=[None, n_outputs], name="y_input")
            y_ = tf.cast(y_raw, tf.float32)

            # for training/evaluating
            training = tf.placeholder_with_default(False, shape=[], name="training")

        ######################################## Build layers
        endpoint = None
        l = None
        for i, l in enumerate(MODEL_DEFINITION):
            component_list = MODEL_DEFINITION[l]
            layer_name = "cat_0" + str(i)
            if i == 0:
                # first layer reads the RGB input and doubles the channel count
                in_chan = 3
                filter_count = int(in_chan * 2)
                in_ph = X
            else:
                in_ph = endpoint
                
            if l == "E":
#                 component_list = MODEL_DEFINITION[l]
                OCD = __build_feature_components_dict(component_list)

            elif l == "R":
                # the objective is to decrease the H and W of the current feature map
                # i.e. [224x224xN] --> [112x112xN]
                OCD = __build_feature_components_dict(component_list)
                
            elif l == "C":
                # the objective is to decrease the "length" of the current feature map
                # and so our filter count will be a % smaller than the current value.
                # i.e. [HxWx512] --> [HxWx256]
                # TODO: this value should be parameterized
                filter_count = int(in_chan*0.6)
                OCD = __build_feature_components_dict(component_list)
            
            elif l == "N":
                print("---- nested ------")
                OCD = __build_feature_components_dict(component_list)
                
            endpoint = _build_layer_extraction(prev_endpoint=in_ph, layer_num=i, comp_dict=OCD,
                                               in_filter_num=in_chan, out_filter_num=filter_count,
                                               name_str=layer_name)
            # the next layer's input channel count is this layer's output depth
            in_chan = int(np.prod(endpoint.get_shape()[-1]))
        
        ######################################################### [Build Layers: END]
                        
            
        # Reshape for fully connected layer
        last_shape = int(np.prod(endpoint.get_shape()[1:]))
        endpoint = tf.reshape(endpoint, shape=[-1, last_shape])
        
        
        ## add dropout regularization after the first 2 layers
        # NOTE(review): this assignment disables the REG argument entirely
        REG = False
        if REG:
            reg_value = 0.33
            print("adding pre-fc regularization: {}".format(reg_value))
            endpoint = tf.layers.dropout(endpoint, reg_value, training=training, name="entry_drop")

            
        with tf.name_scope("output_layer"):
            if FINAL:
                print("last shape:", last_shape)
                logits_ = tf.layers.dense(endpoint, 128, kernel_regularizer=l2_regularizer, 
                                          kernel_initializer=he_init, activation=tf.nn.selu, name="fc1")
                # dropout must act on the fc1 output; feeding `endpoint` here
                # would discard fc1 and connect fc2 straight to the flattened
                # feature map
                logits_ = tf.layers.dropout(logits_, 0.2, training=training, name="fc1_drop")
                
                logits_ = tf.layers.dense(logits_, 48, kernel_regularizer=l2_regularizer,
                                          kernel_initializer=he_init, activation=tf.nn.selu, name="fc2")
                
                logits_ = tf.layers.dense(logits_, 16, kernel_regularizer=l2_regularizer,
                                          kernel_initializer=he_init, activation=tf.nn.selu, name="fc3")
                
#                 logits_ = tf.layers.dense(logits_, 24, kernel_regularizer=l2_regularizer,
#                                           kernel_initializer=he_init, activation=tf.nn.selu, name="fc4")
                
#                 logits_ = tf.layers.dense(logits_, 16, kernel_regularizer=l2_regularizer, 
#                                           kernel_initializer=he_init, activation=tf.nn.selu, name="fc5")
                
#                 logits_ = tf.layers.dense(logits_, 8, kernel_regularizer=l2_regularizer, 
#                                           kernel_initializer=he_init, activation=tf.nn.selu, name="fc6")

                logits_ = tf.layers.dense(logits_, n_outputs, name="sigmoid_output")
                preds = tf.sigmoid(logits_, name="preds")
            else:
                # intermediate stages classify straight from the flattened features
                logits_ = tf.layers.dense(endpoint, n_outputs, name="sigmoid_output")
                preds = tf.sigmoid(logits_, name="preds")

        # ================================================ cost
        with tf.name_scope("cross_entropy"):
            xentropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_, labels=y_)
            batch_loss = tf.reduce_mean(xentropy)

        with tf.name_scope("train"):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                               beta1=0.9,
                                               beta2=0.999,
                                               epsilon=1e-08,
                                               use_locking=False,
                                               name='Adam')
            # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#             optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
            # training_op = optimizer.minimize(batch_loss, name="training_op")
            
            all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            scope_str = None
            if FINAL:
                # TODO:
                # freeze every "cat_0<x>" variable (x being any character but "a")
                scope_str = "cat_0[^a]"
                freeze_vars = g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_str)
#                 freeze_vars = g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
#                 last_bias = "cat_0["+ str(P_LEVEL) +"]_all_bias"
#                 last_bias_var = g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=last_bias)
#                 freeze_vars = [var for var in freeze_vars if var not in last_bias_var]
            else:
                # TODO:
                # freeze every "cat_0<x>" variable except those of the current level
                scope_str = "cat_0[^" + str(P_LEVEL) + "]"
                print("scope_str:", scope_str)
                freeze_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_str)
#                 prev_bias_str = "cat_0[" + str(P_LEVEL - 1) + "]_all_bias"
#                 prev_bias_var = g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=prev_bias_str)
                # remove previous bias var from variables to freeze
#                 freeze_vars = [var for var in freeze_vars if var not in prev_bias_var]
                
            train_vars = [var for var in all_vars if var not in freeze_vars]

            print("--------------------------------")
            print("frozen: ", freeze_vars)
            print("--------------------------------")
            print("train: ", train_vars)
            print("--------------------------------")
            print("num vars:", len(all_vars), "= frozen(", len(freeze_vars), ")", "+ train(", len(train_vars), ")")
            print("=================================")
            
            # only the non-frozen variables are updated by the optimizer
            training_op = optimizer.minimize(batch_loss, var_list=train_vars, name="training_op")


        # =============================================== metrics
        # each metrics scope collects its own local variables so that they can
        # be reset independently through the matching *_reset_op
        with tf.name_scope("train_metrics") as scope:    
            # train_y_true_cls = tf.cast(y_, tf.bool)
            train_y_true_cls = tf.greater_equal(y_, 0.5)
            train_y_pred_cls = tf.greater_equal(preds, 0.5)

            correct_prediction = tf.equal(train_y_pred_cls, train_y_true_cls)
            train_batch_acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            
            train_auc, train_auc_update = tf.metrics.auc(labels=y_, predictions=preds)

            train_tf_acc, train_tf_acc_op = tf.metrics.accuracy(labels=train_y_true_cls, predictions=train_y_pred_cls)
            train_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            train_acc_reset_op = tf.variables_initializer(train_acc_vars, name="train_acc_reset_op")
            
        for node in (y_, preds, train_y_true_cls, train_y_pred_cls, correct_prediction, train_batch_acc):
            g.add_to_collection("label_nodes", node)

        with tf.name_scope("validation_metrics") as scope:    
            val_y_true_cls = tf.greater_equal(y_, 0.5)
            val_y_pred_cls = tf.greater_equal(preds, 0.5)

            val_correct_prediction = tf.equal(val_y_pred_cls, val_y_true_cls)
            val_batch_acc = tf.reduce_mean(tf.cast(val_correct_prediction, tf.float32))
            
            val_auc, val_auc_update = tf.metrics.auc(labels=y_, predictions=preds)

            val_tf_acc, val_tf_acc_op = tf.metrics.accuracy(labels=val_y_true_cls, predictions=val_y_pred_cls)
            val_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            val_acc_reset_op = tf.variables_initializer(val_acc_vars, name="val_acc_reset_op")

        with tf.name_scope("test_metrics") as scope:    
            test_y_true_cls = tf.greater_equal(y_, 0.5)
            test_y_pred_cls = tf.greater_equal(preds, 0.5)
            
            test_auc, test_auc_update = tf.metrics.auc(labels=y_, predictions=preds)
            
            test_tf_acc, test_tf_acc_op = tf.metrics.accuracy(labels=test_y_true_cls, predictions=test_y_pred_cls)
            test_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            test_acc_reset_op = tf.variables_initializer(test_acc_vars, name="test_acc_reset_op")

        # =============================================== loss 
        with tf.name_scope("train_loss_eval") as scope:
            train_mean_loss, train_mean_loss_update = tf.metrics.mean(batch_loss)
            train_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            train_loss_reset_op = tf.variables_initializer(train_loss_vars, name="train_loss_reset_op")

        with tf.name_scope("val_loss_eval") as scope:
            val_mean_loss, val_mean_loss_update = tf.metrics.mean(batch_loss)
            val_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            val_loss_reset_op = tf.variables_initializer(val_loss_vars, name="val_loss_reset_op")

        with tf.name_scope("test_loss_eval") as scope:
            test_mean_loss, test_mean_loss_update = tf.metrics.mean(batch_loss)
            test_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            test_loss_reset_op = tf.variables_initializer(test_loss_vars, name="test_loss_rest_op")

        # ================================================ initialize and save
        with tf.name_scope("save_session"):
            init_global = tf.global_variables_initializer()
            init_local = tf.local_variables_initializer()
            saver = tf.train.Saver()


        # ==================================== combine operations
        # ===== epoch, train
        epoch_train_loss_scalar = tf.summary.scalar('train_epoch_loss', train_mean_loss)
#         epoch_train_acc_scalar = tf.summary.scalar('train_epoch_acc', train_tf_acc)
        epoch_train_auc_scalar = tf.summary.scalar('train_epoch_auc', train_auc)
        epoch_train_write_op = tf.summary.merge([epoch_train_loss_scalar, epoch_train_auc_scalar], name="epoch_train_write_op")

        # ===== epoch, validation
        epoch_validation_loss_scalar = tf.summary.scalar('validation_epoch_loss', val_mean_loss)
#         epoch_validation_acc_scalar = tf.summary.scalar('validation_epoch_acc', val_tf_acc)
        epoch_validation_auc_scalar = tf.summary.scalar('validation_epoch_auc', val_auc)
        epoch_validation_write_op = tf.summary.merge([epoch_validation_loss_scalar, epoch_validation_auc_scalar], name="epoch_validation_write_op")

        # ====== batch, train
        train_batch_loss_scalar = tf.summary.scalar('train_batch_loss', batch_loss)
        train_batch_acc_scalar = tf.summary.scalar('train_batch_acc', train_batch_acc)
        train_batch_write_op = tf.summary.merge([train_batch_loss_scalar, train_batch_acc_scalar], name="train_batch_write_op")

        # ====== checkpoint, validation
        checkpoint_validation_loss_scalar = tf.summary.scalar('validation_batch_loss', batch_loss)
        checkpoint_validation_acc_scalar = tf.summary.scalar('validation_batch_acc', val_batch_acc)
        checkpoint_validation_write_op = tf.summary.merge([checkpoint_validation_loss_scalar, checkpoint_validation_acc_scalar], name="checkpoint_valdiation_write_op")
        
        # the collections below are the public interface of the graph; nodes
        # are added in a fixed order
        # write operations
        for node in (epoch_train_write_op, epoch_validation_write_op, train_batch_write_op, checkpoint_validation_write_op):
            g.add_to_collection("write_ops", node)
            
        # saver/init
        for node in (init_global, init_local):
            g.add_to_collection("save_init", node)
            
        # acc metrics
        for node in (train_tf_acc, train_tf_acc_op, train_acc_reset_op, 
                     train_auc, train_auc_update,
                     val_tf_acc, val_tf_acc_op, val_acc_reset_op,
                     val_auc, val_auc_update,
                     test_tf_acc, test_tf_acc_op, test_acc_reset_op,
                     test_auc, test_auc_update):
            g.add_to_collection("acc_metrics", node)
        
        # loss metrics
        for node in (train_mean_loss, train_mean_loss_update, train_loss_reset_op, 
                     val_mean_loss, val_mean_loss_update, val_loss_reset_op, 
                     test_mean_loss, test_mean_loss_update, test_loss_reset_op):
            g.add_to_collection("loss_metrics", node)
            
        # test metrics
        for node in (train_y_true_cls, train_y_pred_cls):
            g.add_to_collection("test_metrics", node)
            
        # main operations
        for node in (training_op, X, y_raw, training, learning_rate):
            g.add_to_collection("main_ops", node)

        print("model built.")

    return g, saver

# signals that the cell defining build_custom ran to completion
print("ready")

In [None]:
def _augment_batch(images, augment_fn, enabled, **aug_kwargs):
    """Return a float32 batch built from a copy of every image in ``images``.

    Args:
        images: array of images; ``images.shape[0]`` gives the batch length.
        augment_fn: callable invoked as ``augment_fn(image_copy, **aug_kwargs)``.
        enabled: when False the copies are stored without augmentation.
        **aug_kwargs: keyword arguments forwarded to ``augment_fn``.

    Returns:
        Array of shape ``(len(images), IMG_WIDTH, IMG_HEIGHT, 3)``.
    """
    batch = np.zeros((images.shape[0], IMG_WIDTH, IMG_HEIGHT, 3), dtype='float32')
    for idx, img in enumerate(images):
        img_copy = np.copy(img)
        batch[idx] = augment_fn(img_copy, **aug_kwargs) if enabled else img_copy
    return batch


def _run_validation_pass(sess, update_ops, X, y_raw, training, aug_enabled, aug_kwargs):
    """Feed the whole validation set (globals X_val / y_val) once through ``update_ops``.

    Full batches are augmented with ``val_augment_image``; the trailing partial
    batch, if any, is fed as-is.
    """
    num_full_batches = len(X_val) // batch_size
    for vitter in range(num_full_batches):
        lower_i = vitter * batch_size
        higher_i = lower_i + batch_size
        X_val_batch_aug = _augment_batch(X_val[lower_i:higher_i], val_augment_image,
                                         aug_enabled, **aug_kwargs)
        sess.run(update_ops,
                 feed_dict={X: X_val_batch_aug, y_raw: y_val[lower_i:higher_i], training: False})

    # Trailing partial batch: not augmented.  The guard must look at the
    # *validation* remainder -- feeding an empty batch would corrupt the
    # running validation metrics when len(X_val) is a multiple of batch_size.
    tail_start = num_full_batches * batch_size
    X_val_tail = X_val[tail_start:]
    if len(X_val_tail) > 0:
        sess.run(update_ops,
                 feed_dict={X: X_val_tail, y_raw: y_val[tail_start:], training: False})


def run_that_model(P_LEVEL, p_g, NUM_EPOCHS, FINAL, LEARNING_RATE, MODEL_DEF_LIST, REG_FLAG=False):
    """Build the graph for one progressive level, train it and checkpoint it.

    Reads the notebook globals X_tr, y_tr, X_val, y_val, batch_size, IMG_WIDTH,
    IMG_HEIGHT and SQUARE_DIM.  X_tr / y_tr are re-bound (shuffled) every epoch.

    Args:
        P_LEVEL: level index; forwarded to ``build_custom`` and used for the
            log directory, the variable-reuse scope pattern and the checkpoint name.
        p_g: previous graph.  Currently unused; kept for the existing call sites.
        NUM_EPOCHS: maximum number of passes over the training set.
        FINAL: selects the "_dense" log dir / "FINAL" checkpoint name and the
            stronger validation augmentation settings.
        LEARNING_RATE: value fed for the graph's ``learning_rate`` tensor.
        MODEL_DEF_LIST: model definition forwarded to ``build_custom``.
        REG_FLAG: forwarded to ``build_custom`` as ``REG``.

    Returns:
        Tuple ``(g, best_model_params, saver)`` where ``best_model_params`` is
        the result of ``get_model_params()`` taken at the epoch with the lowest
        mean validation loss (``None`` if no epoch improved on infinity).
    """
    # only the names that are re-bound below need a global declaration
    global X_tr
    global y_tr
    print("batch_size: ", batch_size)

    #####################################
    # ----------- augmentation
    ##### training (rotation is disabled for both FINAL and non-FINAL runs)
    AUG_BOOL = True
    train_aug_kwargs = {"lrf_p": 0.25, "upf_p": 0.25, "r_degree": 0.0, "roll_num": 20}

    ##### validation
    VAL_AUG_BOOL = True
    if FINAL:
        val_aug_kwargs = {"lrf_p": 0.5, "upf_p": 0.5, "roll_num": 20}
    else:
        val_aug_kwargs = {"lrf_p": 0.25, "upf_p": 0.25, "roll_num": 10}
    ######################################

    # --- save best params
    best_model_params = None
    best_loss_val = np.inf

    # ------ for "early-ish" stopping (counted in epochs)
    checks_since_last_progress = 0
    max_checks_without_progress = 150

    # Tensorboard
    now = datetime.now().strftime("%d%b%Y_%H%M%S")
    root_logdir = "tf_logs_prog/" + str(P_LEVEL)
    if FINAL:
        root_logdir += "_dense"
    logdir = "{}/{}/".format(root_logdir, now)

    g, saver = build_custom(now, P_LEVEL, FINAL, REG=REG_FLAG, MODEL_DEFINITION=MODEL_DEF_LIST)

    if P_LEVEL > 1:
        # collect all weights/bias that aren't on this level (all previous levels)
        if FINAL:
            # TODO: load all conv vars
            scope_str = "cat_0[^a]"
        else:
            scope_str = "cat_0[^" + str(P_LEVEL) + "]"
            print(scope_str)
        reuse_vars = g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_str)
        reuse_vars_dict = {var.op.name: var for var in reuse_vars}
        print("restored {} vars: {}".format(len(reuse_vars), reuse_vars))
        # NOTE(review): this saver is constructed but restore() is never called
        # in this function, so nothing is actually loaded here -- confirm intent.
        restore_saver = tf.train.Saver(reuse_vars_dict)

    # unpack the handles that build_custom stored in graph collections
    (epoch_train_write_op, epoch_validation_write_op,
     train_batch_write_op, checkpoint_validation_write_op) = g.get_collection("write_ops")
    init_global, init_local = g.get_collection("save_init")
    (train_tf_acc, train_tf_acc_op, train_acc_reset_op, train_auc, train_auc_update,
     val_tf_acc, val_tf_acc_op, val_acc_reset_op, val_auc, val_auc_update,
     test_tf_acc, test_tf_acc_op, test_acc_reset_op, test_auc, test_auc_update) = g.get_collection("acc_metrics")
    (train_mean_loss, train_mean_loss_update, train_loss_reset_op,
     val_mean_loss, val_mean_loss_update, val_loss_reset_op,
     test_mean_loss, test_mean_loss_update, test_loss_reset_op) = g.get_collection("loss_metrics")
    training_op, X, y_raw, training, learning_rate = g.get_collection("main_ops")

    train_metric_updates = [train_tf_acc_op, train_auc_update, train_mean_loss_update]
    val_metric_updates = [val_tf_acc_op, val_auc_update, val_mean_loss_update]
    epoch_reset_ops = (val_acc_reset_op, val_loss_reset_op, train_acc_reset_op, train_loss_reset_op)

    step = 0
    with tf.Session(graph=g) as sess:
        init_global.run()
        init_local.run()
        for reset_op in epoch_reset_ops:
            sess.run(reset_op)

        # tensorboard writers (epoch level only)
        epoch_train_writer = tf.summary.FileWriter(os.path.join(logdir, "cnn/epoch/train/"))
        epoch_val_writer = tf.summary.FileWriter(os.path.join(logdir, "cnn/epoch/val/"))
        epoch_train_writer.add_graph(sess.graph)

        high_i = 0
        try:
            for epoch in range(NUM_EPOCHS):
                step += 1
                epoch_start_time = time.time()

                # Shuffle the training set (data and label)
                X_tr, y_tr = shuffle(X_tr, y_tr, random_state=42)

                for iteration in range(len(X_tr) // batch_size):
                    mini_step = iteration + 1
                    low_i = iteration * batch_size
                    high_i = low_i + batch_size
                    X_batch_raw = X_tr[low_i:high_i]
                    y_batch = y_tr[low_i:high_i]

                    # augment batch
                    # TODO: look into factoring class size
                    X_batch_aug = _augment_batch(X_batch_raw, augment_image,
                                                 AUG_BOOL, **train_aug_kwargs)

                    # training operation (back prop+)
                    sess.run(training_op, feed_dict={X: X_batch_aug,
                                                     y_raw: y_batch,
                                                     training: True,
                                                     learning_rate: LEARNING_RATE})

                    # update training metrics, acc and loss, (evaluation on current batch)
                    sess.run(train_metric_updates, feed_dict={X: X_batch_aug,
                                                              y_raw: y_batch,
                                                              training: False})

                    # ============================================================[validation]
                    # every 8th mini-batch: run validation 3x w/ (lesser) augmentation
                    if mini_step % 8 == 0:
                        for _ in range(3):
                            _run_validation_pass(sess, val_metric_updates, X, y_raw, training,
                                                 VAL_AUG_BOOL, val_aug_kwargs)
                    # ========================================================[validation end]

                # Train on the remaining partial batch, if it exists; metrics
                # aren't updated since the batch size could be really small.
                X_batch_raw = X_tr[high_i:]
                y_batch = y_tr[high_i:]
                if len(X_batch_raw) > 0:
                    X_batch_augx = _augment_batch(X_batch_raw, augment_image,
                                                  AUG_BOOL, **train_aug_kwargs)
                    sess.run(training_op, feed_dict={X: X_batch_augx,
                                                     y_raw: y_batch,
                                                     training: True,
                                                     learning_rate: LEARNING_RATE})

                # ===================================== Epoch metrics (log and print)
                # ----- training
                epoch_acc_print, epoch_auc_print, epoch_loss_print = sess.run(
                    [train_tf_acc, train_auc, train_mean_loss])
                epoch_train_writer.add_summary(sess.run(epoch_train_write_op), step)
                epoch_train_writer.flush()

                # ----- validation
                epoch_val_acc_print, epoch_val_auc_print, epoch_val_loss_print = sess.run(
                    [val_tf_acc, val_auc, val_mean_loss])
                epoch_val_writer.add_summary(sess.run(epoch_validation_write_op), step)
                epoch_val_writer.flush()

                print("E:{}tr: auc {:.2f}%|acc {:.2f}%|{:.4f} || val: auc {:.2f}%|acc {:.2f}%|{:.4f}".format(
                    epoch,
                    epoch_auc_print*100,
                    epoch_acc_print*100,
                    epoch_loss_print,
                    epoch_val_auc_print*100,
                    epoch_val_acc_print*100,
                    epoch_val_loss_print))
                # ====================================================================

                # save 'best' model (lowest epoch avg for validation loss)
                if epoch_val_loss_print < best_loss_val:
                    best_loss_val = epoch_val_loss_print
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                    print("Best params saved| val loss: {:.4f}".format(best_loss_val))
                else:
                    checks_since_last_progress += 1

                if checks_since_last_progress >= max_checks_without_progress:
                    print("Early Stopping")
                    break

                # estimate time remaining: `epoch` is 0-based and has just
                # finished, so NUM_EPOCHS - epoch - 1 epochs are still to run
                epoch_elapsed_time = time.time() - epoch_start_time
                est_time_remaining = (epoch_elapsed_time * (NUM_EPOCHS - epoch - 1)) / 60
                print("E time {:.1f} secs, Est.Time Remain: {:.1f} mins".format(epoch_elapsed_time,
                                                                                 est_time_remaining))
                print("-----------------------------")

                # start the next epoch with fresh running metrics
                for reset_op in epoch_reset_ops:
                    sess.run(reset_op)
        finally:
            # done with training writers (also released if training raised)
            epoch_train_writer.close()
            epoch_val_writer.close()

        # save session so we can re-enter at a later time
        level_tag = "FINAL" if FINAL else str(P_LEVEL)
        saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + level_tag
        saver.save(sess, saver_path)
        return g, best_model_params, saver

# notebook feedback: printed once run_that_model has been defined
print("ready")

## Build Layer by Layer
### Layer 0

In [None]:
# Accumulated model definition: maps a layer key to its list of component
# dicts.  The cells below extend it as each level is trained.
MASTER_list = collections.OrderedDict()

In [None]:
# start from a clean slate: no graph, best params, saver or session yet
g = bmp = saver = sess = None

NUM_epochs = 1

# level-0 definition: a single "nested" component stored under key "N"
# (an {"type": "avgpool"} component could be appended here as well)
component_list = [
    {"type": "nested", "params": {"shape": "square", "size": 3, "stride": 1, "depth": 2}},
]
new_layer = {"N": component_list}

# run graph
START_TIME = time.time()

# pass previous graph (g) and create new graph
P_LEVEL = 0
print("P_LEVEL: ", P_LEVEL)

# nothing to restore on the first level, so train straight away
g, bmp, saver = run_that_model(
    P_LEVEL,
    g,
    NUM_EPOCHS=NUM_epochs,
    FINAL=False,
    LEARNING_RATE=1e-7,
    MODEL_DEF_LIST=new_layer,
    REG_FLAG=False,
)

END_TIME = time.time()
complete = True
print("DONE")

In [None]:
NUM_epochs = 40

# candidate layer "R": 1x1 and 3x3 square convs, 5 and 7 "split" convs, then avgpool
component_list = []
component_list.append({"type": "conv", "params":{"shape":"square", "size":1, "stride":1}})
component_list.append({"type": "conv", "params":{"shape":"square", "size":3, "stride":1}})
component_list.append({"type": "conv", "params":{"shape":"split", "size":5, "stride":1}})
component_list.append({"type": "conv", "params":{"shape":"split", "size":7, "stride":1}})
component_list.append({"type": "avgpool"})
new_layer = {"R": component_list}

# Work on a *copy* of the master definition: MASTER_list must only receive the
# new layer from the follow-up "if complete:" cell, i.e. after training finished.
custom_model_def = collections.OrderedDict(MASTER_list)
custom_model_def.update(new_layer)

# run graph
START_TIME = time.time()

# pass previous graph (g) and create new graph
P_LEVEL = 0
print("P_LEVEL: ", P_LEVEL)

if bmp:
    # NOTE(review): with P_LEVEL == 0 this looks for a "..._-1" checkpoint, and
    # the session opened here is neither passed to run_that_model nor closed --
    # confirm this restore is still wanted.
    saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + str(P_LEVEL - 1)
    sess = tf.Session(graph=g)
    saver.restore(sess, saver_path)
    restore_model_params(bmp, g, sess)

g, bmp, saver = run_that_model(P_LEVEL, g, NUM_EPOCHS=NUM_epochs, FINAL=False, LEARNING_RATE=1e-7, MODEL_DEF_LIST=custom_model_def, REG_FLAG=False)

END_TIME = time.time()
complete = True
print("DONE")

In [None]:
# Commit step: fold the definition that was just trained into MASTER_list and
# advance to the next level.  `complete` guards against running this twice.
if not complete:
    print("I think it was already added")
else:
    MASTER_list.update(custom_model_def)
    complete = False
    P_LEVEL += 1
print("current list:", MASTER_list)

In [None]:
NUM_epochs = 40

# candidate layer "C": a single 1x1 square conv
component_list = []
component_list.append({"type": "conv", "params":{"shape":"square", "size":1, "stride":1}})
new_layer = {"C": component_list}

# Work on a *copy* of the master definition: MASTER_list must only receive the
# new layer from the follow-up "if complete:" cell, i.e. after training finished.
custom_model_def = collections.OrderedDict(MASTER_list)
custom_model_def.update(new_layer)

# run graph
START_TIME = time.time()

print("P_LEVEL: ", P_LEVEL)

if bmp:
    # reload the previous level's checkpoint and its best parameters
    # NOTE(review): this session is neither passed to run_that_model nor
    # closed -- confirm it is still needed.
    saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + str(P_LEVEL - 1)
    sess = tf.Session(graph=g)
    saver.restore(sess, saver_path)
    restore_model_params(bmp, g, sess)

g, bmp, saver = run_that_model(P_LEVEL, g, NUM_EPOCHS=NUM_epochs, FINAL=False, LEARNING_RATE=1e-7, MODEL_DEF_LIST=custom_model_def, REG_FLAG=False)

END_TIME = time.time()
complete = True
print("DONE")

In [None]:
# Commit step: merge the freshly trained definition into MASTER_list and bump
# the level counter; skipped when the previous training cell was not re-run.
if not complete:
    print("I think it was already added")
else:
    MASTER_list.update(custom_model_def)
    complete = False
    P_LEVEL += 1
print("current list:", MASTER_list)

In [None]:
NUM_epochs = 1

# candidate layer: strided 3x3 square conv followed by maxpool
# NOTE(review): key "R" is also used by an earlier cell; dict.update() replaces
# that entry rather than appending a new layer -- confirm this is intended.
component_list = []
component_list.append({"type": "conv", "params":{"shape":"square", "size":3, "stride":2}})
component_list.append({"type": "maxpool"})
new_layer = {"R": component_list}

# Work on a *copy* of the master definition: MASTER_list must only receive the
# new layer from the follow-up "if complete:" cell, i.e. after training finished.
custom_model_def = collections.OrderedDict(MASTER_list)
custom_model_def.update(new_layer)

# run graph
START_TIME = time.time()

print("P_LEVEL: ", P_LEVEL)

if bmp:
    # reload the previous level's checkpoint and its best parameters
    # NOTE(review): this session is neither passed to run_that_model nor
    # closed -- confirm it is still needed.
    saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + str(P_LEVEL - 1)
    sess = tf.Session(graph=g)
    saver.restore(sess, saver_path)
    restore_model_params(bmp, g, sess)

g, bmp, saver = run_that_model(P_LEVEL, g, NUM_EPOCHS=NUM_epochs, FINAL=False, LEARNING_RATE=1e-7, MODEL_DEF_LIST=custom_model_def, REG_FLAG=False)

END_TIME = time.time()
complete = True
print("DONE")

In [None]:
# Commit step: record the trained definition in MASTER_list and move on to the
# next level, unless this cell already ran for the current training result.
if not complete:
    print("I think it was already added")
else:
    MASTER_list.update(custom_model_def)
    complete = False
    P_LEVEL += 1
print("current list:", MASTER_list)

In [None]:
# add fc classifier
# LAST_LEVEL = NUM_LAYERS
# print(LAST_LEVEL)
# if bmp:
#     # restore params from final model
#     saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + str(LAST_LEVEL)
#     sess = tf.Session(graph=g)
#     saver.restore(sess, saver_path)
#     restore_model_params(bmp, g, sess)

# g_f, bmp_f, saver_f = run_that_model(LAST_LEVEL, g, NUM_EPOCHS, True, LEARNING_RATE=0.0001)
# END_TIME = time.time()

## Evaluate

In [None]:
# Evaluate the most recently trained level (checkpoint P_LEVEL - 1) on the test set.
saver_path = "./saver/pinn_" + str(SQUARE_DIM) + "_" + str(P_LEVEL - 1)

# handles stored in the graph collections by build_custom
init_global, init_local = g.get_collection("save_init")
training_op, X, y_raw, training, learning_rate = g.get_collection("main_ops")
train_tf_acc, train_tf_acc_op, train_acc_reset_op, train_auc, train_auc_update, val_tf_acc, val_tf_acc_op, val_acc_reset_op, val_auc, val_auc_update, test_tf_acc, test_tf_acc_op, test_acc_reset_op, test_auc, test_auc_update = g.get_collection("acc_metrics")
train_y_true_cls, train_y_pred_cls = g.get_collection("test_metrics")
train_mean_loss, train_mean_loss_update, train_loss_reset_op, val_mean_loss, val_mean_loss_update, val_loss_reset_op, test_mean_loss, test_mean_loss_update, test_loss_reset_op = g.get_collection("loss_metrics")

num_classes = 2

# Confusion matrix accumulated on the host: rows are the true class, columns
# the predicted class (same layout as tf.confusion_matrix).  Keeping it in
# numpy avoids adding new ops to the graph for every test batch.
confusion_mat = np.zeros((num_classes, num_classes), dtype=np.int32)

with tf.Session(graph=g) as sess:
    # Initialise first, *then* restore: running an initializer after
    # saver.restore() would overwrite the restored weights.
    sess.run(init_global)
    sess.run(init_local)
    saver.restore(sess, saver_path)

    # restore to the best parameters to calculate test metrics
    if bmp:
        restore_model_params(bmp, g, sess)

    sess.run(test_acc_reset_op)
    sess.run(test_loss_reset_op)

    # Stepping by batch_size covers every full batch and the trailing partial
    # one, and is a no-op for an empty test set.
    for low_i in range(0, len(X_test), batch_size):
        high_i = low_i + batch_size
        X_test_batch = X_test[low_i:high_i]
        y_test_batch = y_test[low_i:high_i]

        # one forward pass per batch: metric updates + class ids together
        fetched = sess.run([test_tf_acc_op, test_mean_loss_update, test_auc_update,
                            train_y_true_cls, train_y_pred_cls],
                           feed_dict={X: X_test_batch, y_raw: y_test_batch, training: False})
        y_true_ids = np.reshape(fetched[3], -1).astype(np.int64)
        y_pred_ids = np.reshape(fetched[4], -1).astype(np.int64)
        # add.at accumulates correctly when the same (true, pred) pair repeats
        np.add.at(confusion_mat, (y_true_ids, y_pred_ids), 1)

    final_test_acc, final_test_loss, final_test_auc = sess.run([test_tf_acc, test_mean_loss, test_auc])

    print(">>>>>>>>>> test auc: {:.3f}% acc: {:.2f} loss: {:.5f}".format(final_test_auc*100, 
                                                                         final_test_acc*100,
                                                                         final_test_loss))

    print(confusion_mat)