In [1]:
import tensorflow as tf
import numpy as np
from tfUtils import *
import matplotlib.pyplot as plt
import innvestigate

Using TensorFlow backend.


In [2]:
from keras.layers import Input, Activation, Dense, Flatten, ZeroPadding2D, Conv2D, BatchNormalization, MaxPooling2D
from keras.models import Model

In [3]:
def generate_data(m, n_x, seed = 9):
    np.random.seed(seed)
    X = np.random.randn(m, n_x)
    Y = np.zeros((m, 2))
    dist = np.random.randint(low = 0, high = n_x-1, size = m)
    for i in range(m):
        t = np.random.rand();
        if t < 0.5:
            X[i, dist[i]] = -20
            Y[i, 0] = 1
        elif t >= 0.5:
            X[i, dist[i]] = 20
            Y[i, 1] = 1
    return X, Y, dist

In [4]:
def initialize_parameters(shape, name, which):
    if which == "weights":
        return tf.get_variable(name, shape, 
                               initializer = tf.contrib.layers.xavier_initializer())
    elif which == "bias":
        return tf.get_variable(name, shape, 
                              initializer = tf.zeros_initializer())
    return

In [5]:
def create_placeholders(n_h, n_w, n_c, n_y):
    X = tf.placeholder(tf.float32, [None, n_h, n_w, n_c])
    Y = tf.placeholder(tf.float32, [None, n_y])
    return X, Y

In [6]:
def flatten(layer):
    size = layer.get_shape()
    num_features = size[1] * size[2] * size[3]
    flattened_layer = tf.reshape(layer, [-1, num_features])
    return flattened_layer, num_features

In [7]:
def fully_connected(layer, input_features, output_features, activation, indicator):
    weights = initialize_parameters([input_features, output_features], "W" + str(indicator), "weights")
    bias = initialize_parameters([1, output_features], "b" + str(indicator), "bias")
    Z = tf.matmul(layer, weights) + bias
    if activation == "relu":
        return tf.nn.relu(Z), weights, bias
    elif activation == "sigmoid":
        return tf.nn.sigmoid(Z), weights, bias
    elif activation == "tanh":
        return tf.nn.tanh(Z), weights, bias
    elif activation == "linear":
        return Z, weights, bias

In [8]:
def conv_2D(layer, filter_size, output_channels, stride, padding, activation, use_maxpool, indicator):
    input_channels = layer.get_shape()[3]
    weights = initialize_parameters([filter_size, filter_size, input_channels, output_channels], "W" + str(indicator), 
                                   "weights")
    bias = initialize_parameters([output_channels], "b" + str(indicator), "bias")
    Z = tf.nn.conv2d(layer, weights, strides = [1, stride, stride, 1], padding = padding) + bias
    if activation == "relu":
        A = tf.nn.relu(Z)
    elif activation == "sigmoid":
        A = tf.nn.sigmoid(Z)
    elif activation == "tanh":
        A = tf.nn.tanh(Z)
    elif activation == "linear":
        A = Z
    if use_maxpool:
        return tf.nn.max_pool(A, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'VALID'), weights, bias
    else:
        return A, weights, bias

In [9]:
def compute_cost(Z, Y):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Z, labels = Y))
    return cost

In [10]:
X_train, Y_train, dist = generate_data(50000, 144)
X_train = X_train.reshape(-1, 12, 12, 1)

In [11]:
def model(X_train, Y_train, layers, filter_size, stride, padding, activation,
          epochs, batch_size, starting_rate, decay, use_maxpool):
    tf.reset_default_graph()
    input_shape = X_train.shape
    output_shape = Y_train.shape
    X, Y = create_placeholders(input_shape[1], input_shape[2], input_shape[3], output_shape[1])
    parameters = {}
    A = X
    already_flat = 0
    for i in range(1, len(layers) + 1):
        if layers[i - 1][0] == 'conv_2D':
            A, parameters['W' + str(i)], parameters['b' + str(i)] = conv_2D(
                A, filter_size, layers[i - 1][1], stride, padding, activation[i - 1], use_maxpool, indicator = i)
        elif layers[i - 1][0] == 'fc':
            if already_flat == 0:
                A, input_layers = flatten(A)
                already_flat = 1
            else: input_layers = A.get_shape()[1]
            A, parameters['W' + str(i)], parameters['b' + str(i)] = fully_connected(
                A, input_layers, layers[i - 1][1], activation[i - 1], indicator = i)
    cost = compute_cost(A, Y)
    hard_A = tf.argmax(tf.nn.softmax(A, axis = 1), axis = 1)
    acc = tf.reduce_mean(tf.cast(tf.equal(hard_A, tf.argmax(Y, axis = 1)), tf.float32))
    global_steps = tf.Variable(0, trainable = False)
    learning_rate = tf.train.exponential_decay(starting_rate, global_steps, 5000, decay, staircase = True)
    train = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost, global_step = global_steps)
    init = tf.global_variables_initializer()
    cost_list = []
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs):
            minibatches = random_minibatches(X_train, Y_train, batch_size, seed = epoch)
            epoch_cost = 0
            for minibatch in minibatches:
                sess.run(train, feed_dict = {X : minibatch[0], Y : minibatch[1]})
                epoch_cost += sess.run(cost, feed_dict = {X : minibatch[0], Y : minibatch[1]}) / len(minibatches)
            cost_list.append(epoch_cost)        
            if epoch % 5 == 0:
                print(epoch_cost)
        parameter_names = []
        parameter_list = []
        for i in range(1, len(parameters) // 2 + 1):
            parameter_names.append("W" + str(i))
            parameter_names.append("b" + str(i))
        for j in range(len(parameter_names)):
            parameter_list.append(sess.run(parameters[parameter_names[j]]))
        print("accuracy", sess.run(acc, feed_dict = {X : X_train, Y : Y_train}))
    sess.close()
    plt.plot(np.array(cost_list), '-b')
    plt.show()
    return parameter_list

In [None]:
parameter_list_relu = model(X_train, Y_train, layers = [("conv_2D", 32), ("conv_2D", 64), ("fc", 512), ("fc", 1024), ("fc", 2)], 
                           filter_size = 3, stride = 1, padding = 'VALID', 
                            activation = ["relu", "relu", "relu", "relu", "linear"], epochs = 100, batch_size = 32, 
                            starting_rate = 3e-4, decay = .9, use_maxpool = True)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

0.2180096482785368
0.20378436575156922


In [None]:
parameter_list_tanh = model(X_train, Y_train, layers = [("conv_2D", 32), ("conv_2D", 64), ("fc", 512), ("fc", 1024), ("fc", 2)], 
                           filter_size = 3, stride = 1, padding = 'VALID', 
                            activation = ["tanh", "tanh", "tanh", "tanh", "linear"], epochs = 60, batch_size = 32, 
                            starting_rate = 5e-4, decay = .85, use_maxpool = True)

In [None]:
parameter_list_sigmoid = model(X_train, Y_train, layers = [("conv_2D", 32), ("conv_2D", 64), ("fc", 512), ("fc", 1024), ("fc", 2)], 
                           filter_size = 3, stride = 1, padding = 'VALID', 
                            activation = ["sigmoid", "sigmoid", "sigmoid", "sigmoid", "linear"], epochs = 100, batch_size = 16, 
                            starting_rate = 1e-4, decay = .9, use_maxpool = True)

In [None]:
def create_model(input_shape, activations, use_soft):
    X_input = Input(input_shape);
    X = Conv2D(32, (3,3), strides = (1,1), padding = "valid", name = "Z_1")(X_input);
    X = Activation(activations[0])(X);
    X = MaxPooling2D(pool_size = (2,2), strides = (2,2), padding = "valid", name = "max_pool_1")(X);
    X = Conv2D(64, (3,3), strides = (1,1), padding = "valid", name = "Z_2")(X);
    X = Activation(activations[1])(X);
    X = MaxPooling2D(pool_size = (2,2), strides = (2,2), padding = "valid", name = "max_pool_2")(X);
    X = Flatten()(X);
    X = Dense(512, activation = activations[2], name = "A_3")(X)
    X = Dense(1024, activation = activations[3], name = "A_4")(X)
    if use_soft:
        X = Dense(2, activation = "softmax", name = "A_5")(X)
    else:
        X = Dense(2, activation = "linear", name = "Z_5")(X)
    model = Model(inputs = X_input, outputs = X)
    return model

In [None]:
np.save('Model Weights and relevant parameters/CNNs/relu_model_weights.npy', parameter_list_relu)
np.save('Model Weights and relevant parameters/CNNs/sigmoid_model_weights.npy', parameter_list_sigmoid)
np.save('Model Weights and relevant parameters/CNNs/tanh_model_weights.npy', parameter_list_tanh)

In [None]:
parameter_list_relu = np.load('Model Weights and relevant parameters/CNNs/relu_model_weights.npy')
parameter_list_sigmoid = np.load('Model Weights and relevant parameters/CNNs/sigmoid_model_weights.npy')
parameter_list_tanh = np.load('Model Weights and relevant parameters/CNNs/tanh_model_weights.npy')

In [None]:
modelp_relu = create_model(X_train.shape[1:], ["relu", "relu", "relu", "relu"], True)
model_relu = create_model(X_train.shape[1:], ["relu", "relu", "relu", "relu"], False)
modelp_tanh = create_model(X_train.shape[1:], ["tanh", "tanh", "tanh", "tanh"], True)
model_tanh = create_model(X_train.shape[1:], ["tanh", "tanh", "tanh", "tanh"], False)
modelp_sigmoid = create_model(X_train.shape[1:], ["sigmoid", "sigmoid", "sigmoid", "sigmoid"], True)
model_sigmoid = create_model(X_train.shape[1:], ["sigmoid", "sigmoid", "sigmoid", "sigmoid"], False)

In [None]:
modelp_relu.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
modelp_tanh.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
modelp_sigmoid.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
for i in range(len(parameter_list_relu)):
    if i % 2 == 1:
        parameter_list_relu[i] = parameter_list_relu[i].reshape(-1)
        parameter_list_tanh[i] = parameter_list_tanh[i].reshape(-1)
        parameter_list_sigmoid[i] = parameter_list_sigmoid[i].reshape(-1)

In [None]:
modelp_relu.set_weights(parameter_list_relu)
model_relu.set_weights(parameter_list_relu)
modelp_relu.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
modelp_relu.evaluate(x = X_train, y = Y_train)

In [None]:
modelp_tanh.set_weights(parameter_list_tanh)
model_tanh.set_weights(parameter_list_tanh)
modelp_tanh.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
modelp_tanh.evaluate(x = X_train, y = Y_train)

In [None]:
modelp_sigmoid.set_weights(parameter_list_sigmoid)
model_sigmoid.set_weights(parameter_list_sigmoid)
modelp_sigmoid.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
modelp_sigmoid.evaluate(x = X_train, y = Y_train)

In [None]:
analyzer_relu = innvestigate.create_analyzer("lrp.alpha_2_beta_1_IB", model_relu)
analyzer_sigmoid = innvestigate.create_analyzer("lrp.alpha_2_beta_1", model_sigmoid)
analyzer_tanh = innvestigate.create_analyzer("lrp.alpha_2_beta_1", model_tanh)

In [None]:
analysis_relu = analyzer_relu.analyze(X_train[:, :])
analysis_sigmoid = analyzer_sigmoid.analyze(X_train[:, :])
analysis_tanh = analyzer_tanh.analyze(X_train[:, :])

In [None]:
rel_me = np.zeros((len(dist), 144))
for i in range(len(dist)):
    rel_me[i, dist[i]] = 1

In [None]:
rel_me = np.reshape(rel_me, (-1, 12, 12)) 

In [None]:
plt.close()
for i in range(100):
    f, axarr = plt.subplots(1, 4, figsize=(12, 12))
    
    m_relu = np.max(np.abs(analysis_relu[i,:,:,0]))
    a_relu = ((analysis_relu[i,:,:,0] / m_relu) + 1) / 2
    
    m_sigmoid = np.max(np.abs(analysis_sigmoid[i,:,:,0]))
    a_sigmoid = ((analysis_sigmoid[i,:,:,0] / m_sigmoid) + 1) / 2
    
    m_tanh = np.max(np.abs(analysis_tanh[i,:,:,0]))
    a_tanh = ((analysis_tanh[i,:,:,0] / m_tanh) + 1) / 2
    
    fig = axarr[0].imshow(a_relu, vmax = 1, vmin = 0, cmap = "jet")
    fig = axarr[1].imshow(a_sigmoid, vmax = 1, vmin = 0, cmap = "jet")
    fig = axarr[2].imshow(a_tanh, vmax = 1, vmin = 0, cmap = "jet")
    fig = axarr[3].imshow(rel_me[i,:,:], cmap = "binary")
plt.show()