In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

import tensorflow as tf
import pickle

from functions.build_constant_network import build_network
from functions.multiple_bar_plot import multiple_bar_plot

import copy

PATH = '/home/claudia/Dokumente/Uni/lab_rotation_TU/compare_AER_AEUR'

### Initialize AER

In [2]:
AER_file = PATH + '/network_params/AER_finetuned_params_complete.p'
AER_params = pickle.load(open(AER_file, 'rb'))

W_AER = AER_params['weights_incl_softmax']
B_AER = AER_params['bias_incl_softmax']

x_AER, y_AER, weights_AER, bias_AER, activations_AER, measures_AER = \
build_network(W_AER, B_AER) 

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()


### Load Datasets

In [3]:
DATA = pickle.load(open(PATH +'/datasets/rotated_mnist_test.p', 'rb'))
mnist = DATA['mnist']
mnist_30 = DATA['mnist_30']
mnist_60 = DATA['mnist_60']
mnist_90 = DATA['mnist_90']

DIGIT_IDX = pickle.load(open(PATH +'/datasets/mnist_test_digit_indices.p', 'rb'))
LABELS = pickle.load(open(PATH +'/datasets/mnist_test_lables.p', 'rb'))

### Digitwise Accuracy of Manipulated Networks

In [5]:
layer_number = 4
layer = activations_AER['h_'+ str(layer_number)]
print('DATASET \t AER \t\t AER_manip1 \t AER_manip2 \t AER_manip3 \t AER_control')

for i in range(10): 
    
    # select a digit and define the corresponding rotated& unrotated datasets
    digit = i
    unrot = mnist[DIGIT_IDX['only_'+str(digit)]]
    rot_90 = mnist_90[DIGIT_IDX['only_'+ str(digit)]]
    labels = LABELS[DIGIT_IDX['only_'+ str(digit)]]

    # perform classification for the rotated and unrotated dataset & 
    # save layer 4 activations
    y_0 = sess.run(layer, feed_dict={x_AER: unrot.T, y_AER: labels.T})
    y_90 = sess.run(layer, feed_dict={x_AER: rot_90.T, y_AER: labels.T})

    # z_x holds the indices of the maximally active neuron for each image
    # zz_x holds the number of times each neuron was the maximally active
    z_0 = np.argmax(y_0, axis = 0)
    zz_0 = np.bincount(z_0)
    z_90 = np.argmax(y_90, axis = 0)
    zz_90 = np.bincount(z_90) 
    
    # unrot_and_digit conatins the indices of all neurons that were maximally active
    # during the classification of the unrotated datset, 
    # rot_and_digit conatins the indices of all neurons that were maximally active
    # during the classification of the rotated datset, 
    unrot_and_digit = np.flatnonzero(zz_0)
    rot_and_digit = np.flatnonzero(zz_90)

    # digit_neurons contains the indices of all neurons that were maximally active 
    # during both classification tasks
    # rot_neurons contains the indices of all neurons that were maximally active 
    # during the classification of rotated data but not of unrotated
    # unrot_neurons contains the indices of all neurons that were maximally active 
    # during the classification of unrotated data but not of rotated
    digit_neurons = list(set(rot_and_digit).intersection(set(unrot_and_digit)))
    rot_neurons = list(set(rot_and_digit) - set(digit_neurons))
    unrot_neurons = list(set(unrot_and_digit) - set(digit_neurons))

    N = np.max([len(digit_neurons), len(rot_neurons), len(unrot_neurons)])
    control_neurons = np.random.randint(0,128, N)
    
    ##########################################################################################
    # CREATE MANIPULATED NETWORKS
    ##########################################################################################

    # in the manip1-weights all conections from layer4 neurons with the inidces from the set 
    # digit_neurons to the softmax layer are set to 0
    W_AER_manip1 = copy.deepcopy(W_AER)
    W_AER_manip1[layer_number][:, digit_neurons] = 0
    
    x_AER_manip1, y_AER_manip1, weights_AER_manip1, bias_AER_manip1, activations_AER_manip1,\
    measures_AER_manip1 = build_network(W_AER_manip1, B_AER)

    # in the manip2-weights all conections from layer4 neurons with the inidces from the set 
    # rot_neurons to the softmax layer are set to 0
    W_AER_manip2 = copy.deepcopy(W_AER)
    W_AER_manip2[layer_number][:, rot_neurons] = 0
    
    x_AER_manip2, y_AER_manip2, weights_AER_manip2, bias_AER_manip2, activations_AER_manip2,\
    measures_AER_manip2 = build_network(W_AER_manip2, B_AER) 
    
    # in the manip3-weights all conections from layer4 neurons with the inidces from the set 
    # unrot_neurons to the softmax layer are set to 0
    W_AER_manip3 = copy.deepcopy(W_AER)
    W_AER_manip3[layer_number][:, unrot_neurons] = 0
    
    x_AER_manip3, y_AER_manip3, weights_AER_manip3, bias_AER_manip3, activations_AER_manip3,\
    measures_AER_manip3 = build_network(W_AER_manip3, B_AER) 

    # in the control-weights N connections from layer 4 to softmax are chosen at random and 
    # set to 0
    W_AER_control = copy.deepcopy(W_AER)
    W_AER_control[layer_number][:, control_neurons] = 0

    x_AER_control, y_AER_control, weights_AER_control, bias_AER_control, activations_AER_control,\
    measures_AER_control = build_network(W_AER_control, B_AER) 

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    
    ##########################################################################################
    # CALCULATE ACCURACIES FOR ALL NETWORKS AND THE ROTATED AND UNROTATED DATASETS 
    ##########################################################################################
    
    acc_AER_0 = sess.run(measures_AER['accuracy'], feed_dict={x_AER: unrot.T, y_AER: labels.T})
    acc_AER_90 = sess.run(measures_AER['accuracy'], feed_dict={x_AER: rot_90.T, y_AER: labels.T})

    acc_AER_manip1_0 = sess.run(measures_AER_manip1['accuracy'], feed_dict={x_AER_manip1: unrot.T, y_AER_manip1: labels.T})
    acc_AER_manip1_90 = sess.run(measures_AER_manip1['accuracy'], feed_dict={x_AER_manip1: rot_90.T, y_AER_manip1: labels.T})

    acc_AER_manip2_0 = sess.run(measures_AER_manip2['accuracy'], feed_dict={x_AER_manip2: unrot.T, y_AER_manip2: labels.T})
    acc_AER_manip2_90 = sess.run(measures_AER_manip2['accuracy'], feed_dict={x_AER_manip2: rot_90.T, y_AER_manip2: labels.T})

    acc_AER_manip3_0 = sess.run(measures_AER_manip3['accuracy'], feed_dict={x_AER_manip3: unrot.T, y_AER_manip3: labels.T})
    acc_AER_manip3_90 = sess.run(measures_AER_manip3['accuracy'], feed_dict={x_AER_manip3: rot_90.T, y_AER_manip3: labels.T})
    
    acc_AER_control_0 = sess.run(measures_AER_control['accuracy'], feed_dict={x_AER_control: unrot.T, y_AER_control: labels.T})
    acc_AER_control_90 = sess.run(measures_AER_control['accuracy'], feed_dict={x_AER_control: rot_90.T, y_AER_control: labels.T})

    print(str(digit)+'_unrot\t\t', acc_AER_0, '\t', acc_AER_manip1_0, '\t', acc_AER_manip2_0, '\t', acc_AER_manip3_0, '\t', acc_AER_control_0)
    print(str(digit)+'_rot_90\t', acc_AER_90, '\t', acc_AER_manip1_90, '\t', acc_AER_manip2_90, '\t', acc_AER_manip3_90, '\t', acc_AER_control_90)
    print('')



DATASET 	 AER 		 AER_manip1 	 AER_manip2 	 AER_manip3 	 AER_control
0_unrot		 0.967347 	 0.894898 	 0.931633 	 0.972449 	 0.895918
0_rot_90	 0.745918 	 0.631633 	 0.590816 	 0.761225 	 0.658163

1_unrot		 0.977974 	 0.954185 	 0.962996 	 0.961233 	 0.980617
1_rot_90	 0.990308 	 0.947137 	 0.942731 	 0.971806 	 0.981498

2_unrot		 0.778101 	 0.104651 	 0.734496 	 0.750969 	 0.351744
2_rot_90	 0.502907 	 0.0387597 	 0.467054 	 0.45155 	 0.151163

3_unrot		 0.780198 	 0.487129 	 0.69505 	 0.805941 	 0.841584
3_rot_90	 0.19703 	 0.0207921 	 0.167327 	 0.239604 	 0.229703

4_unrot		 0.762729 	 0.422607 	 0.822811 	 0.163951 	 0.911405
4_rot_90	 0.342159 	 0.0600815 	 0.39613 	 0.120163 	 0.594705

5_unrot		 0.780269 	 0.463004 	 0.477578 	 0.688341 	 0.673767
5_rot_90	 0.263453 	 0.0257848 	 0.139013 	 0.190583 	 0.284753

6_unrot		 0.847599 	 0.507307 	 0.877871 	 0.818372 	 0.140919
6_rot_90	 0.51357 	 0.0542797 	 0.629436 	 0.496868 	 0.0678497

7_unrot		 0.810311 	 0.551556 	 0.736381 	

### Discussion:
The four columns of the above table show the classification results for 4 types of networks: 
- AER: Unchanged AER network as in previous notebooks (Autoencoder- weights + retrained softmax).
- AER_manip1: weights connecting layer4 neurons that showed maximal activity during the classification of both rotated and unrotated dataset to the softmax are set to 0.
- AER_manip2: weights connecting layer4 neurons that showed maximal activity during the classification of the rotated dataset (but not during the classifictaion of the unrotated dataset) to the softmax are set to 0.
- AER_manip3: weights connecting layer4 neurons that showed maximal activity during the classification of the unrotated dataset (but not during the classifictaion of the rotated dataset) to the softmax are set to 0.
- AER_control: The same number of weights as in AER_manip1 are chosen at random and set to 0.
    
    
The neurons chosen for the manipx conditions seem to have the desired effect because switching them off (i.e. setting the relevant weights to 0) decreases the accuracy stronger than turning off an equivalent number of neurons at random. 

Not surprisingly the manip1 condition seems to reduce accuracy the most. The differences between manip2 and manip3 are more subtle and one has to take into account that for the rotated dataset the accuracy is generally lower. 

The case of digit 1 might be interesting because here we have close to perfect accuracy for both the roatated and the unrotated case: 

|DATASET| 	 AER| 		 AER_manip1| 	 AER_manip2| 	 AER_manip3| 	 AER_control|
|---|---|---|---|---|---|
|1_unrot|		 0.977974| 	 0.954185| 	     0.962996| 	     0.961233| 	     0.980617|
|1_rot_90|	 0.990308| 	 0.947137| 	     0.942731| 	     0.971806| 	     0.981498|

when we remove the neurons that are only active for the rotated set (manip2) accuracy for the roatated data decreases stronger than for the unrotated. 
On the other hand if we remove the neurons that are only active in the unrotated case we see the opposite picture. 