In [29]:
#%%time
#To hide warnings export PYTHONWARNINGS="ignore"
#Imports{

import os
from os.path import dirname
from os.path import join
os.environ['KERAS_BACKEND'] = 'theano'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Cha


import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    
import pickle
import numpy as np

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd

from scipy.io import loadmat
from scipy import stats
#from scipy.interpolate import spline
from sklearn.cluster import MiniBatchKMeans
from scipy.stats import percentileofscore

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.cluster import KMeans

import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, MaxPooling2D, Input, ZeroPadding2D,merge,Lambda
from keras.layers.convolutional import Convolution2D
from keras.models import Model
from keras.utils.layer_utils import convert_all_kernels_in_model
from keras.optimizers import SGD
from keras import backend as K
from keras.engine import Layer
from keras.layers.core import Lambda
from keras.utils.vis_utils import plot_model
from keras.layers.core import  Lambda
from keras.regularizers import l2
import cv2
#}

In [30]:
#Code snippet needed to read activation values from each layer of the pre-trained artificial neural networks
def get_activations(model, layer, X_batch):
    #keras.backend.function(inputs, outputs, updates=None)
    get_activations = K.function([model.layers[0].input, K.learning_phase()], [model.layers[layer].output,])
    #The learning phase flag is a bool tensor (0 = test, 1 = train)
    activations = get_activations([X_batch,0])
    return activations

In [31]:
def preprocess_image(image_paths, image_height=224, image_width=224,color_mode='rgb'):
    """resize images to the appropriate dimensions
    :param image_width:
    :param image_height:
    :param image: image
    :return: image
    """
    img_list = []
    
    for im_path in image_paths:
        image = cv2.imread(im_path)
        image = cv2.resize(image, (image_height, image_width))
        
        image = image.astype('float32')
        image[:, :, 0] -= 123.68
        image[:, :, 1] -= 116.779
        image[:, :, 2] -= 103.939
        if color_mode == 'bgr':
            image = image.transpose((2, 0, 1))
        
        img_list.append(image)
        
    try:
        img_batch = np.stack(img_list, axis=0)
    except:
        print im_path
        raise ValueError('when img_size and crop_size are None, images'
                ' in image_paths must have the same shapes.')
    return img_batch

In [32]:
#Helper function to normalization across channels
K.set_image_dim_ordering('th')
def crosschannelnormalization(alpha=1e-4, k=2, beta=0.75, n=5, **kwargs):
    """
    This is the function used for cross channel normalization in the original
    Alexnet
    """
    def f(X):
        if K.image_dim_ordering()=='tf':
            b, r, c, ch = X.get_shape()
        else:
            b, ch, r, c = X.shape

        half = n // 2
        square = K.square(X)
        scale = k
        if K.image_dim_ordering() == 'th':
            extra_channels = K.spatial_2d_padding(K.permute_dimensions(square, (0, 2, 3, 1)), ((0,0),(half,half)))
            extra_channels = K.permute_dimensions(extra_channels, (0, 3, 1, 2))
            for i in range(n):
                scale += alpha * extra_channels[:, i:i+ch, :, :]
        if K.image_dim_ordering() == 'tf':
            extra_channels = K.spatial_2d_padding(K.permute_dimensions(square, (0, 3, 1, 2)), (half, 0))
            extra_channels = K.permute_dimensions(extra_channels, (0, 2, 3, 1))
            for i in range(n):
                scale += alpha * extra_channels[:, :, :, i:i+int(ch)]
        scale = scale ** beta
        return X / scale


    return Lambda(f, output_shape=lambda input_shape: input_shape, **kwargs)

In [33]:
#Helper Function to split tensor
def splittensor(axis=1, ratio_split=1, id_split=0, **kwargs):
    def f(X):
        div = K.shape(X)[axis] // ratio_split

        if axis == 0:
            output = X[id_split*div:(id_split+1)*div, :, :, :]
        elif axis == 1:
            output = X[:, id_split*div:(id_split+1)*div, :, :]
        elif axis == 2:
            output = X[:, :, id_split*div:(id_split+1)*div, :]
        elif axis == 3:
            output = X[:, :, :, id_split*div:(id_split+1)*div]
        else:
            raise ValueError("This axis is not possible")
        return output

    def g(input_shape):
        output_shape = list(input_shape)
        output_shape[axis] = output_shape[axis] // ratio_split
        return tuple(output_shape)


    return Lambda(f, output_shape=lambda input_shape: g(input_shape), **kwargs)

In [34]:
#Alexnet layer architecture class
def AlexNet(img_shape=(3, 227, 227), n_classes=1000, l2_reg=0.,weights_path=None, lambda_mask=None):

    dim_ordering = K.image_dim_ordering()
    print(dim_ordering)
    if dim_ordering == 'th':
        batch_index = 0
        channel_index = 1
        row_index = 2
        col_index = 3
    if dim_ordering == 'tf':
        batch_index = 0
        channel_index = 3
        row_index = 1
        col_index = 2
        
    
    inputs = Input(img_shape)

    conv_1 = Convolution2D(96, 11, 11, subsample=(4, 4), activation='relu',
                           name='conv_1', W_regularizer=l2(l2_reg))(inputs)

    if lambda_mask is not None:
        conv_1_mask  = np.reshape(lambda_mask[0:290400], (96,55,55))
    else:
        conv_1_mask = np.ones(shape=((96, 55, 55)))
    
    conv_1_mask  = K.variable(conv_1_mask)
    conv_1_lambda = Lambda(lambda x: x * conv_1_mask)(conv_1)

    conv_2 = MaxPooling2D((3, 3), strides=(2, 2))(conv_1_lambda)
    conv_2 = crosschannelnormalization(name="convpool_1")(conv_2)
    conv_2 = ZeroPadding2D((2, 2))(conv_2)
    conv_2 = merge([
        Convolution2D(128, 5, 5, activation="relu", name='conv_2_'+str(i+1),
                      W_regularizer=l2(l2_reg))(
            splittensor(axis=channel_index, ratio_split=2, id_split=i)(conv_2)
        ) for i in range(2)], mode='concat', concat_axis=channel_index, name="conv_2")

    if lambda_mask is not None:
        conv_2_mask  = np.reshape(lambda_mask[290400:477024],(256, 27, 27) )
    else:
        conv_2_mask = np.ones(shape=((256, 27, 27)))
        
    conv_2_mask = K.variable(conv_2_mask)
    conv_2_lambda = Lambda(lambda x: x * conv_2_mask)(conv_2)

    conv_3 = MaxPooling2D((3, 3), strides=(2, 2))(conv_2_lambda)
    conv_3 = crosschannelnormalization()(conv_3)
    conv_3 = ZeroPadding2D((1, 1))(conv_3)
    conv_3 = Convolution2D(384, 3, 3, activation='relu', name='conv_3',
                           W_regularizer=l2(l2_reg))(conv_3)

    if lambda_mask is not None:
        conv_3_mask  = np.reshape(lambda_mask[477024:541920],(384, 13, 13))
    else:
        conv_3_mask = np.ones(shape=((384, 13, 13)))
    
    conv_3_mask = K.variable(conv_3_mask)
    conv_3_lambda = Lambda(lambda x: x * conv_3_mask)(conv_3)

    conv_4 = ZeroPadding2D((1, 1))(conv_3_lambda)
    conv_4 = merge([
        Convolution2D(192, 3, 3, activation="relu", name='conv_4_'+str(i+1),
                      W_regularizer=l2(l2_reg))(
            splittensor(axis=channel_index, ratio_split=2, id_split=i)(conv_4)
        ) for i in range(2)], mode='concat', concat_axis=channel_index, name="conv_4")

    if lambda_mask is not None:
        conv_4_mask  = np.reshape(lambda_mask[541920:606816],(384, 13, 13))
    else:
        conv_4_mask = np.ones(shape=((384, 13, 13)))
        
    conv_4_mask = K.variable(conv_4_mask)
    conv_4_lambda = Lambda(lambda x: x * conv_4_mask)(conv_4)

    conv_5 = ZeroPadding2D((1, 1))(conv_4_lambda)
    conv_5 = merge([
        Convolution2D(128, 3, 3, activation="relu", name='conv_5_'+str(i+1),
                      W_regularizer=l2(l2_reg))(
            splittensor(axis=channel_index, ratio_split=2, id_split=i)(conv_5)
        ) for i in range(2)], mode='concat', concat_axis=channel_index, name="conv_5")

    if lambda_mask is not None:
        conv_5_mask  = np.reshape(lambda_mask[606816:650080],(256, 13, 13))
    else:
        conv_5_mask = np.ones(shape=((256, 13, 13)))
    
    conv_5_mask = K.variable(conv_5_mask)
    conv_5_lambda = Lambda(lambda x: x * conv_5_mask)(conv_5)

    dense_1 = MaxPooling2D((3, 3), strides=(2, 2), name="convpool_5")(conv_5_lambda)

    dense_1 = Flatten(name="flatten")(dense_1)
    dense_1 = Dense(4096, activation='relu', name='dense_1',
                    W_regularizer=l2(l2_reg))(dense_1)

    if lambda_mask is not None:
        dense_1_mask  = np.reshape(lambda_mask[650080:654176],(4096,))
    else:
        dense_1_mask = np.ones(shape=((4096,)))
    
    
    dense_1_mask = K.variable(dense_1_mask)
    dense_1_lambda = Lambda(lambda x: x * dense_1_mask)(dense_1)

    dense_2 = Dropout(0.5)(dense_1_lambda)
    dense_2 = Dense(4096, activation='relu', name='dense_2',
                    W_regularizer=l2(l2_reg))(dense_2)

    if lambda_mask is not None:
        dense_2_mask  = np.reshape(lambda_mask[654176:658272],(4096,))
    else:
        dense_2_mask = np.ones(shape=((4096,)))
    
    dense_2_mask = K.variable(dense_2_mask)
    dense_2_lambda = Lambda(lambda x: x * dense_2_mask)(dense_2)

    dense_3 = Dropout(0.5)(dense_2_lambda)
    if n_classes == 1000:
        dense_3 = Dense(n_classes, name='dense_3',
                        W_regularizer=l2(l2_reg))(dense_3)
    else:
        # We change the name so when loading the weights_file from a
        # Imagenet pretrained model does not crash
        dense_3 = Dense(n_classes, name='dense_3_new',
                        W_regularizer=l2(l2_reg))(dense_3)


    prediction = Activation("softmax", name="softmax")(dense_3)

    model = Model(input=inputs, output=prediction)
    if weights_path:
        model.load_weights(weights_path)

    return model

In [35]:
#Load the details of all the 1000 classes and the function to conver the synset id to words{
meta_clsloc_file = '../../data/meta_clsloc.mat'
synsets = loadmat(meta_clsloc_file)['synsets'][0]
synsets_imagenet_sorted = sorted([(int(s[0]), str(s[1][0])) for s in synsets[:1000]],key=lambda v: v[1])
corr = {}
for j in range(1000):
    corr[synsets_imagenet_sorted[j][0]] = j

corr_inv = {}
for j in range(1, 1001):
    corr_inv[corr[j]] = j

def id_to_words(id_):
    return synsets[corr_inv[id_] - 1][2][0]

def pprint_output(out, n_max_synsets=10):
    wids = []
    best_ids = out.argsort()[::-1][:10]
    for u in best_ids:
        wids.append(str(synsets[corr_inv[u] - 1][1][0]))
    #print('%.2f' % round(100 * out[u], 2) + ' : ' + id_to_words(u)+' '+ str(synsets[corr_inv[u] - 1][1][0]))
    return wids


In [36]:
#Code snippet to load the ground truth labels to measure the performance{
truth = {}
with open('../../data/ILSVRC2014_clsloc_validation_ground_truth.txt') as f:
    line_num = 1
    for line in f.readlines():
        ind_ = int(line)
        temp  = None
        for i in synsets_imagenet_sorted:
            if i[0] == ind_:
                temp = i
        #print ind_,temp
        if temp != None:
            truth[line_num] = temp
        else:
            print('##########', ind_)
            pass
        line_num += 1
#}

In [37]:
#Function to predict the top 5 accuracy
def top5accuracy(true, predicted):
    assert len(true) == len(predicted)
    result = []
    flag  = 0
    for i in range(len(true)):
        flag  = 0
        temp = true[i]
        for j in predicted[i][0:5]:
            if j == temp:
                flag = 1
                break
        if flag == 1:
            result.append(1)
        else:
            result.append(0)
    counter = 0.
    for i in result:
        if i == 1:
            counter += 1.
    error = 1.0 - counter/float(len(result))
    #print len(np.where(np.asarray(result) == 1)[0])
    return len(np.where(np.asarray(result) == 1)[0]), error


In [112]:
#Testing on test data{
data_path = '../../data/pkl/'
classes = ['animate','inanimate']

result = {}

with open(data_path+classes[0]+'_test.pkl','rb') as f:
    X_fold = pickle.load(f)
with open(data_path+classes[1]+'_test.pkl','rb') as f:
    y_fold = pickle.load(f)

X = np.column_stack((X_fold,y_fold))  
if os.path.exists('../../data/pkl/kmeans_first_test.pickle'):
    with open('../../data/pkl/kmeans_first_test.pickle',"rb") as f:
        X_new,pred_kmeans,kmeans = pickle.load(f)
else:   
   
    kmeans = MiniBatchKMeans(n_clusters=65827,
                             random_state=0,
                             batch_size=6,
                             max_iter=10).fit(X)
    #print kmeans.cluster_centers_
    pred_kmeans = kmeans.predict(X)
    X_new = kmeans.cluster_centers_


#DO CLUSTERING AND GET CLUSTERS

from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import Birch
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
#import genieclust
#import hdbscan
#import smm

j = 23 #Set this value from scree plot!
method = 'GMM'
print(j)
#clf = hdbscan.HDBSCAN(min_cluster_size=j, gen_min_span_tree=True)
#clf = DBSCAN(eps=5.443)
#clf = KMeans(n_clusters=j,random_state=143)
#clf= SpectralClustering(n_clusters=j,random_state=143)
#clf =  AgglomerativeClustering(n_clusters=j, linkage='ward')
#clf = Birch(branching_factor=50, n_clusters=j, threshold=0.5,compute_labels=True)
clf = GaussianMixture(n_components=j, covariance_type='full',max_iter=1000, random_state=42)
#clf= genieclust.genie.Genie(n_clusters=j)
#clf= smm.SMM(n_components=j, covariance_type='full', random_state=143, tol=1e-12,min_covar=1e-6, n_iter=1000, n_init=1, params='wmcd', init_params='wmcd')
temp = clf.fit(X_new)
y_pred = clf.predict(X_new)
#y_pred = clf.fit_predict(X_new)
print(set(y_pred))
#Z = clf.predict(X)

for label in set(y_pred):
    print('Cluster: ',j,'Label: ', label)

    #Lesioning and measuring performance
    #pred = clf.fit_predict(X_new)
    temp = clf.fit(X_new)
    pred = clf.predict(X_new)
    loc = np.where(pred==label)
    loc_temp = kmeans.predict(X_new[loc[0]])
    loc_new =[]
    for entry in set(loc_temp):
        temp = np.where(pred_kmeans==entry)[0]
        loc_new.extend(temp)

    lambda_mask = np.ones(shape=((658272,)))
    lambda_mask[loc_new] = 0.

    #plt.scatter(X[:,0],X[:,1], c=y_pred) 

    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model = AlexNet(weights_path="../../data/weights/alexnet_weights.h5",lambda_mask=lambda_mask)
    model.compile(optimizer=sgd, loss='mse')

    flag = 0
    dprime = 0.
    for p in classes:
        im_valid_test = []
        image_list_valid = '../../data/pkl/'+p+'_image_list_test.txt'
        with open(image_list_valid,'rb') as f:
            for line in f.readlines():
                im_valid_test.append(line.strip('\n'))
        im_temp = preprocess_image(im_valid_test,227,227, color_mode="bgr")
        out = model.predict(im_temp,batch_size=64)
        
        true_valid_wids = []
        for i in im_valid_test:
                temp1 = i.split('/')[4]
                temp = temp1.split('.')[0].split('_')[2]
                true_valid_wids.append(truth[int(temp)][1])

        predicted_valid_wids = []
        for i in range(len(im_valid_test)):
            #print im_list[i], pprint_output(out[i]), true_wids[i]
            predicted_valid_wids.append(pprint_output(out[i]))

        count, error  = top5accuracy(true_valid_wids, predicted_valid_wids)

        print(str(p)+' '+str(count)+' '+str(len(im_valid_test))+' '+str(error)+' '+str(1-error))

        if flag == 0:
            dprime = error
            flag = 1
        else:
            dprime -= error

    result[label] = dprime

23
set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22])
('Cluster: ', 23, 'Label: ', 0)
th
animate 0 39 1.0 0.0
inanimate 18 39 0.538461538462 0.461538461538
('Cluster: ', 23, 'Label: ', 1)
th
animate 30 39 0.230769230769 0.769230769231
inanimate 28 39 0.282051282051 0.717948717949
('Cluster: ', 23, 'Label: ', 2)
th
animate 31 39 0.205128205128 0.794871794872
inanimate 28 39 0.282051282051 0.717948717949
('Cluster: ', 23, 'Label: ', 3)
th
animate 33 39 0.153846153846 0.846153846154
inanimate 27 39 0.307692307692 0.692307692308
('Cluster: ', 23, 'Label: ', 4)
th
animate 31 39 0.205128205128 0.794871794872
inanimate 27 39 0.307692307692 0.692307692308
('Cluster: ', 23, 'Label: ', 5)
th
animate 33 39 0.153846153846 0.846153846154
inanimate 27 39 0.307692307692 0.692307692308
('Cluster: ', 23, 'Label: ', 6)
th
animate 32 39 0.179487179487 0.820512820513
inanimate 26 39 0.333333333333 0.666666666667
('Cluster: ', 23, 'Label: ', 7)
th
animate 30 39 0.230769

In [113]:
print(label)

22


In [114]:
result.values(),X.shape

([0.46153846153846156,
  -0.05128205128205132,
  -0.07692307692307687,
  -0.15384615384615385,
  -0.10256410256410253,
  -0.15384615384615385,
  -0.15384615384615385,
  -0.02564102564102566,
  -0.5384615384615384,
  0.41025641025641024,
  -0.4358974358974358,
  -0.10256410256410253,
  -0.10256410256410253,
  -0.07692307692307698,
  -0.15384615384615385,
  -0.10256410256410253,
  -0.1282051282051282,
  -0.10256410256410253,
  -0.17948717948717952,
  -0.1282051282051282,
  -0.10256410256410264,
  0.0,
  -0.1282051282051282],
 (658272, 2))

In [115]:
labels = result.values()

In [116]:
z_temp = []
for item in y_pred:
    z_temp.append(result[item])
print(len(z_temp),len(X_new))
loc_z = kmeans.predict(X_new)
z = np.ones(shape=((658272,)))
for i in range(len(loc_z)):
    temp = np.where(pred_kmeans==loc_z[i])[0]
    z[temp] = z_temp[i]

(65827, 65827)


In [117]:
X.shape, z.shape

((658272, 2), (658272,))

In [118]:
x = X[:,0]
y = X[:,1]

In [119]:
print result.values().index(max(result.values())), result.values().index(min(result.values()))
ana = int(result.values().index(max(result.values())))
ina = int(result.values().index(min(result.values())))
print result[ana], -1*(result[ina])

0 8
0.461538461538 0.538461538462


In [120]:
spax = []
spay = []
spix = []
spiy = []
for i in range(0,len(z)):
    if z[i] == result[ana]:
        spax.append(x[i])
        spay.append(y[i])
    elif z[i] == result[ina]:
        spix.append(x[i])
        spiy.append(y[i])
spax = np.asarray(spax)
spay = np.asarray(spay)
spix = np.asarray(spix)
spiy = np.asarray(spiy)

In [121]:
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model = AlexNet(weights_path="../../data/weights/alexnet_weights.h5",lambda_mask=lambda_mask)
model.compile(optimizer=sgd, loss='mse')

th


In [122]:
#Testing data pkl - animate
im_test = []
image_list_test = '../../data/pkl/animate_image_list_test.txt'
with open(image_list_test,'rb') as f:
    for line in f.readlines():
        im_test.append(line.strip('\n'))

im_temp = preprocess_image(im_test,227,227, color_mode="bgr")
out = model.predict(im_temp,batch_size=64)

true_valid_wids = []
for i in im_test:
        temp1 = i.split('/')[4]
        temp = temp1.split('.')[0].split('_')[2]
        true_valid_wids.append(truth[int(temp)][1])

predicted_valid_wids = []
for i in range(len(im_test)):
    #print im_list[i], pprint_output(out[i]), true_wids[i]
    predicted_valid_wids.append(pprint_output(out[i]))

count, error  = top5accuracy(true_valid_wids, predicted_valid_wids)



print len(true_valid_wids), len(predicted_valid_wids), len(im_test)
print count, error


#}
# Code snippet to get the activation values and saving information{
data = np.array([])

i = 0
result ={}
for layer in model.layers:
    weights = layer.get_weights()
    if len(weights) > 0:
        activations = get_activations(model,i,im_temp)
        if result.get(layer.name, None) is None:
            result[layer.name] = activations[0]
            if len(activations[0].shape) == 4:
                temp = activations[0].reshape(activations[0].shape[0], activations[0].shape[1]*activations[0].shape[2]*activations[0].shape[2])
            else:
                temp = activations[0].reshape(activations[0].shape[0], activations[0].shape[1])
            if layer.name != 'dense_3':
                print layer.name,len(weights),len(activations), activations[0].shape, np.mean(activations[0], axis=0).shape, temp.shape
                if len(data) == 0:
                    data = temp
                else:
                    shape = data.shape
                    data = np.append(data, temp)
                    data = data.reshape(shape[0], shape[1] + temp.shape[1])
                #print(data.shape)
    i += 1
    
data_animate_activations = data.copy()
correlation_mat= 1 - np.corrcoef(data)
data_shape = correlation_mat.shape
arr_sorted =  sorted(correlation_mat.ravel())
s = pd.Series(correlation_mat.ravel())
percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
data = np.reshape(percentiles, data_shape)
data_full_animate = data

39 39 39
33 0.153846153846
conv_1 2 1 (39, 96, 55, 55) (96, 55, 55) (39, 290400)
conv_2_1 2 1 (39, 128, 27, 27) (128, 27, 27) (39, 93312)
conv_2_2 2 1 (39, 128, 27, 27) (128, 27, 27) (39, 93312)
conv_3 2 1 (39, 384, 13, 13) (384, 13, 13) (39, 64896)
conv_4_1 2 1 (39, 192, 13, 13) (192, 13, 13) (39, 32448)
conv_4_2 2 1 (39, 192, 13, 13) (192, 13, 13) (39, 32448)
conv_5_1 2 1 (39, 128, 13, 13) (128, 13, 13) (39, 21632)
conv_5_2 2 1 (39, 128, 13, 13) (128, 13, 13) (39, 21632)
dense_1 2 1 (39, 4096) (4096,) (39, 4096)
dense_2 2 1 (39, 4096) (4096,) (39, 4096)


In [123]:
#Testing data pkl - Inanimate
im_test = []
image_list_test = '../../data/pkl/inanimate_image_list_test.txt'
with open(image_list_test,'rb') as f:
    for line in f.readlines():
        im_test.append(line.strip('\n'))

im_temp = preprocess_image(im_test,227,227, color_mode="bgr")
out = model.predict(im_temp,batch_size=64)

true_valid_wids = []
for i in im_test:
        temp1 = i.split('/')[4]
        temp = temp1.split('.')[0].split('_')[2]
        true_valid_wids.append(truth[int(temp)][1])

predicted_valid_wids = []
for i in range(len(im_test)):
    #print im_list[i], pprint_output(out[i]), true_wids[i]
    predicted_valid_wids.append(pprint_output(out[i]))

count, error  = top5accuracy(true_valid_wids, predicted_valid_wids)



print len(true_valid_wids), len(predicted_valid_wids), len(im_test)
print count, error


#}
# Code snippet to get the activation values and saving information{
data = np.array([])

i = 0
result ={}
for layer in model.layers:
    weights = layer.get_weights()
    if len(weights) > 0:
        activations = get_activations(model,i,im_temp)
        if result.get(layer.name, None) is None:
            result[layer.name] = activations[0]
            if len(activations[0].shape) == 4:
                temp = activations[0].reshape(activations[0].shape[0], activations[0].shape[1]*activations[0].shape[2]*activations[0].shape[2])
            else:
                temp = activations[0].reshape(activations[0].shape[0], activations[0].shape[1])
            if layer.name != 'dense_3':
                print layer.name,len(weights),len(activations), activations[0].shape, np.mean(activations[0], axis=0).shape, temp.shape
                if len(data) == 0:
                    data = temp
                else:
                    shape = data.shape
                    data = np.append(data, temp)
                    data = data.reshape(shape[0], shape[1] + temp.shape[1])
                #print(data.shape)
    i += 1

data_inanimate_activations = data.copy()
correlation_mat= 1 - np.corrcoef(data)
data_shape = correlation_mat.shape
arr_sorted =  sorted(correlation_mat.ravel())
s = pd.Series(correlation_mat.ravel())
percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
data = np.reshape(percentiles, data_shape)
data_full_inanimate = data

39 39 39
28 0.282051282051
conv_1 2 1 (39, 96, 55, 55) (96, 55, 55) (39, 290400)
conv_2_1 2 1 (39, 128, 27, 27) (128, 27, 27) (39, 93312)
conv_2_2 2 1 (39, 128, 27, 27) (128, 27, 27) (39, 93312)
conv_3 2 1 (39, 384, 13, 13) (384, 13, 13) (39, 64896)
conv_4_1 2 1 (39, 192, 13, 13) (192, 13, 13) (39, 32448)
conv_4_2 2 1 (39, 192, 13, 13) (192, 13, 13) (39, 32448)
conv_5_1 2 1 (39, 128, 13, 13) (128, 13, 13) (39, 21632)
conv_5_2 2 1 (39, 128, 13, 13) (128, 13, 13) (39, 21632)
dense_1 2 1 (39, 4096) (4096,) (39, 4096)
dense_2 2 1 (39, 4096) (4096,) (39, 4096)


In [124]:
data_full_inanimate.shape

(39, 39)

In [125]:
data_total = np.concatenate((data_animate_activations, data_inanimate_activations))
print(data_total.shape)
correlation_mat= 1 - np.corrcoef(data_total)
data_shape = correlation_mat.shape
arr_sorted =  sorted(correlation_mat.ravel())
s = pd.Series(correlation_mat.ravel())
percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
data = np.reshape(percentiles, data_shape)
data_total_all = data
print(data_total_all.shape)

(78, 658272)
(78, 78)


In [126]:
data_cluster_animate = {}
data_cluster_inanimate = {}
data_cluster = {}


for label in range(len(labels)):
    pred = clf.predict(X_new)
    loc = np.where(pred==label)[0]
    loc_new =[]
    for i in range(len(loc)):
        temp = np.where(pred_kmeans==loc[i])[0]
        loc_new.extend(temp)

    
    index_cluster = loc_new
    
    #Animate
    data_temp = data_animate_activations[:,loc_new]
    correlation_mat= 1 - np.corrcoef(data_temp)
    data_shape = correlation_mat.shape
    arr_sorted =  sorted(correlation_mat.ravel())
    s = pd.Series(correlation_mat.ravel())
    percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
    data_temp = np.reshape(percentiles, data_shape)
    data_cluster_animate[label] = data_temp
    
    #InAnimate
    data_temp = data_inanimate_activations[:,loc_new]
    correlation_mat= 1 - np.corrcoef(data_temp)
    data_shape = correlation_mat.shape
    arr_sorted =  sorted(correlation_mat.ravel())
    s = pd.Series(correlation_mat.ravel())
    percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
    data_temp = np.reshape(percentiles, data_shape)
    data_cluster_inanimate[label] = data_temp
    
    #Total
    data_temp = np.concatenate((data_animate_activations[:,loc_new],data_inanimate_activations[:,loc_new]))
    correlation_mat= 1 - np.corrcoef(data_temp)
    data_shape = correlation_mat.shape
    arr_sorted =  sorted(correlation_mat.ravel())
    s = pd.Series(correlation_mat.ravel())
    percentiles = np.asarray(s.apply(lambda x: percentileofscore(arr_sorted, x)))
    data_temp = np.reshape(percentiles, data_shape)
    data_cluster[label] = data_temp

In [133]:
rdm_impact ={}

for label in range(len(labels)):
    if labels[label] != 0.:
        t_ana, p_ana = stats.kendalltau(data_full_animate, data_cluster_animate[label])
        t_ina, p_ina = stats.kendalltau(data_full_inanimate, data_cluster_inanimate[label])
        t, p = stats.kendalltau(data_cluster_animate[label], data_cluster_inanimate[label])
        diff = float(t_ana - t_ina)
        rdm_impact[label] = p_ina
        print(label, labels[label], t_ana, t_ina, diff)
    

(0, 0.46153846153846156, 0.5364215136956896, 0.47270211248053845, 0.06371940121515113)
(1, -0.05128205128205132, 0.17850434511522223, 0.1796758397915715, -0.0011714946763492773)
(2, -0.07692307692307687, 0.3292610273784932, 0.2991349590790416, 0.030126068299451636)
(3, -0.15384615384615385, 0.1068375198725745, 0.08324446170815161, 0.023593058164422892)
(4, -0.10256410256410253, 0.09953101004966683, 0.06456303031264808, 0.03496797973701875)
(5, -0.15384615384615385, 0.22049758832021912, 0.24636980244748038, -0.02587221412726126)
(6, -0.15384615384615385, 0.12800633202635398, 0.10660809481505362, 0.021398237211300353)
(7, -0.02564102564102566, 0.23153376985190668, 0.2612816195155701, -0.029747849663663423)
(8, -0.5384615384615384, 0.5321787252186083, 0.4899768491073624, 0.04220187611124593)
(9, 0.41025641025641024, 0.45085476779939154, 0.4665271334989147, -0.015672365699523183)
(10, -0.4358974358974358, 0.43983168563768255, 0.44235155603352955, -0.0025198703958470015)
(11, -0.10256410256

In [134]:
rdm_impact.values().index(min(rdm_impact.values()))

8

In [108]:
print rdm_impact.values().index(max(rdm_impact.values())), rdm_impact.values().index(min(rdm_impact.values()))
rdm_ana = int(rdm_impact.values().index(max(rdm_impact.values())))
rdm_ina = int(rdm_impact.values().index(min(rdm_impact.values())))
print rdm_impact[rdm_ana], -1*(rdm_impact[rdm_ina])

10 2
4.3114011115919363e-05 -2.4585038523670634e-218


In [None]:
stats.kendalltau(data_full_animate, data_full_inanimate)

In [None]:
for label in range(len(labels)):
    print(label, stats.kendalltau(data_total_all, data_cluster[label]))

In [None]:
# plot figure of these correlations
f, ax = plt.subplots(1,1, figsize=(8, 7))

plt.imshow(
    data_cluster[8],
    origin='lower',
    cmap='jet', 
)
plt.colorbar()

In [None]:
# plot figure of these correlations
f, ax = plt.subplots(1,1, figsize=(8, 7))

plt.imshow(
    data_total_all,
    origin ='lower',
    cmap='jet', 
)
plt.colorbar()