In [1]:
import numpy as np
import keras
from skimage.measure import block_reduce 
from keras.models import load_model,Model,Sequential
from keras.layers import Dense,Dropout,Flatten,Activation
from keras import regularizers
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.initializers import glorot_uniform
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import backend as K
# "tf" dim ordering is the legacy (Keras 1) spelling of the channels-last layout.
# set_image_dim_ordering is only a deprecated alias in Keras 2 and is absent from
# later 2.x releases, so use the supported call for the same setting.
if K.backend() == 'tensorflow':
    K.set_image_data_format('channels_last')
    
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

Using TensorFlow backend.


In [2]:
# Data generators for the training and test sets. Both are configured the same
# way: the only transformation is ResNet50's own `preprocess_input`.
train_datagen, test_datagen = (
    ImageDataGenerator(preprocessing_function=preprocess_input)
    for _ in range(2)
)

In [None]:
# Directory iterators for the two splits; both resize to 224x224 RGB and
# yield batches of 32 images.
train_generator, test_generator = (
    datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        color_mode='rgb',
        batch_size=32,
    )
    for datagen, directory in (
        (train_datagen, '/pylon5/ms3uujp/dx10384/caltech101/101_ObjectCategories'),
        (test_datagen, '/pylon5/ms3uujp/dx10384/caltech101/test'),
    )
)

In [3]:
# ImageNet-pretrained ResNet50 without its classification head; intermediate
# layers of this network are what extract_features taps into.
base = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

Instructions for updating:
Colocations handled automatically by placer.




In [4]:
############################################
#Feature Extraction using numpy pooling
############################################
def extract_features(base, name, X, step, size):
    """
    Extract the activations of one layer of `base` and average-pool them.

    base -- the model to extract features from
    name -- name of the layer whose output is used as the feature map
    X    -- input data (a generator accepted by predict_generator)
    step -- number of steps (batches) passed to predict_generator
    size -- spatial block size used to down-sample each feature map

    return: a 2D array with one row per predicted sample, holding the
            block-averaged feature map of that sample flattened to 1D
    """
    target = Model(inputs=base.input, outputs=base.get_layer(name).output)
    # Use the `step` argument here; the previous code read a global named
    # `steps`, which ignored the caller's value and failed when no such
    # global existed.
    features = target.predict_generator(X, steps=step, verbose=1)

    # Average over (size x size) spatial blocks, leaving the channel axis
    # untouched, then flatten each sample's pooled map into a single row.
    pooled = [
        block_reduce(feature_map, block_size=(size, size, 1), func=np.mean).reshape(-1)
        for feature_map in features
    ]
    return np.stack(pooled)

In [20]:
# One image per batch, in a fixed order. The features predicted from this
# generator are later paired row-by-row with `val_generator.classes`, which is
# only valid if the generator does not shuffle, so shuffling is disabled.
val_generator = test_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/test',
    target_size = (224, 224),
    color_mode = 'rgb',
    batch_size = 1,
    shuffle=False)

Found 1000 images belonging to 4 classes.


In [21]:
# Class index of every image known to the validation generator.
label = val_generator.classes
# The generator was created with batch_size=1, so one prediction step per
# label walks through the whole set once.
steps = label.shape[0]

In [24]:
# One pooled feature matrix per tapped ResNet50 activation layer. Each entry
# pairs the layer name with the block size handed to extract_features.
(feature1, feature2, feature3, feature4,
 feature5, feature6, feature7, feature8,
 feature9, feature10, feature11, feature12,
 feature13, feature14, feature15, feature16) = [
    extract_features(base, layer_name, val_generator, steps, block)
    for layer_name, block in [
        ('activation_4', 56),
        ('activation_7', 56),
        ('activation_10', 56),
        ('activation_13', 28),
        ('activation_16', 28),
        ('activation_19', 28),
        ('activation_22', 28),
        ('activation_25', 14),
        ('activation_28', 14),
        ('activation_31', 14),
        ('activation_34', 14),
        ('activation_37', 14),
        ('activation_40', 14),
        ('activation_43', 7),
        ('activation_46', 7),
        ('activation_49', 7),
    ]
]



In [8]:
##################################################
#Projection correlation & Distance correlation
##################################################

def get_arccos_1d(X):
    """
    compute the pairwise "angles" between scalar observations, seen from
    every observation in turn, together with their doubly-centred version.

    X -- a 1D array (or an n*1 / 1*n array; singleton axes are dropped)

    For scalars the angle at x_r between x_k and x_l can only be
        0     if x_k and x_l lie strictly on the same side of x_r,
        pi    if they lie strictly on opposite sides,
        pi/2  if either of them coincides with x_r.

    return: (a, A), both of shape (n, n, n), where a[k, l, r] is the angle
            described above and A is `a` centred over its first two axes
            separately for every r.
    """

    # atleast_1d keeps a single observation as a length-1 vector; a bare
    # squeeze would collapse it to a 0-d array and break the indexing below.
    x = np.atleast_1d(np.squeeze(X))
    n = len(x)

    # diff[r, k] = x[k] - x[r]
    diff = x[None, :] - x[:, None]
    # prod[r, k, l] = (x[k] - x[r]) * (x[l] - x[r]); only its sign matters.
    prod = diff[:, :, None] * diff[:, None, :]

    a = np.zeros([n, n, n])
    a[prod == 0.] = np.pi/2.
    a[prod < 0.] = np.pi

    # Move the reference index r to the last axis: a[k, l, r].
    a = np.transpose(a, (1,2,0))

    a_bar_12 = np.mean(a, axis = 0, keepdims = True)
    a_bar_02 = np.mean(a, axis = 1, keepdims = True)
    a_bar_2  = np.mean(a, axis = (0,1), keepdims = True)
    A = a - a_bar_12 - a_bar_02 + a_bar_2

    return a, A


def get_arccos(X):
    """
    compute, for every observation taken as the origin, the angle between
    each pair of observations, plus the doubly-centred angles.

    X -- a 2D array, one observation per row

    return: (a, A), both of shape (n, n, n); a[k, l, r] is the angle at
            row r between rows k and l (pi/2 when either coincides with
            row r), and A is `a` centred over its first two axes.
    """

    n_obs, _ = X.shape
    cosines = np.zeros([n_obs, n_obs, n_obs])

    for pivot, origin in enumerate(X):
        shifted = X - origin
        lengths = np.sqrt(np.sum(shifted**2, axis = 1))
        scale = np.outer(lengths, lengths)

        # Pairs involving the pivot itself have zero length: divide by 1
        # instead and force their cosine to 0 afterwards.
        degenerate = (scale == 0.)
        scale[degenerate] = 1.
        slab = np.dot(shifted, shifted.T) / scale
        slab[degenerate] = 0.

        cosines[:, :, pivot] = slab

    # Rounding can push cosines marginally outside [-1, 1].
    a = np.arccos(np.clip(cosines, -1., 1.))

    A = (a
         - a.mean(axis = 0, keepdims = True)
         - a.mean(axis = 1, keepdims = True)
         + a.mean(axis = (0,1), keepdims = True))

    return a, A

def projection_corr_1dy(X, Y):

    """
    compute the projection correlation where
    X -- an n*p 2D array
    Y -- an n*1 2D array

    return: the projection correlation (0. when either variable has zero
            projection variance)

    raises: ValueError if X and Y have different numbers of rows, or if
            Y does not have exactly one column
    """

    nx, p = X.shape
    ny, q = Y.shape

    if nx != ny:
        raise ValueError("sample sizes do not match.")
    if q != 1:
        # get_arccos_1d only handles scalar observations.
        raise ValueError("Y must have exactly one column.")
    n = nx

    _, A_x = get_arccos(X)
    _, A_y = get_arccos_1d(Y)

    S_xy = np.sum(A_x * A_y) / (n**3)
    S_xx = np.sum(A_x**2) / (n**3)
    S_yy = np.sum(A_y**2) / (n**3)

    denom = np.sqrt(S_xx * S_yy)
    if denom == 0.:
        return 0.

    # Rounding can leave the ratio a hair below zero when the variables are
    # (nearly) independent; clamp so the square root does not return NaN.
    return np.sqrt(max(S_xy / denom, 0.))

def _pairwise_euclidean(M):
    """Return the (n, n) matrix of Euclidean distances between the rows of M."""
    n = M.shape[0]
    dist = np.empty((n, n))
    # One row at a time keeps the working set at O(n*p); broadcasting
    # M[:, None] - M would materialise an n*n*p array.
    for i in range(n):
        dist[i] = np.linalg.norm(M - M[i], axis = 1)
    return dist


def distance_corr(X, Y):

    """
    compute the distance correlation where
    X -- an n*p 2D array
    Y -- an n*q 2D array

    return: a list of two elements:
            [distance correlation, bias-corrected distance correlation]
            (an entry is 0. when its denominator is zero)

    raises: ValueError if X and Y have different numbers of rows, or if
            there are fewer than 4 rows
    """

    nx, p = X.shape
    ny, q = Y.shape

    if nx != ny:
        raise ValueError("sample sizes do not match.")
    n = nx

    # The bias-corrected centring below divides by (n-2); 4 rows is the
    # minimum accepted here.
    if n < 4:
        raise ValueError("sample size is less than 4.")

    a = _pairwise_euclidean(X)
    b = _pairwise_euclidean(Y)

    a0_bar = np.mean(a, axis = 0, keepdims = True)
    a1_bar = np.mean(a, axis = 1, keepdims = True)
    a_bar  = np.mean(a, axis = (0,1), keepdims = True)
    b0_bar = np.mean(b, axis = 0, keepdims = True)
    b1_bar = np.mean(b, axis = 1, keepdims = True)
    b_bar  = np.mean(b, axis = (0,1), keepdims = True)

    # Plain double centring.
    A = a - a0_bar - a1_bar + a_bar
    B = b - b0_bar - b1_bar + b_bar

    S_xy = np.sum(A*B)
    S_xx = np.sum(A**2)
    S_yy = np.sum(B**2)

    # Guard on the actual denominator (both variances), not on S_xy.
    denom = np.sqrt(S_xx * S_yy)
    if denom == 0.:
        corr1 = 0.
    else:
        # Clamp tiny negative ratios caused by rounding so sqrt stays real.
        corr1 = np.sqrt(max(S_xy / denom, 0.))

    # Bias-corrected centring: n*mean is the row/column sum and n*n*a_bar the
    # grand sum, scaled by 1/(n-2) and 1/((n-1)(n-2)); the diagonal is zeroed.
    A_tilde = a - n*a0_bar/(n-2.) - n*a1_bar/(n-2.) + n*n*a_bar/((n-1.)*(n-2.))
    B_tilde = b - n*b0_bar/(n-2.) - n*b1_bar/(n-2.) + n*n*b_bar/((n-1.)*(n-2.))
    np.fill_diagonal(A_tilde, 0.)
    np.fill_diagonal(B_tilde, 0.)

    S_xy_tilde = np.sum(A_tilde*B_tilde)
    S_xx_tilde = np.sum(A_tilde**2)
    S_yy_tilde = np.sum(B_tilde**2)

    denom_tilde = np.sqrt(S_xx_tilde * S_yy_tilde)
    if denom_tilde == 0.:
        corr3 = 0.
    else:
        # The bias-corrected statistic may legitimately be negative.
        corr3 = S_xy_tilde / denom_tilde

    return [corr1, corr3]

In [None]:
from sklearn.utils import shuffle
# NOTE(review): x_train, y_train, x_test and y_test are not defined in any cell
# visible in this notebook, and this cell carries no execution count, so it
# looks like a leftover. As written it would raise NameError unless those
# names are defined elsewhere -- TODO confirm or remove.
# Each call permutes its two arrays together with a fixed seed (random_state=0).
x_train, y_train = shuffle(x_train, y_train, random_state=0)
x_test, y_test = shuffle(x_test, y_test, random_state=0)

In [14]:
# Distance correlation between each extracted feature matrix and the class
# labels (as an n*1 column), for the first eleven tapped layers.
(dis1, dis2, dis3, dis4, dis5, dis6,
 dis7, dis8, dis9, dis10, dis11) = [
    distance_corr(layer_features, label.reshape(-1, 1))
    for layer_features in (
        feature1, feature2, feature3, feature4, feature5, feature6,
        feature7, feature8, feature9, feature10, feature11,
    )
]

In [15]:
# One [distance correlation, bias-corrected distance correlation] pair per line.
print(
    dis1, dis2, dis3, dis4, dis5, dis6,
    dis7, dis8, dis9, dis10, dis11,
    sep="\n",
)

[0.30555807515636846, 0.08944141612844135]
[0.3221554728382715, 0.09971918101818494]
[0.3491906783757611, 0.11753164819858986]
[0.4039298997665289, 0.1588345049898712]
[0.4190955395154591, 0.17119731105829222]
[0.42289699491793686, 0.1744235079645256]
[0.43731026102893567, 0.18681553775043802]
[0.4357218141559415, 0.18529716777572805]
[0.4668576853879687, 0.21395927750371266]
[0.4821415965232514, 0.22881181996600164]
[0.503459287935368, 0.25051326905055343]
