In [1]:
import numpy as np
import keras
from skimage.measure import block_reduce 
from keras.models import load_model,Model,Sequential
from keras.layers import Dense,Dropout,Flatten,Activation
from keras import regularizers
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.initializers import glorot_uniform
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import backend as K
if K.backend()=='tensorflow':
    K.set_image_dim_ordering("tf")
    
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

Using TensorFlow backend.


In [3]:
# data generator for training set
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) 

# data generator for test set
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [4]:
batch_size=32
steps_per_epoch=np.ceil(7785/batch_size)
validation_steps=np.ceil(1359/batch_size)

train_generator = train_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/101_ObjectCategories',
    target_size = (224, 224),
    color_mode = 'rgb',
    batch_size = batch_size)

test_generator = test_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/test',
    target_size = (224, 224),
    color_mode = 'rgb',
    batch_size = batch_size)

Found 7785 images belonging to 102 classes.
Found 1359 images belonging to 102 classes.


In [5]:
####create model
def base_model(name):
    base_model = VGG16(weights = "imagenet", include_top=False, input_shape = (224,224,3))
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.get_layer(name).output
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = Dropout(0.5)(x)
    predictions = Dense(102, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.RMSprop(lr=0.01),
                  metrics=['accuracy'])
    model.summary()
    return model

In [21]:
model=base_model('block5_pool')
hist10=model.fit_generator(train_generator,
                    epochs = 30,
                    validation_data = test_generator,
                    steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [6]:
model=base_model('block4_pool')
hist4=model.fit_generator(train_generator,
                    epochs = 30,
                    validation_data = test_generator,
                    steps_per_epoch=243,
                    validation_steps=42)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)

In [None]:
model=base_model('block3_pool')
hist4=model.fit_generator(train_generator,
                    epochs = 30,
                    validation_data = test_generator,
                    steps_per_epoch=243,
                    validation_steps=42)

In [24]:
model=base_model('block2_pool')
hist4=model.fit_generator(train_generator,
                    epochs = 30,
                    validation_data = test_generator,
                    steps_per_epoch=243,
                    validation_steps=42)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

ResourceExhaustedError: OOM when allocating tensor with shape[401408,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node training_6/RMSprop/Square}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [None]:
model=base_model('block1_pool')
hist4=model.fit_generator(train_generator,
                    epochs = 30,
                    validation_data = test_generator,
                    steps_per_epoch=243,
                    validation_steps=42)

In [9]:
############################################
####          Model Preparation 
############################################

base_model = VGG16(weights = "imagenet", include_top=False, input_shape = (256,256,3))

for layer in base_model.layers:
    layer.trainable = False

x = base_model.output

x = Flatten()(x)
#x = Dense(1024, activation="relu")(x)
#x = Dropout(0.5)(x)

predictions = Dense(102, activation="softmax")(x)

# creating the final model 
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.RMSprop(lr=0.0001),
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 256, 256, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0         
__________

In [5]:
# data generator for training set
train_datagen = ImageDataGenerator(rescale = 1./255) 

# data generator for test set
test_datagen = ImageDataGenerator(rescale = 1./255)

train_generator = train_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/101_ObjectCategories',
    target_size = (256, 256),
    color_mode = 'rgb',
    batch_size = 32)

test_generator = test_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/test',
    target_size = (256, 256),
    color_mode = 'rgb',
    batch_size = 32)

Found 7785 images belonging to 102 classes.
Found 1359 images belonging to 102 classes.


In [None]:
model.fit_generator(train_generator,
                    epochs = 50,
                    validation_data = test_generator,
                    steps_per_epoch=243,
                    validation_steps=42)

In [16]:
base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 256, 256, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0         
__________

In [17]:
############################################
#Feature Extraction using numpy pooling
############################################
#which base model to extract features
#which layer(name) to extract features
#X: input data(a generator)
#steps: steps for predict_generator 
#size: block_size for image downsampling
def extract_features(base, name, X,step,size):
    target = Model(inputs=base.input,outputs=base.get_layer(name).output)
    features = target.predict_generator(test_generator,steps=steps,verbose=1)
    n,a,b,c=features.shape
    features1=[]
    for i in range(n):
        new=block_reduce(features[i,:,:,:], block_size=(size,size,1), func=np.mean)
        a,b,c=new.shape
        features1.append(new.reshape(a*b*c))
    features1=np.stack(features1)
    return features1

In [18]:
test_generator = test_datagen.flow_from_directory(
    '/pylon5/ms3uujp/dx10384/caltech101/test',
    target_size = (256, 256),
    color_mode = 'rgb',
    batch_size = 1,
    shuffle=False)

Found 1359 images belonging to 102 classes.


In [19]:
label= test_generator.classes
steps=len(label)

In [20]:
feature1=extract_features(base,'block1_pool',test_generator,steps,16)
feature2=extract_features(base,'block2_pool',test_generator,steps,16)
feature3=extract_features(base,'block3_pool',test_generator,steps,8)
feature4=extract_features(base,'block4_pool',test_generator,steps,8)
feature5=extract_features(base,'block5_pool',test_generator,steps,4)



In [21]:
##################################################
#Projection correlation & Distance correlation
##################################################

def get_arccos_1d(X):

    # X -- a 1D array
    
    X = np.squeeze(X)
    Y = X[:,None] - X
    Z = Y.T[:,:,None]*Y.T[:,None]
    n = len(X)
    
    a = np.zeros([n, n, n])
    a[Z == 0.] = np.pi/2.
    a[Z < 0.] = np.pi
    
    a = np.transpose(a, (1,2,0))

    a_bar_12 = np.mean(a, axis = 0, keepdims = True)
    a_bar_02 = np.mean(a, axis = 1, keepdims = True)
    a_bar_2  = np.mean(a, axis = (0,1), keepdims = True)
    A = a - a_bar_12 - a_bar_02 + a_bar_2
    
    return a, A


def get_arccos(X):

    # X -- a 2D array
    
    n, p = X.shape
    cos_a = np.zeros([n, n, n])
    
    for r in range(n):
        
        xr = X[r]
        X_r = X - xr
        cross = np.dot(X_r, X_r.T)
        row_norm = np.sqrt(np.sum(X_r**2, axis = 1))
        outer_norm = np.outer(row_norm, row_norm)
        
        zero_idx = (outer_norm == 0.)
        outer_norm[zero_idx] = 1.
        cos_a_kl = cross / outer_norm
        cos_a_kl[zero_idx] = 0.

        cos_a[:,:,r] = cos_a_kl
        
    cos_a[cos_a > 1] = 1.
    cos_a[cos_a < -1] = -1.
    a = np.arccos(cos_a)

    a_bar_12 = np.mean(a, axis = 0, keepdims = True)
    a_bar_02 = np.mean(a, axis = 1, keepdims = True)
    a_bar_2  = np.mean(a, axis = (0,1), keepdims = True)
    A = a - a_bar_12 - a_bar_02 + a_bar_2
        
    return a, A

def projection_corr_1dy(X, Y):

    """
    compute the projection correlation where
    X -- an n*p 2D array
    Y -- an n*1 2D array
    """
    
    nx, p = X.shape
    ny, q = Y.shape
    
    if nx == ny:
        n = nx
    else:
        raise ValueError("sample sizes do not match.")
        
    a_x, A_x = get_arccos(X)
    a_y, A_y = get_arccos_1d(Y)
    
    S_xy = np.sum(A_x * A_y) / (n**3)
    S_xx = np.sum(A_x**2) / (n**3)
    S_yy = np.sum(A_y**2) / (n**3)
    
    if S_xx * S_yy == 0.:
        corr = 0.
    else:
        corr = np.sqrt( S_xy / np.sqrt(S_xx * S_yy) )
    
    return corr

def distance_corr(X, Y):

    """
    compute the distance correlation where
    X -- an n*p 2D array
    Y -- an n*p 2D array

    return: a list of two elements: 
            [distance correlation, bias-corrected distance correlation]
    """
    
    nx, p = X.shape
    ny, q = Y.shape
    
    if nx == ny:
        n = nx
    else:
        raise ValueError("sample sizes do not match.")
        
    if n < 4:
        raise ValueError("sample size is less than 4.")
        
    outer_diff_x = X[:, np.newaxis] - X
    outer_diff_y = Y[:, np.newaxis] - Y
    
    a = np.linalg.norm(outer_diff_x, axis = 2)
    b = np.linalg.norm(outer_diff_y, axis = 2)
    
    a0_bar = np.mean(a, axis = 0, keepdims = True)
    a1_bar = np.mean(a, axis = 1, keepdims = True)
    a_bar  = np.mean(a, axis = (0,1), keepdims = True)
    b0_bar = np.mean(b, axis = 0, keepdims = True)
    b1_bar = np.mean(b, axis = 1, keepdims = True)
    b_bar  = np.mean(b, axis = (0,1), keepdims = True)
    
    A = a - a0_bar - a1_bar + a_bar
    B = b - b0_bar - b1_bar + b_bar
    
    S_xy = np.sum(A*B)
    S_xx = np.sum(A**2)
    S_yy = np.sum(B**2)
    
    if S_xy * S_xx == 0.:
        corr1 = 0.
    else:
        corr1 = np.sqrt(S_xy / np.sqrt(S_xx * S_yy))
        
    A_tilde = a - n*a0_bar/(n-2.) - n*a1_bar/(n-2.) + n*n*a_bar/((n-1.)*(n-2.))
    B_tilde = b - n*b0_bar/(n-2.) - n*b1_bar/(n-2.) + n*n*b_bar/((n-1.)*(n-2.))
    np.fill_diagonal(A_tilde, 0.)
    np.fill_diagonal(B_tilde, 0.)
    
    S_xy_tilde = np.sum(A_tilde*B_tilde)
    S_xx_tilde = np.sum(A_tilde**2)
    S_yy_tilde = np.sum(B_tilde**2)
    
    if S_xy_tilde * S_xx_tilde == 0.:
        corr3 = 0.
    else:
        corr3 = S_xy_tilde / np.sqrt(S_xx_tilde * S_yy_tilde)
    
    return [corr1, corr3]

In [22]:
dis1=distance_corr(feature1,label.reshape(-1,1))
dis2=distance_corr(feature2,label.reshape(-1,1))
dis3=distance_corr(feature3,label.reshape(-1,1))
dis4=distance_corr(feature4,label.reshape(-1,1))
dis5=distance_corr(feature5,label.reshape(-1,1))

In [23]:
print (dis1)
print (dis2)
print (dis3)
print (dis4)
print (dis5)

[0.19956280117031025, 0.033402860705677336]
[0.23150005391473327, 0.04715629750509841]
[0.3086559500180125, 0.08842580010870653]
[0.347464970483844, 0.1144196109145059]
[0.38064744428025504, 0.13983810424604773]
