In [None]:
'''
My Attempt at the Hilbert CNN at https://openreview.net/forum?id=HJvvRoe0W
Works by dividing the sequence into a series of overlapping 4-mers, 
then fits it to a image-like grid using a hilbert curve
such that each 'pixel' is a 4mer of length 256 (4**4=256)
'''

In [1]:
from keras.layers import Conv2D, BatchNormalization, AveragePooling2D, Dense, Dropout
from keras.layers import Activation, Input, Concatenate, Flatten, MaxPooling2D
from keras.models import Model
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, CSVLogger, LearningRateScheduler
import image
from keras import backend as K
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
start_target_size = (32, 32, 256)
batch_size = 16
train_path = 'D:/Projects/iSynPro/iSynPro/HilbertCNN/train_val_npys/train'
test_path = 'D:/Projects/iSynPro/iSynPro/HilbertCNN/train_val_npys/test'

# define generators
train_datagen = image.ImageDataGenerator()
test_datagen = image.ImageDataGenerator()

train_generator = train_datagen.flow_np_from_directory(train_path, 
                                                    target_size= start_target_size, 
                                                    batch_size=batch_size, 
                                                    class_mode='binary',
                                                    seed=42) 

validation_generator = test_datagen.flow_np_from_directory(test_path, 
                                                        target_size= start_target_size, 
                                                        batch_size=batch_size, 
                                                        class_mode='binary',
                                                        seed=42)

Found 17591 images belonging to 2 classes.
Found 1955 images belonging to 2 classes.


In [47]:
preview  = []
p = np.load('D:/Projects/iSynPro/iSynPro/HilbertCNN/train_val_npys/train/high/2.npy')
i = 0
for batch in train_datagen.flow_np_from_directory(p, 
                                                    target_size= start_target_size, 
                                                    batch_size= 1, 
                                                    save_to_dir='../',
                                                  save_prefix='preview') :
    i += 1
    if 1 > 1:
        break


ValueError: listdir: path too long for Windows

In [6]:
###Only call this if you want to clear the model, so you can train a new one
K.clear_session()
del model

In [83]:
# modified implementation of Hilbert-CNN
# wider and shallower than original
# https://openreview.net/forum?id=HJvvRoe0W
# 

inputs = Input(shape=[32, 32, 256])

# stem
x = Conv2D(64, (7, 7), strides=(1, 1), padding='same')(inputs)
x = BatchNormalization()(x)
x = Conv2D(64, (5, 5), strides=(1, 1), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

# computation block1
# residual 1
p1 = Conv2D(4, (8, 8), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# computation block2
# residual 1
p1 = Conv2D(4, (8, 8), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# mid-stem
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

# computation block 3
# residual 1
p1 = Conv2D(4, (8, 8), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)
'''
# computation block 4
# residual 1
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# computation block 5
# residual 1
p1 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)
'''
# exit stem
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
#x = BatchNormalization()(x)
#x = Activation('relu')(x)
#x = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(x) #we omit this last avgpool to retain dimensionality
x = Flatten()(x)

# FC layers
x = Dense(1024)(x)
x = Activation('relu')(x)
#x = Dropout(0.1)(x)

x = Dense(1024)(x)
x = Activation('relu')(x)
#x = Dropout(0.1)(x)

predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=predictions)

model.compile(optimizer= RMSprop(),
              loss= 'binary_crossentropy',
              metrics=[ 'binary_accuracy'])

In [7]:
# original implementation of Hilbert-CNN
# https://openreview.net/forum?id=HJvvRoe0W

inputs = Input(shape=[32, 32, 256])

# stem
x = Conv2D(64, (7, 7), strides=(1, 1), padding='same')(inputs)
x = BatchNormalization()(x)
x = Conv2D(64, (5, 5), strides=(1, 1), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(x)

# computation block1
# residual 1
p1 = Conv2D(4, (8, 8), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# computation block 2
# residual 1
p1 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# mid-stem
x = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(x)

# computation block 3
# residual 1
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (4, 4), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# computation block 4
# residual 1
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# computation block 5
# residual 1
p1 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(x)
p1 = BatchNormalization()(p1)
p1 = Activation('relu')(p1)
p1 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(p1)
p1 = BatchNormalization()(p1)

# residual 2
p2 = Conv2D(4, (2, 2), strides=(1, 1), padding='same')(x)
p2 = BatchNormalization()(p2)
p2 = Activation('relu')(p2)
p2 = Conv2D(4, (3, 3), strides=(1, 1), padding='same')(p2)
p2 = BatchNormalization()(p2)

x = Concatenate()([x, p1, p2])
x = BatchNormalization()(x)
x = Activation('relu')(x)

# exit stem
x = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(x)
#x = BatchNormalization()(x)
#x = Activation('relu')(x)
#x = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(x) #we omit this last avgpool to retain dimensionality
x = Flatten()(x)

# FC layers
x = Dense(1024)(x)
x = Activation('relu')(x)
#x = Dropout(0.1)(x)

x = Dense(1024)(x)
x = Activation('relu')(x)
#x = Dropout(0.1)(x)

predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=predictions)

model.compile(optimizer= RMSprop(),
              loss= 'binary_crossentropy',
              metrics=[ 'binary_accuracy'])

In [8]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 256)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 64)   802880      input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 64)   256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 32, 32, 64)   102464      batch_normalization_1[0][0]      
__________________________________________________________________________________________________
batch_norm

In [9]:
train_size = 17591
test_size = 1955
learning_decay = 0.96 #not used
batch_size = 16


#our callbacks
lr_descent = ReduceLROnPlateau(monitor='val_loss',
                                               factor=0.5,
                                               patience=5,
                                               verbose=1,
                                               mode='auto',
                                               epsilon=0.0001,
                                               cooldown=1,
                                               min_lr=0)

save_model = ModelCheckpoint('D:/Projects/iSynPro/iSynPro/HilbertCNN/weights/weights-{epoch:02d}-{val_loss:.2f}.hdf5',
                                             monitor='val_loss',
                                             verbose=1, 
                                             save_best_only=True,
                                             save_weights_only=False,
                                             mode='auto',
                                             period=1)

csv_path = 'D:/Projects/iSynPro/iSynPro/HilbertCNN/weights/training_history.csv'
csv_logger = CSVLogger(csv_path, separator=',', append=False)

def incep_resnet_schedule(epoch):
    if epoch % 2 == 0:
        return 0.045*(0.96**(epoch))
    else:
        return 0.045*(0.96**((epoch)-1.0))

lr_scheduler = LearningRateScheduler(incep_resnet_schedule)

#tracking = keras.callbacks.ProgbarLogger(count_mode='samples')

#train the model
print('Beginning Training for 100 Epochs...')
model.fit_generator(train_generator, 
                                   steps_per_epoch= train_size // batch_size,
                                   epochs=100, 
                                   validation_data= validation_generator, 
                                   validation_steps= test_size // batch_size,
                                   verbose=2,
                                   callbacks = [save_model, csv_logger])

Beginning Training for 100 Epochs...
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.54609, saving model to D:/Projects/iSynPro/iSynPro/HilbertCNN/weights/weights-01-0.55.hdf5
 - 1368s - loss: 0.7798 - binary_accuracy: 0.6906 - val_loss: 0.5461 - val_binary_accuracy: 0.7418
Epoch 2/100

Epoch 00002: val_loss improved from 0.54609 to 0.51950, saving model to D:/Projects/iSynPro/iSynPro/HilbertCNN/weights/weights-02-0.52.hdf5
 - 1291s - loss: 0.5525 - binary_accuracy: 0.7271 - val_loss: 0.5195 - val_binary_accuracy: 0.7411
Epoch 3/100

Epoch 00003: val_loss did not improve
 - 1290s - loss: 0.5356 - binary_accuracy: 0.7285 - val_loss: 0.5231 - val_binary_accuracy: 0.7303
Epoch 4/100

Epoch 00004: val_loss did not improve
 - 1322s - loss: 0.5350 - binary_accuracy: 0.7382 - val_loss: 0.5450 - val_binary_accuracy: 0.7437
Epoch 5/100

Epoch 00005: val_loss did not improve
 - 1324s - loss: 0.5244 - binary_accuracy: 0.7408 - val_loss: 0.5271 - val_binary_accuracy: 0.7437
Epoch 6/100



KeyboardInterrupt: 