In [None]:
from neural_network.nn_manager.TrainManager import TrainManager
from neural_network.store.DBNNSave import DBNNSave
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Reshape, Input, Conv2D, BatchNormalization
from keras.layers.convolutional import Convolution1D, Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
import keras.backend as K
import keras
from random import shuffle
import h5py
import numpy as np
import tensorflow as tf

# Ask TensorFlow to allocate GPU memory on demand rather than up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
# Register the configured session with Keras; without this call Keras lazily
# creates its own default session and `session` above is never used.
K.set_session(session)
# Seed NumPy's global RNG so NumPy-driven randomness is repeatable across runs.
np.random.seed(7)

In [None]:
class SequenceDetectionNN(TrainManager):
    """Binary classifier over pairs of single-channel images stored in HDF5.

    The HDF5 file is expected to contain the datasets ``train_data_x``,
    ``train_data_y``, ``val_data_x`` and ``val_data_y``.  The last axis of the
    ``*_x`` datasets is indexed with 0 and 1 to obtain the two images of each
    pair; each image is fed to its own model input.
    """

    def __init__(self):
        """Open the dataset, set hyper-parameters and build the augmenters."""
        self.path_to_data = './sd-size-150.hdf5'
        # Data is prepared before the base-class constructor runs; presumably
        # TrainManager.__init__ needs the image sizes (e.g. to call
        # create_model) -- TODO confirm against TrainManager.
        self.prepare_data(self.path_to_data)
        self.batch_size = 64
        self.epochs = 20
        super(SequenceDetectionNN, self).__init__()

        # Random augmentation is applied to training batches only.
        self.datagen_train = ImageDataGenerator(
            vertical_flip=True,
            horizontal_flip=True,
            zoom_range=0.2,
            shear_range=0.2,
            rotation_range=45
        )

        # Validation batches pass through a generator with no augmentation.
        self.datagen_val = ImageDataGenerator()

    def prepare_data(self, path):
        """Open the HDF5 file at ``path`` and cache dataset handles and sizes.

        The file is deliberately not closed here: the ``X_*``/``Y_*``
        attributes are handles into it and are read later by ``_generator``.
        """
        hdf5_file = h5py.File(path, 'r')
        self.get_handlers(hdf5_file)
        self.load_sizes()

    def load_sizes(self):
        """Derive image, output and sample-count sizes from the dataset shapes."""
        self.img_size_1 = self.X_train.shape[1]
        self.img_size_2 = self.X_train.shape[2]
        self.outputs_size = self.Y_train.shape[1]
        self.num_train_samples = self.X_train.shape[0]
        self.num_val_samples = self.X_val.shape[0]

    def get_handlers(self, file):
        """Store handles to the train/validation datasets of the open ``file``."""
        self.X_train = file['train_data_x']
        self.Y_train = file['train_data_y']
        self.X_val = file['val_data_x']
        self.Y_val = file['val_data_y']

    def store_method(self):
        """Return the storage backend (a new ``DBNNSave`` instance)."""
        return DBNNSave()

    def train_data_generator(self):
        """Return an endless generator of augmented training batches."""
        return self._generator(self.X_train, self.Y_train, self.datagen_train)

    def test_data_generator(self):
        """Return an endless generator of un-augmented validation batches."""
        return self._generator(self.X_val, self.Y_val, self.datagen_val)

    def create_model(self):
        """Build and compile the two-input convolutional network.

        Returns:
            A compiled Keras ``Model`` taking ``[image_1, image_2]`` and
            producing a single sigmoid output, trained with binary
            cross-entropy and the Adam optimizer.
        """
        input_image_1 = Input(shape=(self.img_size_1, self.img_size_2, 1))
        input_image_2 = Input(shape=(self.img_size_1, self.img_size_2, 1))
        # Stack the two images along the last axis so the first convolution
        # sees both of them at once.
        merged_vector = keras.layers.concatenate([input_image_1, input_image_2], axis=-1)

        # NOTE(review): the inputs above are declared as (size_1, size_2, 1),
        # so for the convolutional feature maps axis=1 is a spatial axis, not
        # the last (feature-map) axis.  It is left unchanged because switching
        # to axis=-1 changes the BatchNormalization parameter shapes and would
        # make previously saved weights unloadable -- confirm before changing.
        layer = Conv2D(filters=32, kernel_size=(3, 3))(merged_vector)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling2D(pool_size=(2, 2))(layer)

        layer = Conv2D(filters=32, kernel_size=(3, 3))(layer)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling2D(pool_size=(2, 2))(layer)

        layer = Conv2D(filters=32, kernel_size=(3, 3))(layer)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling2D(pool_size=(2, 2))(layer)

        layer = Flatten()(layer)

        # After Flatten the tensors are 2-D, so axis=1 is the feature axis.
        layer = Dense(1024)(layer)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)

        layer = Dense(32)(layer)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)

        layer = Dense(1)(layer)
        layer = BatchNormalization(axis=1)(layer)
        output_layer = Activation('sigmoid')(layer)

        model = Model(inputs=[input_image_1, input_image_2], outputs=output_layer)
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    def _generator(self, X, Y, datagen):
        """Yield ``([x1, x2], y)`` batches forever.

        Args:
            X: 4-D array-like; index 0 and 1 of its last axis are the two
                images of each pair.
            Y: labels aligned with the first axis of ``X``.
            datagen: ``ImageDataGenerator`` applied to both images.
        """
        # Slicing pulls each image stack out of X in one go, then restores a
        # trailing axis of size 1 to match the model's input shape.
        X_1 = np.expand_dims(X[:, :, :, 0], 3)
        X_2 = np.expand_dims(X[:, :, :, 1], 3)
        # Both flows share the same seed so that they are driven identically
        # and the two images of a pair stay aligned.
        batches_1 = datagen.flow(X_1, Y, batch_size=self.batch_size, shuffle=True, seed=7)
        batches_2 = datagen.flow(X_2, Y, batch_size=self.batch_size, shuffle=True, seed=7)

        while True:
            for batch_1, batch_2 in zip(batches_1, batches_2):
                x1 = batch_1[0]
                x2 = batch_2[0]
                y = batch_1[1]
                yield [x1, x2], y

    def train(self):
        """Train for ``self.epochs`` epochs, using whole batches only.

        Returns:
            Whatever the inherited ``train_model`` returns, so callers can
            keep the result (e.g. ``history = nn.train()``).
        """
        return self.train_model(
            self.num_train_samples // self.batch_size,
            self.num_val_samples // self.batch_size,
            epochs=self.epochs
        )




In [None]:
# Build the network (the constructor opens the HDF5 dataset) and run training.
sd = SequenceDetectionNN()
# `history` holds whatever SequenceDetectionNN.train() returns.
history = sd.train()

In [None]:
from neural_network.store.ModelLoader import load_weights_from_file
# Build a fresh network, then swap in a model loaded from stored weights.
sd = SequenceDetectionNN()
# NOTE(review): `NeuralNetwork` is not imported in any visible cell, so it must
# already exist in the kernel namespace; 161 is a hard-coded record id.
nn = NeuralNetwork.objects.get(id=161)
sd.model = load_weights_from_file(model=sd.model, file_path=nn.weights.path)

In [None]:
from scipy.stats import kendalltau
import re
    
def get_tau(struct):
    """Return Kendall's tau between score order and natural position.

    Args:
        struct: mapping of key -> score.

    Returns:
        The tau statistic from ``scipy.stats.kendalltau`` comparing the
        positions ``0..n-1`` with the keys listed in ascending score order.
    """
    ranked = sorted(struct.items(), key=lambda pair: pair[1])
    order_predicted = [key for key, _score in ranked]
    order_original = list(range(len(ranked)))
    tau, _p_value = kendalltau(order_original, order_predicted)
    return tau
    
def predict_data(model, x_test):
    """Run ``model.predict`` on the two images of every pair in ``x_test``.

    Args:
        model: object exposing ``predict(list_of_inputs)``.
        x_test: 4-D array-like; index 0 and 1 of the last axis select the
            first and second image of each pair.

    Returns:
        Whatever ``model.predict`` returns for ``[first_images, second_images]``.
    """
    first_images = np.expand_dims(x_test[:, :, :, 0], 3)
    second_images = np.expand_dims(x_test[:, :, :, 1], 3)
    model_inputs = [first_images, second_images]
    return model.predict(model_inputs)

def get_name(data):
    """Return the first run of digits in a stored image name as an int.

    Args:
        data: scalar dataset holding the image name; it is read with
            ``data[()]``.

    Returns:
        The integer formed by the first group of consecutive digits.

    Raises:
        ValueError: if the name contains no digits.
    """
    # `Dataset.value` was removed in h5py 3.0; `[()]` reads the scalar on both
    # 2.x and 3.x.
    img_name = data[()]
    # Strings may come back as bytes, which the str regex below cannot search.
    if isinstance(img_name, bytes):
        img_name = img_name.decode('utf-8')
    match = re.search(r'\d+', img_name)
    if match is None:
        raise ValueError('no digits found in image name %r' % (img_name,))
    return int(match.group())
    
def add_result(meta, seq_res, y_score):
    """Accumulate one pair's score into the per-image totals of a series.

    Args:
        meta: mapping with ``'first_name'`` and ``'second_name'`` entries that
            ``get_name`` can turn into integer image ids.
        seq_res: dict of image id -> accumulated score; updated in place.
        y_score: score for the pair; the first image receives ``y_score`` and
            the second receives ``1 - y_score``.
    """
    first_id = get_name(meta['first_name'])
    second_id = get_name(meta['second_name'])
    # Make sure both images have an accumulator before adding to it.
    for image_id in (first_id, second_id):
        seq_res.setdefault(image_id, 0)
    seq_res[first_id] += y_score
    seq_res[second_id] += (1 - y_score)

def calc_tau(Meta, y_score):
    """Print Kendall's tau per series and return the mean over all series.

    Args:
        Meta: mapping indexed by the string keys ``'0' .. str(len(Meta) - 1)``;
            each entry has a scalar ``'series'`` dataset plus the name fields
            consumed by ``add_result``.
        y_score: indexable of scores; ``y_score[i]`` belongs to ``Meta[str(i)]``.

    Returns:
        ``np.mean`` of the per-series tau values (also printed).
    """
    results = dict()
    for i in range(len(Meta)):
        meta = Meta[str(i)]
        # `Dataset.value` was removed in h5py 3.0; `[()]` reads the scalar on
        # both 2.x and 3.x.
        series_id = meta['series'][()]
        if series_id not in results:
            results[series_id] = dict()
        add_result(meta, results[series_id], y_score[i])

    taus = []
    for series_id, image_scores in results.items():
        tau = get_tau(image_scores)
        taus.append(tau)
        print(series_id, tau)
    print(len(taus))
    mean_tau = np.mean(taus)
    print(mean_tau)
    return mean_tau
    
    
def test(nn):
    """Score the test split of ``nn``'s dataset and report Kendall's tau.

    Args:
        nn: object exposing ``path_to_data`` (HDF5 file path) and ``model``.

    Returns:
        Whatever ``calc_tau`` returns for the test predictions.
    """
    # The context manager closes the file even if prediction or scoring
    # raises; the dataset handles are only used inside the block.
    with h5py.File(nn.path_to_data, 'r') as file:
        X_test = file['test_data_x']
        Meta = file['test_data_metadata']
        print(len(Meta))
        y_score = predict_data(nn.model, X_test)
        return calc_tau(Meta, y_score)
    
    
# Evaluate the network currently bound to `sd` on the test split.
test(sd)

In [None]:
# Re-open the dataset at notebook scope and keep handles to the test split,
# presumably for interactive inspection.
# NOTE(review): this file handle is never closed in the visible cells.
file = h5py.File(sd.path_to_data, 'r')
X_test = file['test_data_x']; Y_test = file['test_data_y']; Meta = file['test_data_metadata']