In [1]:
from neural_network.nn_manager.TrainManager import TrainManager
from neural_network.store.DBNNSave import DBNNSave
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Reshape, Input, Conv2D, BatchNormalization
from keras.layers.convolutional import Convolution1D, Convolution2D, MaxPooling2D
import keras.backend as K
from random import shuffle
import h5py
import numpy as np
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
np.random.seed(7)

Using TensorFlow backend.


In [2]:
class retinaNN(TrainManager):
    def __init__(self):
        self.path_to_data = './size_100o_one_img.hdf5'
        self.prepare_data(self.path_to_data)
        self.batch_size = 64
        self.epochs = 20
        self.example_images = []
        super(retinaNN, self).__init__()
        
    def prepare_data(self, path):
        hdf5_file = h5py.File(path, 'r')
        self.get_handlers(hdf5_file)
        self.load_sizes()
        
    def load_sizes(self):
        self.img_size_1 = self.X_train.shape[1]
        self.img_size_2 = self.X_train.shape[2]
        self.outputs_size = self.Y_train.shape[1]
        self.num_train_samples = self.X_train.shape[0]
        self.num_val_samples = self.X_val.shape[0]
        self.num_test_samples = self.X_test.shape[0]
    
    def get_handlers(self, file):
        self.X_train = file['train_data_x']
        self.Y_train = file['train_data_y']
        self.X_val = file['val_data_x']
        self.Y_val = file['val_data_y']
        self.X_test = file['test_data_x']
        self.Y_test = file['test_data_y']
    
    def store_method(self):
        return DBNNSave()

    def train_data_generator(self):
        generator = self._generator(self.X_train, self.Y_train)
        return generator

    def test_data_generator(self):
        generator = self._generator(self.X_val, self.Y_val)
        return generator

    def create_model(self):
        input_image = Input(shape=(self.img_size_1, self.img_size_2, 1))

        layer = Conv2D(filters=32, kernel_size=(3, 3))(input_image)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling2D(pool_size=(2, 2))(layer)
        
        layer = Conv2D(filters=32, kernel_size=(3, 3))(input_image)
        layer = BatchNormalization(axis=1)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling2D(pool_size=(2, 2))(layer)
        
        layer = Flatten()(layer)

        layer = Dense(self.outputs_size)(layer)
        layer = BatchNormalization(axis=1)(layer)
        output_layer = Activation('sigmoid')(layer)
        model = Model(inputs=input_image, outputs=output_layer)
        model.compile( optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=[self.f1_score, self.precision, self.recall])
        return model

    def f1_score(self, y_true, y_pred):
        c1 = self.get_true_positive(y_true,y_pred)
        c2 = self.get_positive_pred(y_pred)
        c3 = self.get_positive_true(y_true)

        if c3 == 0:
            return 0

        precision = c1 / c2
        recall = c1 / c3
        f1_score = 2 * (precision * recall) / (precision + recall)
        return f1_score
    
    def precision(self,y_true,y_pred):
        c1 = self.get_true_positive(y_true,y_pred)
        c2 = self.get_positive_pred(y_pred)
        return c1/c2
    
    def recall(self,y_true,y_pred):
        c1 = self.get_true_positive(y_true,y_pred)
        c3 = self.get_positive_true(y_true)
        return c1/c3
    
    def get_true_positive(self,y_true,y_pred):
        return K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    
    def get_positive_pred(self,y_pred):
        return K.sum(K.round(K.clip(y_pred, 0, 1)))
    
    def get_positive_true(self,y_true):
        return K.sum(K.round(K.clip(y_true, 0, 1)))
    
    def _generator(self,X,Y):       
        while 1:
            batch_s = self.batch_size
            for i in range(X.shape[0] // self.batch_size):
                x_part = X[i*batch_s: (i+1)*batch_s]
                y_part = Y[i*batch_s: (i+1)*batch_s]
                if np.random.random_sample() < 0.1:
                    self.example_images.append(x_part[0])
                yield x_part, y_part
        
    def train(self):
        self.train_model(
            self.num_train_samples // self.batch_size,
            self.num_val_samples // self.batch_size,
            epochs=self.epochs
        )

In [3]:
retina = retinaNN()
retina.train()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
from sklearn.metrics import roc_curve, auc

def change_y_to_words(y):
    hdf5_file = h5py.File(retina.path_to_data, 'r')
    n_gram = hdf5_file.attrs['n_gram']
    predicted_words = []
    print(n_gram)
    for i in range(len(y)):
        if y[i] > 0.5:
            predicted_words.append(tuple(n_gram[i]))
    return predicted_words

def get_i_word(i):
    hdf5_file = h5py.File(retina.path_to_data, 'r')
    n_gram = hdf5_file.attrs['n_gram']
    return n_gram[i]

def get_model_quality(y_true,y_pred):
    c1 = sum(np.around(y_true*y_pred)) #TP
    c2 = sum(np.around(y_pred))
    c3 = sum(np.around(y_true))
    precision = c1/c2
    recall = c1 / c3
    f1_score = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1_score

w = len(retina.Y_train[0])
h = retina.Y_train.shape[0]
pred_y = [[0 for i in range(h)] for j in range(w)]
true_y = [[0 for i in range(h)] for j in range(w)]
print(len(pred_y))
def test_model():
    for i in range(10):
        x_test = retina.X_train[i:i+1]
        y_test = retina.Y_train[i]
        y_score = retina.model.predict(x_test)
        c = []
        for j in range(len(y_test)):
            pred_y[j][i] = y_test[j]
            true_y[j][i] = y_score[0][j]
            if y_score[0][j] > 0.5:
                c.append(1)
            else:
                c.append(0)
        #count = 0
        #for e in c:
        #    if(e == 1):
        #        count = count + 1
        #print(count)
        #count = 0
        #for e in y_test:
        #    if(e == 1):
        #        count = count + 1
        #print(count)
        #print(y_test)
        #print(np.array(c))
        #print()
        precision, recall, f1_score = get_model_quality(y_test,y_score[0])
        print(precision, recall, f1_score)
        #print(change_y_to_words(y_score[0]))
    
    aucs = []
    for i in range(len(pred_y)):
        roc_auc = generate_results(pred_y[i],true_y[i],i)
        aucs.append(roc_auc)
    
    print(np.nanmean(aucs))
test_model()

1000
0.235294117647 0.903225806452 0.373333333333
0.0 nan nan
0.0212765957447 1.0 0.0416666666667
0.168224299065



 0.947368421053 0.285714285714
0.188679245283 0.869565217391 0.31007751938
0.227722772277 0.958333333333 0.368
0.154545454545 0.944444444444 0.265625
0.0802919708029 0.916666666667 0.147651006711
0.196721311475 0.96 0.326530612245
0.101851851852 0.916666666667 0.183333333333
['faza' 'późny']
['faza' 'wczesny']
['obraz' 'angiograficzny']
['późny' 'obraz']
['ukazować' 'faza']
['wszyscy' 'faza']
['hiperfluorescencja' 'odpowiadać']
['biegun' 'tylny']
['obrzęk' 'siatkówka']
['ognisko' 'hiperfluorescencja']




['ognisko' 'hipofluorescencja']
['wynikać' 'obecność']
['hipofluorescencja' 'odpowiadać']
['koniec' 'badanie']
['naczynia' 'siatkówka']
['laseroterapia' 'siatkówka']
['faza' 'kolejny']
['badanie' 'soct']
['brak' 'cech']
['widoczny' 'plamisty']
['wskazany' 'monitorować']
['odpowiadać' 'mikroaneuryzmaty']
['dołek' 'plamka']
['korelacja' 'obraz']
['norma' 'wiekowy']
['obraz' 'af']
['strefa' 'awaskularne']
['wskazany' 'korelacja']
['granica' 'norma']
['warstwa' 'barwnikowy']
['badanie' 'obraz']
['monitorować' 'zmiana']
['wypełnienie' 'naczynia']
['zmiana' 'badanie']
['wypełniać' 'naczynia']
['ubytek' 'rpe']
['kontrola' 'okulistyczny']
['wyrównanie' 'cukrzyca']
['obwód' 'siatkówka']
['siatkówka' 'faza']
['neowaskularyzacja' 'podsiatkówkowa']
['widoczny' 'zlokalizować']
['hiperfluorescencja' 'zlokalizować']
['tło' 'naczyniówkowy']
['stała' 'charakter']
['faza' 'żylny']
['uniesienie' 'siatkówka']
['widoczny' 'hiperfluorescencja']
['zmiana' 'naczyniowy']
['pomoc' 'soct']
['hiperfluorescencja' 

In [5]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
def generate_results(y_test, y_score,i, plot=False):
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    if plot:
        plt.figure()
        plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.05])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic curve')
        plt.show()
    if roc_auc > 0.7:
        print(get_i_word(i))
    #print('AUC: %f' % roc_auc)
    return roc_auc


In [33]:
import PIL
for img in retina.example_images[0:1]:
    img = img.reshape(74,100,3)
    
    #restore = lambda x: (x+3)*255
    #print(restore(img[0]))
    img = PIL.Image.fromarray(img[0])
    img.show()
    

In [27]:
hdf5_file = h5py.File(retina.path_to_data, 'r')
n_gram = hdf5_file.attrs['n_gram']
for g in n_gram:
    print(g)

['faza' 'późny']
['faza' 'wczesny']
['obraz' 'angiograficzny']
['późny' 'obraz']
['ukazować' 'faza']
['wszyscy' 'faza']
['faza' 'mieszany']
['hiperfluorescencja' 'odpowiadać']
['biegun' 'tylny']
['obrzęk' 'siatkówka']
['ognisko' 'hiperfluorescencja']
['faza' 'angiogram']
['powodować' 'faza']
['angiograficzny' 'odpowiadać']
['angiogram' 'widoczny']
['ognisko' 'hipofluorescencja']
['wynikać' 'obecność']
['plamisty' 'hiperfluorescencja']
['odpowiadać' 'obecność']
['hipofluorescencja' 'odpowiadać']
['koniec' 'badanie']
['hiperfluorescencja' 'wynikać']
['naczynia' 'siatkówka']
['narastać' 'powodować']
['laseroterapia' 'siatkówka']
['faza' 'kolejny']
['intensywność' 'zmiana']
['badanie' 'soct']
['obraz' 'odpowiadać']
['brak' 'cech']
['nabłonek' 'barwnikowy']
['wczesny' 'widoczny']
['maskowanie' 'tło']
['zmiana' 'narastać']
['widoczny' 'plamisty']
['terapia' 'doszklistkowa']
['wskazany' 'monitorować']
['odpowiadać' 'mikroaneuryzmaty']
['rozpocząć' 'laseroterapia']
['dołek' 'plamka']
['amd' 'p

['dolny' 'widoczny']
['pozaangiogram' 'zakres']
['cnv' 'amd']
['hiperfluorescencja' 'powiększać']
['błona' 'przedsiatkówkowa']
['terapia' 'anta']
['mikroaneuryzmaty' 'obszar']
['wskazany' 'parametr']
['krwotoczki' 'wysięk']
['liczny' 'plamisty']
['ukryć' 'błona']
['rpe' 'ognisko']
['cukrzycowy' 'oba']
['odpowiadać' 'może']
['pilny' 'panfotokoagulacja']
['zmiana' 'podobny']
['górny' 'skroniowy']
['praktyczny' 'jednooczność']
['wzgląd' 'praktyczny']
['włóknisty' 'przebieg']
['druz' 'miękki']
['płatkowaty' 'hiperfluorescencja']
['angiograficzny' 'potwierdzać']
['czytelność' 'angiogram']
['awaskularne' 'wskazany']
['plamisty' 'ognisko']
['odpowiadać' 'csr']
['powiększać' 'faza']
['widoczny' 'zmiana']
['siatkówka' 'częściowy']
['żylny' 'siatkówka']
['barwnik' 'faza']
['widoczny' 'drobny']
['wykluczyć' 'obecność']
['wybroczyna' 'faza']
['pozaangiogram' 'prawidłowy']
['awaskularne' 'intensywność']
['narastać' 'koniec']
['łączny' 'analiza']
['widoczny' 'rozległy']
['otoczyć' 'hiperfluorescencj