In [None]:
import os, sys
import random
import math
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import json
import pydicom
import skimage.io
from imgaug import augmenters as iaa
from tqdm import tqdm
import pandas as pd 
import glob 
from skimage.transform import resize

import keras

In [None]:
# Folder holding the RSNA pneumonia detection challenge files; the label CSV
# and the DICOM image folders used below are resolved relative to it.
DATA_DIR = '../rsna-pneumonia-detection-challenge/'
# Parent of the working directory (not referenced by the cells visible here).
ROOT_DIR = '../'

### Load annotations for the main dataset

In [None]:
# Folders with the stage 1 DICOM images (train and test) inside DATA_DIR.
train_dicom_dir = os.path.join(DATA_DIR, 'stage_1_train_images')
test_dicom_dir = os.path.join(DATA_DIR, 'stage_1_test_images')

def parse_dataset(anns, dicom_dir=None):
    """Turn the annotation table into a list of per-row records with a file path.

    Parameters
    ----------
    anns : pandas.DataFrame
        Annotation table; it must contain a ``patientId`` column.
    dicom_dir : str, optional
        Folder that holds the ``<patientId>.dcm`` files. Defaults to the
        module-level ``train_dicom_dir``.

    Returns
    -------
    list of pandas.Series
        One Series per row of ``anns`` (same fields) plus a ``'path'`` entry
        pointing at the DICOM file of that row's patient. ``anns`` itself is
        left untouched.
    """
    if dicom_dir is None:
        dicom_dir = train_dicom_dir

    image_annotations = []
    for _, row in anns.iterrows():
        # Work on an explicit copy: the object handed out by iterrows() should
        # not be modified in place.
        record = row.copy()
        record['path'] = os.path.join(dicom_dir, record['patientId'] + '.dcm')
        image_annotations.append(record)
    return image_annotations

# Training dataset: read the stage 1 label CSV and attach the DICOM path of
# every annotated row.
anns = pd.read_csv(os.path.join(DATA_DIR, 'stage_1_train_labels.csv'))
image_annotations = parse_dataset(anns=anns)

### Build annotations for the second dataset

In [None]:
# Metadata of the second dataset: keep only the images whose finding labels do
# not mention pneumonia, so they can be used as negative examples.
data = pd.read_csv('../data/Data_Entry_2017.csv')
data = data[~data['Finding Labels'].str.contains('Pneumonia', regex=False)]
fnames = np.array(data['Image Index'])

# Set of allowed file names for fast membership tests while scanning folders.
fnames_without_pneumonia = set(fnames)

paths_to_files_without_pneumonia = []
path_to_images = '../data/images_{part}/images'
for part in ('001','002','003','004','005','006','007','008','009','010','011','012'):
    path_to_part = path_to_images.replace('{part}', part)
    # Third item of the first os.walk() entry = plain files directly in the folder.
    names_in_dir = next(os.walk(path_to_part))[2]
    # Only keep files listed in the filtered metadata; without this check the
    # pneumonia images in the folders would also be labelled as negatives.
    paths_to_files_without_pneumonia += [
        os.path.join(path_to_part, fname)
        for fname in names_in_dir
        if fname in fnames_without_pneumonia]

# One record per negative image, with the same fields as the main dataset
# annotations (no bounding box, Target 0).
second_dataset = []
for path in paths_to_files_without_pneumonia:
    img_info = pd.Series({
            'x':float('nan'), 'y':float('nan'),
            'width':float('nan'), 'height':float('nan'),
            'Target':0,
            'path':path,
            # File name up to its first dot, independent of the path separator.
            'patientId':os.path.basename(path).split('.')[0]})
    second_dataset.append(img_info)

In [None]:
# Full training pool: main dataset annotations followed by the extra negatives.
merge_dataset = list(image_annotations)
merge_dataset.extend(second_dataset)
len(merge_dataset)

### Create data generator

In [None]:
class train_data_generator:
    """Factory for an endless generator of (image crops, binary labels) batches.

    Every dataset entry must support ``entry['path']`` and ``entry['Target']``.
    Entries with a truthy ``Target`` must also provide the bounding box keys
    ``'x'``, ``'y'``, ``'width'`` and ``'height'``.
    """

    @staticmethod
    def _read_image(fp):
        """Load the full-size image stored at ``fp`` as a numpy array."""
        if os.path.splitext(fp)[1].lower() == '.dcm':
            return pydicom.dcmread(fp).pixel_array
        # Every other extension (png, jpeg, jpg, ...) is read through skimage.
        return skimage.io.imread(fp)

    @staticmethod
    def _crop(full_image, info, shape, border):
        """Cut a ``shape[0]`` x ``shape[1]`` (rows x columns) window from the image.

        Positive entries are cropped around the centre of their bounding box,
        negative entries at a random position at least ``border`` pixels away
        from the image edges (the border shrinks when the image is too small
        to honour it). The window is always clamped to lie inside the image.

        Returns None when the image is smaller than the requested window.
        """
        crop_h, crop_w = shape[0], shape[1]
        # Largest valid top-left corner; x runs along columns, y along rows.
        max_x = full_image.shape[1] - crop_w
        max_y = full_image.shape[0] - crop_h
        if max_x < 0 or max_y < 0:
            return None

        if info['Target']:
            x = int(info['x'] + (info['width'] / 2) - (crop_w / 2))
            y = int(info['y'] + (info['height'] / 2) - (crop_h / 2))
            # Boxes near the edge would otherwise give negative or
            # out-of-range offsets and therefore a wrong-size crop.
            x = min(max(x, 0), max_x)
            y = min(max(y, 0), max_y)
        else:
            border_x = max(0, min(border, max_x // 2))
            border_y = max(0, min(border, max_y // 2))
            # randint's upper bound is exclusive and must exceed the lower one.
            x = np.random.randint(border_x, max(max_x - border_x, border_x + 1))
            y = np.random.randint(border_y, max(max_y - border_y, border_y + 1))

        return full_image[y:y + crop_h, x:x + crop_w].copy()

    @staticmethod
    def _to_rgb(crop_image):
        """Return ``crop_image`` as an (H, W, 3) array."""
        # Anything that is not already 3-channel is reduced to its first
        # channel and then replicated three times.
        if crop_image.ndim == 3 and crop_image.shape[2] != 3:
            crop_image = crop_image[:, :, 0]
        if crop_image.ndim == 2:
            crop_image = np.stack((crop_image,) * 3, -1)
        return crop_image

    @staticmethod
    def create(dataset_info, batch_size, shape, border=128):
        """Yield ``(images, labels)`` batches forever.

        Parameters
        ----------
        dataset_info : sequence
            Dataset entries (see the class docstring), sampled with replacement.
        batch_size : int
            Number of samples per batch.
        shape : tuple
            ``(height, width, channels)`` of one crop.
        border : int, optional
            Preferred minimum distance of random (negative) crops from the
            image edges.

        Yields
        ------
        tuple
            ``images`` of shape ``(batch_size, *shape)`` divided by 255 and
            ``labels`` of shape ``(batch_size, 1)`` holding 1 for positive
            entries and 0 otherwise.

        Raises
        ------
        ValueError
            If no usable sample can be drawn because the sampled images are
            smaller than the requested crop.
        """
        max_attempts = 100 * batch_size
        while True:
            batch_crop_image = np.zeros((batch_size, shape[0], shape[1], shape[2]))
            batch_labels = np.zeros((batch_size, 1))

            filled = 0
            attempts = 0
            # Keep drawing until every slot holds a valid crop, so a batch
            # never contains uninitialised images or mismatched labels.
            while filled < batch_size:
                if attempts >= max_attempts:
                    raise ValueError(
                        'could not draw a crop of shape %r from the dataset' % (tuple(shape[:2]),))
                attempts += 1

                e = dataset_info[np.random.randint(len(dataset_info))]
                full_image = train_data_generator._read_image(e['path'])
                crop_image = train_data_generator._crop(full_image, e, shape, border)
                if crop_image is None:
                    continue

                # Random horizontal flip as augmentation.
                if np.random.uniform(0, 1) > 0.5:
                    crop_image = np.fliplr(crop_image)

                batch_crop_image[filled] = train_data_generator._to_rgb(crop_image)
                if e['Target']:
                    batch_labels[filled] = 1
                filled += 1

            yield batch_crop_image / 255, batch_labels

### Create model

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras

def create_model(input_shape, n_out):
    """Build the classifier: InceptionResNetV2 backbone plus a dense head.

    The backbone is created without its top layers and with ImageNet weights
    for images of ``input_shape``. The head flattens its output and maps it
    through a 1024-unit hidden layer to ``n_out`` sigmoid outputs, with 50%
    dropout before each dense layer. The returned model is not compiled.
    """
    backbone = InceptionResNetV2(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape)

    classifier = Sequential()
    classifier.add(backbone)
    head_layers = (
        Flatten(),
        Activation('relu'),
        Dropout(0.5),
        Dense(1024),
        Activation('relu'),
        Dropout(0.5),
        Dense(n_out),
        Activation('sigmoid'),
    )
    for layer in head_layers:
        classifier.add(layer)
    return classifier

In [None]:
keras.backend.clear_session()

# Resume from the first checkpoint that exists; otherwise start from a freshly
# built network.
for checkpoint_path in ('../keras.model',
                        '../inceptionresnetv2-classificator/keras.model'):
    if os.path.exists(checkpoint_path):
        model = load_model(checkpoint_path)
        break
else:
    model = create_model(input_shape=(299,299,3), n_out=1)

# Freeze the first layer of the model so only the layers after it are trained.
first_layer = model.layers[0]
first_layer.trainable = False

optimizer = Adam(0.000001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])

model.summary()

In [None]:
# Training schedule.
epochs = 50
batch_size = 16

# Write the model to '../keras.model' during training, keeping only the best one.
checkpointer = ModelCheckpoint('../keras.model', verbose=2, save_best_only=True)

crop_shape = (299,299,3)
train_generator = train_data_generator.create(merge_dataset, batch_size, crop_shape)
validation_generator = train_data_generator.create(image_annotations, 100, crop_shape)

# Validation uses one fixed batch of 100 samples drawn once, before training.
# NOTE(review): it is sampled from image_annotations, which is also part of
# merge_dataset, so it is not a held-out set.
validation_batch = next(validation_generator)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    validation_data=validation_batch,
    epochs=epochs,
    verbose=1,
    callbacks=[checkpointer])

In [None]:
# Training curves: loss on the left panel, accuracy on the right one, each
# with its train and validation series.
fig, ax = plt.subplots(1, 2, figsize=(15,5))
for panel, metric in zip(ax, ('loss', 'acc')):
    panel.set_title(metric)
    panel.plot(history.epoch, history.history[metric], label="Train " + metric)
    panel.plot(history.epoch, history.history["val_" + metric], label="Validation " + metric)
ax[0].legend()
ax[1].legend()

In [None]:
# Reload the saved model for evaluation: prefer the checkpoint in the parent
# directory, otherwise fall back to the one in
# '../inceptionresnetv2-classificator/'. If neither file exists, `model` keeps
# whatever value it already has.
if os.path.exists('../keras.model'):
    model = load_model('../keras.model')
elif os.path.exists('../inceptionresnetv2-classificator/keras.model'):
    model = load_model('../inceptionresnetv2-classificator/keras.model')
    # IPython shell escape: copy the fallback checkpoint to '../' so the first
    # branch finds it next time.
    !cp "../inceptionresnetv2-classificator/keras.model" "../"

In [None]:
# Fresh generator over the main dataset annotations for the evaluation below:
# batches of 100 crops of 299x299x3.
validation_generator = train_data_generator.create(
    image_annotations, 100, (299,299,3))

In [None]:
# Collect ground-truth labels and raw model scores over 50 generated batches.
true_labels = []
predict_labels = []
for i in range(50):
    imgs, labels = next(validation_generator)
    # Flatten before converting: calling int()/float() on the 1-element row
    # arrays relies on an array-to-scalar conversion that NumPy has deprecated.
    true_labels += np.asarray(labels).ravel().astype(int).tolist()
    predict_labels += np.asarray(model.predict(imgs)).ravel().tolist()

In [None]:
# Convert the collected lists to arrays and binarise the scores: values above
# 0.5 become class 1, everything else class 0.
true_labels = np.array(true_labels)
predicted_scores = np.array(predict_labels)
predict_labels = np.greater(predicted_scores, 0.5).astype(int)

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix as a labelled table: rows are the true classes, columns the
# predicted ones (N = class 0, P = class 1). ``labels=[0, 1]`` pins the matrix
# to 2x2 even when only one class occurs in the evaluated samples, which the
# fixed 2x2 index/columns below require.
pd.DataFrame(
    confusion_matrix(true_labels, predict_labels, labels=[0, 1]),
    columns=[['predict', 'predict'],['N', 'P']],
    index=[['true', 'true'],['N', 'P']])

In [None]:
from sklearn.metrics import f1_score
# F1 score of the thresholded predictions, macro-averaged over the classes.
f1_score(true_labels, predict_labels, average='macro')