In [1]:
import csv
import keras
import numpy as np
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras import backend as K

from keras.initializers import he_normal
from keras.layers import Dense, Input, add, Activation, Lambda, concatenate, Dropout
from keras.layers import Conv2D, AveragePooling2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras import optimizers, regularizers
from keras.callbacks import LearningRateScheduler, TensorBoard
from keras.callbacks import ModelCheckpoint
import os
import pandas as pd
from tensorflow.python.keras.utils.data_utils import Sequence
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
image_size=224
growth_rate=12
growth_rate_121=32
depth = 10
compression= 0.5
img_rows, img_cols = 224,224
img_channels = 1
num_classes= 6
batch_size=25        
epochs=10
iterations =10000
weight_decay= 1e-4
data_path= "C:/Users/xiang/Desktop/rsna-intracranial-hemorrhage-detection-224"
images_path=f'{data_path}/stage_2_train_images'
csv_path=f'{data_path}/stage_2_train.csv'

In [3]:
def conv(x, out_filters, k_size):
    return Conv2D(filters=out_filters,
                  kernel_size=k_size,
                  strides=(1,1),
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=regularizers.l2(weight_decay),
                  use_bias=False)(x)

def dense_layer(x):
    return Dense(units=num_classes,
                 activation='sigmoid',
                 kernel_initializer='he_normal',
                 kernel_regularizer=regularizers.l2(weight_decay))(x)

def bn_relu(x):
    x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
    x = Activation('relu')(x)
    return x

def bottleneck(x):
    channels = growth_rate * 4
    x = bn_relu(x)
    x = conv(x, channels, (1,1)) # 48
    x = bn_relu(x)
    x = conv(x, growth_rate, (3,3)) # 12
    return x

# feature map size and channels half
def transition(x, inchannels):
    outchannels = int(inchannels * compression)
    x = bn_relu(x)
    x = conv(x, outchannels, (1,1))
    x = AveragePooling2D((2,2), strides=(2, 2))(x)
    return x, outchannels

def dense_block(x,blocks,nchannels):
    concat = x
    for i in range(blocks):
        x = bottleneck(concat)
        concat = concatenate([x,concat], axis=-1)
        nchannels += growth_rate
    return concat, nchannels


def densenet(img_input, classes_num):
    nblocks = (depth - 4) // 6  # 16
    nchannels = growth_rate * 2  # 12*2 = 24

    x = conv(img_input, nchannels, (3, 3))  # 32*32*3 to 32*32*24
    # 32*32*24 to 32*32*(24+nblocks*growth_rate) = 24+16*12 = 216
    x, nchannels = dense_block(x, nblocks, nchannels)  # 32*32*24 to 32*32*216
    x, nchannels = transition(x, nchannels)  # 32*32*216 to 16*16*108

    x, nchannels = dense_block(x, nblocks, nchannels)  # 16*16*108 to 16*16*（108+16*12） = 16*16*300
    x, nchannels = transition(x, nchannels)  # 16*16*300 to 8*8*150

    x, nchannels = dense_block(x, nblocks, nchannels)  # 8*8*150 to 8*8*(150+16*12) = 8*8*342
    x = bn_relu(x)
    x = GlobalAveragePooling2D()(x)  # 8*8*342 to 342
    # x=Dropout(0.5)
    x = dense_layer(x)  # 342 to 10
    return x


def densenet_121(img_input, classes_num):
    nchannels = growth_rate_121 * 2
    x = conv(img_input, nchannels, (3, 3))
    x, nchannels = dense_block(x, 6, nchannels)
    x, nchannels = transition(x, nchannels)
    x, nchannels = dense_block(x, 12, nchannels)
    x, nchannels = transition(x, nchannels)
    x, nchannels = dense_block(x, 24, nchannels)
    x, nchannels = transition(x, nchannels)
    x, nchannels = dense_block(x, 16, nchannels)
    x = bn_relu(x)
    x = GlobalAveragePooling2D()(x)
    x = dense_layer(x)  # 342 to 10
    return x

In [4]:
def shuffle_data(x):
    rand_indices=np.random.permutation(x.shape[0])
    train_indices=rand_indices[0:750000]
    vali_indices=rand_indices[750000:755940]
    return train_indices,vali_indices

def scheduler(epoch):
    if epoch < 25:
        return 0.1
    if epoch < 40:
        return 0.01
    return 0.001

In [5]:
import augmentation as aug

In [3]:
data_path_image=f'{data_path}/stage_2_train_images'
data_path_csv=f'{data_path}/stage_2_train.csv'

In [4]:
image_index=os.listdir(f'{data_path}/stage_2_train_images')
image_index=pd.DataFrame(image_index)
image_index=image_index[0].str.slice(stop=12).values

In [8]:
gener=aug.DataGenerator(data_path,images_path=f'{data_path}/stage_2_train_images',csv_path=f'{data_path}/stage_2_train.csv',
                    num_thread=None,is_train=True)

tr_indices,vali_indices=shuffle_data(image_index)

In [9]:
img_input = Input(shape=(img_rows, img_cols, img_channels))
sgd = optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
output = densenet(img_input,num_classes)

model = Model(img_input, output)

In [10]:
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['binary_accuracy'])
print(model.summary())

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 224, 224, 24) 216         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 224, 224, 24) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 224, 224, 24) 0           batch_normalization_1[0][0]     

In [5]:
test_image_path=f'{data_path}/stage_2_test_images'
test_image_index=os.listdir(f'{data_path}/stage_2_test_images')
test_image_index=pd.DataFrame(test_image_index)
test_image_index=test_image_index[0].str.slice(stop=12).values

print(len(test_image_index))

121232


In [12]:
from keras.callbacks import ReduceLROnPlateau
# change_lr = LearningRateScheduler(scheduler)
# cbks =[change_lr]
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=10, min_lr=0.001)


In [13]:
checkpointer=ModelCheckpoint(filepath=os.path.join(data_path,'chack_point_weight.hdf5'),save_best_only=True,save_weights_only=True)
model.save_weights(os.path.join(data_path,'my_model_weights.h5'))
model.save(os.path.join(data_path,'my_model.h5'))

In [14]:
# model.load_weights(os.path.join(data_path,'my_model_weights.h5'))

In [15]:
history=model.fit_generator(generator=gener.get_X_batch(image_index[tr_indices],batch_size,image_size),steps_per_epoch=iterations,epochs=epochs,callbacks=[reduce_lr],
                            validation_data=gener.get_X_batch(image_index[vali_indices],20,image_size),validation_steps=int(len(vali_indices)//20),class_weight={0:2.,1:1.,2:1.,3:1.,4:1.,5:1.})
print(history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
<keras.callbacks.callbacks.History object at 0x00000279626399E8>


In [6]:

test_image_path=f'{data_path}/stage_2_test_images'
test_image_index=os.listdir(f'{data_path}/stage_2_test_images')
test_image_index=pd.DataFrame(test_image_index)
test_image_index=test_image_index[0].str.slice(stop=12).values




In [7]:
from concurrent.futures import ThreadPoolExecutor as threadPool
from PIL import Image
def load_image(temp_path):
        im = np.array(Image.open(test_image_path+'/'+temp_path+'.png'))
#         print(im)
        return im
        
def get_img(img_paths, img_size):
        p = threadPool()
        X = np.zeros((len(img_paths), img_size, img_size), dtype=np.uint8)
        i = 0
        for future in p.map(load_image, img_paths):
            img = np.resize(future, (img_size, img_size))
#             print(img)
            X[i, :, :] = img
            
            i += 1
        p.shutdown(wait=False)
        return X

In [8]:
def get_test_batch(X_path,batch_size,img_size):
        try:
            if len(X_path) %batch_size != 0:
                raise Exception("batchSize not match the size of data!")
        except Exception as err:
            print(err)
        else:
            while 1:
                for i in range(0, len(X_path), batch_size):
                    X =(get_img(X_path[i:i + batch_size], img_size))
                    yield np.resize(X/255,(len(X),img_size,img_size,1)).astype('float16')


In [54]:
test_gener=get_test_batch(test_image_index,16,224)
print(test_image_index)

['ID_000000e27' 'ID_000009146' 'ID_00007b8cb' ... 'ID_fffd3cc3b'
 'ID_fffdcca96' 'ID_fffe2c218']


In [55]:
submission_result=model.predict_generator(generator=test_gener,steps=len(test_image_index)//16)
print(submission_result)

[[0.7766801  0.02606633 0.13395101 0.03158039 0.12054983 0.48744044]
 [0.08083221 0.01031113 0.02740192 0.00719813 0.06208894 0.072341  ]
 [0.41532925 0.01307261 0.04345772 0.00810015 0.05818757 0.30265513]
 ...
 [0.7963841  0.02802125 0.08093587 0.00282025 0.0982089  0.6880413 ]
 [0.5264583  0.01803511 0.05653527 0.00410849 0.07529265 0.4022559 ]
 [0.5947831  0.01987314 0.06191966 0.00568101 0.09493762 0.44380865]]


In [74]:
def evaluation(prob):        
    return max(min(prob,1-pow(10,-15)),pow(10,-15))


0.999999999999999
1e-15


0.7766801

In [75]:
for i in range(len(submission_result)):
    for j in range(len(submission_result[i])):
        submission_result[i,j]=evaluation(submission_result[i,j])

In [76]:
print(submission_result)

[[0.7766801  0.02606633 0.13395101 0.03158039 0.12054983 0.48744044]
 [0.08083221 0.01031113 0.02740192 0.00719813 0.06208894 0.072341  ]
 [0.41532925 0.01307261 0.04345772 0.00810015 0.05818757 0.30265513]
 ...
 [0.7963841  0.02802125 0.08093587 0.00282025 0.0982089  0.6880413 ]
 [0.5264583  0.01803511 0.05653527 0.00410849 0.07529265 0.4022559 ]
 [0.5947831  0.01987314 0.06191966 0.00568101 0.09493762 0.44380865]]


In [10]:
def read_csv(filename):
            df = pd.read_csv(filename)
            df["ImageID"] = df["ID"].str.slice(stop=12)
            df["Diagnosis"] = df["ID"].str.slice(start=13)
            df = df.loc[:, ["Label", "Diagnosis", "ImageID"]]
            df = df.set_index(['ImageID', 'Diagnosis']).unstack(level=-1)
            return df

In [14]:

submission_csv=read_csv('C:/Users/xiang/Desktop/stage_2_sample_submission_densnet.csv')
submission_csv.head()

Unnamed: 0_level_0,Label,Label,Label,Label,Label,Label
Diagnosis,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
ImageID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ID_000000e27,0.77668,0.026066,0.133951,0.03158,0.12055,0.48744
ID_000009146,0.080832,0.010311,0.027402,0.007198,0.062089,0.072341
ID_00007b8cb,0.415329,0.013073,0.043458,0.0081,0.058188,0.302655
ID_000134952,0.323754,0.014197,0.059395,0.016381,0.068294,0.17145
ID_000176f2a,0.988932,0.063822,0.270169,0.049613,0.296801,0.886156


In [15]:
submission_result=submission_csv.values

In [16]:
print(submission_result)

[[0.7766801  0.02606633 0.13395101 0.03158039 0.12054983 0.48744044]
 [0.08083221 0.01031113 0.02740192 0.00719812 0.06208894 0.072341  ]
 [0.41532925 0.01307261 0.04345772 0.00810015 0.05818757 0.30265513]
 ...
 [0.7963841  0.02802125 0.08093587 0.00282025 0.0982089  0.6880413 ]
 [0.5264583  0.01803511 0.05653527 0.00410849 0.07529265 0.4022559 ]
 [0.5947831  0.01987314 0.06191966 0.00568101 0.09493762 0.44380865]]


In [17]:
print(test_image_index)

['ID_000000e27' 'ID_000009146' 'ID_00007b8cb' ... 'ID_fffd3cc3b'
 'ID_fffdcca96' 'ID_fffe2c218']


In [18]:
result_dict={}
for i in range(len(test_image_index)):
    result_dict[test_image_index[i]]=submission_result[i]

In [20]:
for i in result_dict.keys():
    print(result_dict[i])

[0.7766801  0.02606633 0.13395101 0.03158039 0.12054983 0.48744044]
[0.08083221 0.01031113 0.02740192 0.00719812 0.06208894 0.072341  ]
[0.41532925 0.01307261 0.04345772 0.00810015 0.05818757 0.30265513]
[0.323754   0.01419672 0.0593949  0.01638055 0.06829441 0.17144993]
[0.98893166 0.06382236 0.2701692  0.04961339 0.29680073 0.88615644]
[0.87624264 0.02822402 0.10455278 0.00643077 0.09907252 0.7168857 ]
[0.8346267  0.02482218 0.14103046 0.0231784  0.17118335 0.43569207]
[0.9574548  0.02757072 0.31739378 0.26779237 0.37106565 0.40899315]
[0.4939605  0.02123806 0.04992828 0.0172019  0.1078046  0.2974741 ]
[0.9724548  0.04747632 0.28591216 0.08628377 0.31979993 0.68919325]
[0.96977156 0.02383578 0.4248146  0.23056573 0.29951626 0.31245524]
[0.10756445 0.0062893  0.03184778 0.00280842 0.04480872 0.07739168]
[0.3079239  0.01098734 0.03335944 0.02337071 0.05162227 0.19118309]
[0.09482089 0.01274905 0.03183502 0.00782874 0.07308564 0.07652643]
[0.9332649  0.03124583 0.14910564 0.05838677 0.2

In [21]:
for i in submission_csv.index:
    submission_csv.loc[i]=result_dict[i]

In [22]:
submission_csv.head()

Unnamed: 0_level_0,Label,Label,Label,Label,Label,Label
Diagnosis,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
ImageID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ID_000000e27,0.77668,0.026066,0.133951,0.03158,0.12055,0.48744
ID_000009146,0.080832,0.010311,0.027402,0.007198,0.062089,0.072341
ID_00007b8cb,0.415329,0.013073,0.043458,0.0081,0.058188,0.302655
ID_000134952,0.323754,0.014197,0.059395,0.016381,0.068294,0.17145
ID_000176f2a,0.988932,0.063822,0.270169,0.049613,0.296801,0.886156


In [23]:
submission_csv = submission_csv.stack().reset_index()
submission_csv.insert(loc=0, column='ID', value=submission_csv['ImageID'].astype(str) + "_" + submission_csv['Diagnosis'])
submission_csv= submission_csv.drop(["ImageID", "Diagnosis"], axis=1)
submission_csv.to_csv('stage_2_sample_submission.csv', index=False)
submission_csv.head()

Unnamed: 0,ID,Label
0,ID_000000e27_any,0.77668
1,ID_000000e27_epidural,0.026066
2,ID_000000e27_intraparenchymal,0.133951
3,ID_000000e27_intraventricular,0.03158
4,ID_000000e27_subarachnoid,0.12055


In [88]:
submission_csv.iloc[:, :] =submission_result

print(submission_csv.head())

                 Label                                              \
Diagnosis          any  epidural intraparenchymal intraventricular   
ImageID                                                              
ID_000000e27  0.776680  0.026066         0.133951         0.031580   
ID_000009146  0.080832  0.010311         0.027402         0.007198   
ID_00007b8cb  0.415329  0.013073         0.043458         0.008100   
ID_000134952  0.323754  0.014197         0.059395         0.016381   
ID_000176f2a  0.988932  0.063822         0.270169         0.049613   

                                     
Diagnosis    subarachnoid  subdural  
ImageID                              
ID_000000e27     0.120550  0.487440  
ID_000009146     0.062089  0.072341  
ID_00007b8cb     0.058188  0.302655  
ID_000134952     0.068294  0.171450  
ID_000176f2a     0.296801  0.886156  


In [89]:
submission_csv = submission_csv.stack().reset_index()


submission_csv.head()

Unnamed: 0,ImageID,Diagnosis,Label
0,ID_000000e27,any,0.77668
1,ID_000000e27,epidural,0.026066
2,ID_000000e27,intraparenchymal,0.133951
3,ID_000000e27,intraventricular,0.03158
4,ID_000000e27,subarachnoid,0.12055


In [90]:
submission_csv.insert(loc=0, column='ID', value=submission_csv['ImageID'].astype(str) + "_" + submission_csv['Diagnosis'])
submission_csv.head()

Unnamed: 0,ID,ImageID,Diagnosis,Label
0,ID_000000e27_any,ID_000000e27,any,0.77668
1,ID_000000e27_epidural,ID_000000e27,epidural,0.026066
2,ID_000000e27_intraparenchymal,ID_000000e27,intraparenchymal,0.133951
3,ID_000000e27_intraventricular,ID_000000e27,intraventricular,0.03158
4,ID_000000e27_subarachnoid,ID_000000e27,subarachnoid,0.12055


In [91]:
submission_csv= submission_csv.drop(["ImageID", "Diagnosis"], axis=1)
submission_csv.to_csv('stage_2_sample_submission.csv', index=False)


In [92]:
submission_csv.head()

Unnamed: 0,ID,Label
0,ID_000000e27_any,0.77668
1,ID_000000e27_epidural,0.026066
2,ID_000000e27_intraparenchymal,0.133951
3,ID_000000e27_intraventricular,0.03158
4,ID_000000e27_subarachnoid,0.12055


In [None]:
new_csv=read_csv('C:/Users/xiang/Desktop/rsna-intracranial-hemorrhage-detection-224/stage_2_sample_submission.csv')
print(len(new_csv.index))
print(new_csv.head())