In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

['train.csv', 'sample_submission.csv', 'test_images', 'train_images', 'test.csv']


In [2]:
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, PReLU, Dropout
from keras.models import Model
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import SGD, Adam
from keras.applications.xception import Xception
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import Callback, ModelCheckpoint
from keras.layers import GlobalAveragePooling2D
from sklearn.metrics import cohen_kappa_score, accuracy_score, classification_report
%matplotlib inline

Using TensorFlow backend.


In [3]:
train_data = pd.read_csv("../input/train.csv")
train_data.groupby(by='diagnosis').size()

diagnosis
0    1805
1     370
2     999
3     193
4     295
dtype: int64

In [4]:
train_data.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7,2
1,001639a390f0,4
2,0024cdab0c1e,1
3,002c21358ce6,0
4,005b95c28852,0


In [5]:
def crop_image_from_gray(img,tol=7):
    if img.ndim ==2:
        mask = img>tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    elif img.ndim==3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img>tol
        check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
        if (check_shape == 0): # image is too dark so that we crop out everything,
            return img # return original image
        else:
            img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
            img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
            img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
            img = np.stack([img1,img2,img3],axis=-1)
        return img

In [6]:
IMG_SIZE = 224
batch_size = 32
epochs = 10

In [7]:
def preprocess_image(img_path):
    image = cv2.imread(img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    image = cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , 30) ,-4 ,128)
    return image

In [8]:
data = []
label = []
for img_id, img_label in tqdm(train_data.values):
    img_path = os.path.join("../input/train_images",str(img_id)+".png")
    img = preprocess_image(img_path)
    data.append(img)
    label.append(int(img_label))

100%|██████████| 3662/3662 [14:34<00:00,  3.78it/s]


In [9]:
data = np.array(data).astype('float32')/255
label = np.array(label)
label = to_categorical(label)
print("data.shape : ",data.shape)
print("label.shape : ",label.shape)

X_train, X_test, y_train, y_test = train_test_split(data, label,test_size=0.15,random_state=2019)
print("X_train.shape : ",X_train.shape)
print("y_train.shape : ",y_train.shape)
print("X_test.shape : ",X_test.shape)
print("y_test.shape : ",y_test.shape)

data.shape :  (3662, 224, 224, 3)
label.shape :  (3662, 5)
X_train.shape :  (3112, 224, 224, 3)
y_train.shape :  (3112, 5)
X_test.shape :  (550, 224, 224, 3)
y_test.shape :  (550, 5)


In [10]:
train_datagen = ImageDataGenerator(
        zoom_range=0.15,
        fill_mode='constant',
        cval=0.,  
        rotation_range=120,
        horizontal_flip=True,  
        vertical_flip=True,
    )
train_datagen.fit(X_train)
#test_datagen = ImageDataGenerator(rescale=1./255)
adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

class Metrics(Callback):
    def on_train_begin(self, logs={}):
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs={}):
        y_pred = self.model.predict(X_test)
        y_pred = np.array([np.argmax(pre) for pre in y_pred])
        y_true = np.array([np.argmax(y_true) for y_true in y_test])

        _val_kappa = cohen_kappa_score(
            y_true,
            y_pred, 
            weights='quadratic'
        )

        self.val_kappas.append(_val_kappa)
        
        print(classification_report(y_true, y_pred))

        print(f"val_kappa: {_val_kappa:.4f}")
        
        if _val_kappa == max(self.val_kappas):
            print("Validation Kappa has improved. Saving model.")
            self.model.save('resNet50.h5')

        return

kappa_metrics = Metrics()

In [11]:
def getResNet50(input_shape=(224, 224, 3), classes = 5, weights = None):
    input_layer = Input(shape=input_shape)
    resNet50 = ResNet50(include_top=False, weights=weights)(input_layer)
    x = GlobalAveragePooling2D(name='avg_pool')(resNet50)
    x = Dense(1024, name = 'fc1')(x)
    x = PReLU()(x)
    x = Dropout(0.5)(x)
    x = Dense(classes, activation='softmax', name='output')(x)
    model = Model(input_layer, x)
    return model

In [12]:
resNet50 = getResNet50(weights='imagenet')
resNet50.compile(loss='categorical_crossentropy', optimizer=adam,metrics=['accuracy'])



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [13]:
resNet50_history = resNet50.fit_generator(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[kappa_metrics])

Epoch 1/10
              precision    recall  f1-score   support

           0       0.81      1.00      0.90       286
           1       0.35      0.16      0.22        50
           2       0.67      0.74      0.70       149
           3       0.00      0.00      0.00        24
           4       0.78      0.17      0.28        41

    accuracy                           0.75       550
   macro avg       0.52      0.42      0.42       550
weighted avg       0.69      0.75      0.70       550

val_kappa: 0.7358
Validation Kappa has improved. Saving model.
Epoch 2/10
              precision    recall  f1-score   support

           0       0.91      1.00      0.95       286
           1       0.79      0.30      0.43        50
           2       0.73      0.87      0.80       149
           3       0.50      0.38      0.43        24
           4       0.80      0.39      0.52        41

    accuracy                           0.83       550
   macro avg       0.75      0.59      0.63   

In [14]:
resNet50.load_weights("resNet50.h5")
resNet50.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),metrics=['accuracy'])
resNet50_history = resNet50.fit_generator(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[kappa_metrics])

Epoch 1/10
              precision    recall  f1-score   support

           0       0.93      0.99      0.96       286
           1       0.71      0.48      0.57        50
           2       0.77      0.85      0.81       149
           3       0.45      0.38      0.41        24
           4       0.70      0.46      0.56        41

    accuracy                           0.84       550
   macro avg       0.71      0.63      0.66       550
weighted avg       0.83      0.84      0.83       550

val_kappa: 0.8796
Validation Kappa has improved. Saving model.
Epoch 2/10
              precision    recall  f1-score   support

           0       0.94      0.99      0.97       286
           1       0.53      0.60      0.56        50
           2       0.78      0.77      0.77       149
           3       0.47      0.33      0.39        24
           4       0.70      0.46      0.56        41

    accuracy                           0.83       550
   macro avg       0.68      0.63      0.65   