In [1]:
import cv2


In [2]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL
from PIL import Image


In [3]:
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa

In [4]:
from tqdm import tqdm

In [5]:
from sklearn.utils import class_weight, shuffle
import tensorflow as tf

In [6]:
import warnings
warnings.filterwarnings("ignore")
WINDOW_SIZE = 331
IMAGE_SIZE  = 512
IMAGE_CHANNELS=3
NUM_CLASSES=28

In [7]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Reshape, Dense, Concatenate, GlobalMaxPooling2D
from keras.layers import BatchNormalization, Input, Conv2D, Lambda, Average
from keras.applications.nasnet import NASNetLarge
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam
from keras import backend as K
import keras
from keras.models import Model
from keras.utils import multi_gpu_model, multi_gpu_utils

Using TensorFlow backend.


In [8]:
path_to_train = '../../Human_Protein_Atlas/input/train/'
data = pd.read_csv('../../Human_Protein_Atlas/input/train.csv')

train_dataset_info = []
for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
    train_dataset_info.append({
        'path':os.path.join(path_to_train, name),
        'labels':np.array([int(label) for label in labels])})
train_dataset_info = np.array(train_dataset_info)

class data_generator:
    def __init__(self, it):
        self.it = it
    def __call__(self):
        return self.it

    def get_dataset(dataset_info, batch_size, shape, augument=True):
        gen = data_generator.create_train(dataset_info, batch_size, shape, augument)
        gen = data_generator(gen)
        types = (tf.float32, tf.float32)
        shapes=(tf.TensorShape((WINDOW_SIZE, WINDOW_SIZE, IMAGE_CHANNELS)), tf.TensorShape([NUM_CLASSES]))
        dataset = tf.data.Dataset.from_generator(
            gen, types, shapes
        )
        #dataset = dataset.repeat()
        dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(batch_size*8)
        return dataset

    def create_train(dataset_info, batch_size, shape, augument=True):
        assert shape[2] == 3
        dataset_info = shuffle(dataset_info)
        while True:
            for xs, xe, ys, ye in data_generator.slice_images():
                for idx in range(len(dataset_info)):
                    #X_train_batch = dataset_info[start:end]
                    batch_labels = np.zeros((NUM_CLASSES))
                    image = data_generator.load_image(
                            dataset_info[idx]['path'], shape)   
                    if augument:
                        image = data_generator.augment(image)
                    #print(image)
                    image=image/255.
                    #print(image)
                    batch_labels[dataset_info[idx]['labels']] = 1
                    yield image[xs:xe, ys:ye, :], batch_labels

    def load_image(path, shape):
        image_red_ch = Image.open(path+'_red.png')
        image_yellow_ch = Image.open(path+'_yellow.png')
        image_green_ch = Image.open(path+'_green.png')
        image_blue_ch = Image.open(path+'_blue.png')
        image = np.stack((
        np.array(image_red_ch), 
        np.array(image_green_ch), 
        np.array(image_blue_ch)), -1)
        #image = cv2.resize(image, (shape[0], shape[1]))
        return image

    def augment(image):
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug

    def slice_images():
        offset = int(IMAGE_SIZE%WINDOW_SIZE)
        for i in range(2):
            for j in range(2):
                x_start=i*offset
                x_end=x_start+WINDOW_SIZE

                y_start=j*offset
                y_end=y_start+WINDOW_SIZE
                
                yield x_start, x_end, y_start, y_end

In [9]:
with tf.device('/cpu:0'):
    input_shape=(WINDOW_SIZE,WINDOW_SIZE, IMAGE_CHANNELS)
    input_tensor = Input(shape=(WINDOW_SIZE, WINDOW_SIZE, IMAGE_CHANNELS))
    base_model = NASNetLarge(include_top=False,
                             weights='imagenet',
                             input_shape=input_shape
                             #input_shape=(WINDOW_SIZE, WINDOW_SIZE, IMAGE_CHANNELS)
                            )
    bn = BatchNormalization()(input_tensor)
    x = base_model(bn)
    x = Conv2D(32, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(NUM_CLASSES, activation='sigmoid')(x)
    model = Model(input_tensor, output)

In [10]:
model = multi_gpu_model(model, gpus=2)

In [16]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

epochs = 50; batch_size = 16
checkpoint = ModelCheckpoint('../../Human_Protein_Atlas/working/NASNetLarge.h5', monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only = True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, 
                                   verbose=1, mode='auto', epsilon=0.0001)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=6)
callbacks_list = [checkpoint, early, reduceLROnPlat]

# split data into train, valid
indexes = np.arange(train_dataset_info.shape[0])
np.random.shuffle(indexes)
train_indexes, valid_indexes = train_test_split(indexes, test_size=0.05, random_state=74)

# create train and valid datagens
train_generator = data_generator.get_dataset(
    train_dataset_info[train_indexes], batch_size, (IMAGE_SIZE,IMAGE_SIZE,IMAGE_CHANNELS), augument=True)
validation_generator = data_generator.get_dataset(
    train_dataset_info[valid_indexes], 32, (IMAGE_SIZE,IMAGE_SIZE,IMAGE_CHANNELS), augument=False)

In [12]:
for layer in model.layers:
    layer.trainable = False
model.layers[-1].trainable = True
model.layers[-2].trainable = True
model.layers[-3].trainable = True
model.layers[-4].trainable = True
model.layers[-5].trainable = True

model.compile(
    loss='binary_crossentropy', 
    optimizer=Adam(1e-03),
    metrics=['acc'])
# model.summary()
train_images, train_labels = train_generator.make_one_shot_iterator().get_next()
val_images, val_labels = validation_generator.make_one_shot_iterator().get_next()

In [13]:
model.fit(
    x=train_images, y=train_labels,
    steps_per_epoch=int(np.ceil(float(len(train_indexes)) / float(batch_size))*4),
    validation_data=(val_images, val_labels),
    validation_steps=int(np.ceil(float(len(valid_indexes)) / float(batch_size))*4),
    epochs=2, 
    verbose=1)

Train on 16 samples, validate on 32 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f0ddee438d0>

In [14]:
# train all layers
for layer in model.layers:
    layer.trainable = True
model.compile(loss='binary_crossentropy',
            optimizer=Adam(lr=1e-4),
            metrics=['accuracy'])
model.fit(
    x=train_images, y=train_labels,
    steps_per_epoch=int(np.ceil(float(len(train_indexes)) / float(batch_size))*4),
    validation_data=(val_images, val_labels),
    validation_steps=int(np.ceil(float(len(valid_indexes)) / float(batch_size))*4),
    epochs=epochs,
    verbose=1,
    callbacks=callbacks_list)

Train on 16 samples, validate on 32 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.13352, saving model to ../../working/NASNetLarge.h5


OSError: Unable to create file (unable to open file: name = '../../working/NASNetLarge.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [15]:
model.save_weights('../../Human_Protein_Atlas/working/NASNetLarge.h5')

In [None]:
model.fit(
    x=train_images, y=train_labels,
    steps_per_epoch=int(np.ceil(float(len(train_indexes)) / float(batch_size))*4),
    validation_data=(val_images, val_labels),
    validation_steps=int(np.ceil(float(len(valid_indexes)) / float(batch_size))*4),
    epochs=epochs,
    verbose=1,
    callbacks=callbacks_list)

Train on 16 samples, validate on 32 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.15637, saving model to ../../Human_Protein_Atlas/working/NASNetLarge.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.15637 to 0.13075, saving model to ../../Human_Protein_Atlas/working/NASNetLarge.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.13075 to 0.11473, saving model to ../../Human_Protein_Atlas/working/NASNetLarge.h5
Epoch 4/50

Epoch 00004: val_loss improved from 0.11473 to 0.11248, saving model to ../../Human_Protein_Atlas/working/NASNetLarge.h5
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.11248
Epoch 6/50

Epoch 00007: val_loss did not improve from 0.10706
Epoch 8/50

Epoch 00008: val_loss improved from 0.10706 to 0.10167, saving model to ../../Human_Protein_Atlas/working/NASNetLarge.h5
Epoch 9/50

In [18]:
1

1

In [19]:
2

2

In [21]:
with tf.device('/cpu:0'):
    input_shape=(WINDOW_SIZE,WINDOW_SIZE, IMAGE_CHANNELS)
    input_tensor = Input(shape=(WINDOW_SIZE, WINDOW_SIZE, IMAGE_CHANNELS))
    base_model = NASNetLarge(include_top=False,
                             weights='imagenet',
                             input_shape=input_shape
                             #input_shape=(WINDOW_SIZE, WINDOW_SIZE, IMAGE_CHANNELS)
                            )
    bn = BatchNormalization()(input_tensor)
    x = base_model(bn)
    x = Conv2D(32, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(NUM_CLASSES, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    
model = multi_gpu_model(model, gpus=2)

model.load_weights('../../Human_Protein_Atlas/working/NASNetLarge.h5')

ValueError: axes don't match array

In [None]:
1

In [None]:
submit = pd.read_csv('../input/sample_submission.csv')
predicted = []
draw_predict = []
for name in tqdm(submit['Id']):
    path = os.path.join('../input/test/', name)
    image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
    score_predict = model.predict(image[np.newaxis])[0]
    draw_predict.append(score_predict)
    label_predict = np.arange(28)[score_predict>=0.2]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)