In [1]:
!pip install -U efficientnet

Collecting efficientnet
  Downloading efficientnet-1.1.0-py3-none-any.whl (18 kB)
Installing collected packages: efficientnet
Successfully installed efficientnet-1.1.0


In [2]:
# Loading in libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
import efficientnet.tfkeras as efn
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers

In [3]:
# Training metadata: file names get the '.png' suffix and the label column is
# turned into strings (the form passed as y_col to flow_from_dataframe below).
meta = (
    pd.read_csv("../input/siim-isic-melanoma-classification/train.csv")
    .assign(
        image_name=lambda frame: frame['image_name'] + '.png',
        target=lambda frame: frame['target'].astype('str'),
    )
)

In [7]:
# Randomly upsampling malignant cases. Trying to overcome class imbalance
#ros = meta[meta['target'] == '1'].sample(584*3, replace=True, random_state=1)
#meta1 = pd.concat([meta, ros]).sample(frac=1, random_state=1)

In [8]:
# Getting indexes to split data into train and validation.
# One stratified shuffle split: 10% of the rows are held out, stratified on
# the 'target' column.
sss = StratifiedShuffleSplit(n_splits=1, test_size=.1, random_state=1)
# split() yields one (train indices, held-out indices) pair per split; only the
# labels matter for stratification, so a dummy feature array is passed.
train_index, test_index = next(sss.split(np.zeros(len(meta)), meta['target']))

# Getting train and validation dataframes (test_index is the held-out 10%)
train_df, val_df = meta.iloc[train_index, :], meta.iloc[test_index, :]

In [None]:
import random
import numpy as np


class CutMixImageDataGenerator():
    """Combine two batch iterators into one that yields CutMix-augmented batches.

    For every image of a batch drawn from ``generator1`` a square patch is
    overwritten with the same region of the corresponding image drawn from
    ``generator2``; the labels of the two batches are blended accordingly.

    Both wrapped iterators must expose ``samples``, ``class_indices``,
    ``next()``, ``reset()`` and ``_set_index_array()`` (presumably Keras
    directory/dataframe iterators -- confirm against the caller).

    Args:
        generator1: iterator providing the base images and labels.
        generator2: iterator providing the images patches are cut from.
        img_size: side length in pixels of the (square) images.
        batch_size: batch size both iterators were created with.
    """

    def __init__(self, generator1, generator2, img_size, batch_size):
        self.batch_index = 0  # position of the next batch within the epoch
        self.samples = generator1.samples
        self.class_indices = generator1.class_indices
        self.generator1 = generator1
        self.generator2 = generator2
        self.img_size = img_size
        self.batch_size = batch_size

    def reset_index(self):
        """Rebuild the index arrays of both wrapped iterators."""
        self.generator1._set_index_array()
        self.generator2._set_index_array()

    def reset(self):
        """Rewind this wrapper and both wrapped iterators to the epoch start."""
        self.batch_index = 0
        self.generator1.reset()
        self.generator2.reset()
        self.reset_index()

    def get_steps_per_epoch(self):
        """Return the number of batches per epoch, counting a partial last one."""
        quotient, remainder = divmod(self.samples, self.batch_size)
        return (quotient + 1) if remainder else quotient

    def __len__(self):
        """Return the number of batches per epoch."""
        # The value has to be returned; without it len() raises TypeError.
        return self.get_steps_per_epoch()

    def __next__(self):
        """Return the next ``(X, y)`` batch with CutMix applied to every image.

        Returns:
            X: the image batch from ``generator1``, modified in place.
            y: ``y_1 * (1 - r**2) + y_2 * r**2`` where ``r`` is the per-image
               cut ratio (drawn from Beta(1, 1) and clipped to [0.2, 0.8]).
        """
        if self.batch_index == 0:
            self.reset()

        crt_idx = self.batch_index * self.batch_size
        if self.samples > crt_idx + self.batch_size:
            self.batch_index += 1
        else:  # this is the last batch of the epoch; start over next time
            self.batch_index = 0

        # The final batch of an epoch only holds the leftover samples.
        # NOTE(review): assumes the wrapped iterators stay in step with
        # batch_index and also return a shortened final batch -- confirm.
        reshape_size = self.batch_size
        last_step_start_idx = (self.get_steps_per_epoch() - 1) * self.batch_size
        if crt_idx == last_step_start_idx:
            reshape_size = self.samples - last_step_start_idx

        X_1, y_1 = self.generator1.next()
        X_2, y_2 = self.generator2.next()

        cut_ratio = np.random.beta(a=1, b=1, size=reshape_size)
        cut_ratio = np.clip(cut_ratio, 0.2, 0.8)
        label_ratio = cut_ratio.reshape(reshape_size, 1)

        X = X_1
        # Every image in the batch gets a patch. Stopping two short would leave
        # the last two images unmodified while their labels are still blended.
        for i in range(reshape_size):
            cut_size = int((self.img_size - 1) * cut_ratio[i])
            y1 = random.randint(0, (self.img_size - 1) - cut_size)
            x1 = random.randint(0, (self.img_size - 1) - cut_size)
            y2 = y1 + cut_size
            x2 = x1 + cut_size
            # Paste the region of the second image over the same region of the first.
            X[i, y1:y2, x1:x2] = X_2[i, y1:y2, x1:x2]

        # The patch side scales with cut_ratio, so the label weight uses its square.
        y = y_1 * (1 - (label_ratio ** 2)) + y_2 * (label_ratio ** 2)
        return X, y

    def __iter__(self):
        """Yield batches indefinitely, restarting the epoch whenever it ends."""
        while True:
            yield next(self)

In [9]:
# Data generators for training and validation.
# Neither generator applies augmentation or scaling; flips, rotation and the
# 1/255 rescaling are layers inside make_model().
# Options left disabled on the training generator: horizontal_flip=True,
# vertical_flip=True, preprocessing_function=efn.preprocess_input
# (the latter was also disabled on the validation generator).
img_dir = "../input/siic-isic-224x224-images/train"
train_gen = ImageDataGenerator()
val_gen = ImageDataGenerator()

# Arguments common to the training and validation iterators.
_flow_kwargs = dict(
    directory=img_dir,
    x_col='image_name',
    y_col='target',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    seed=1,
)

# Shuffled training iterator.
train_generator1 = train_gen.flow_from_dataframe(
    dataframe=train_df, color_mode='rgb', shuffle=True, **_flow_kwargs
)

# Cutmix generator (disabled): a second training iterator wrapped with the first.
# NOTE(review): both iterators would use seed=1 on the same dataframe, so they
# presumably yield identical batches and patches would come from the same
# image -- confirm before re-enabling.
#train_generator2 = train_gen.flow_from_dataframe(
#    dataframe=train_df, color_mode='rgb', shuffle=True, **_flow_kwargs
#)
#
#train_generator = CutMixImageDataGenerator(generator1=train_generator1,
#                                           generator2=train_generator2,
#                                           img_size=224,
#                                           batch_size=32)

# Validation iterator, unshuffled.
val_generator = val_gen.flow_from_dataframe(
    dataframe=val_df, shuffle=False, **_flow_kwargs
)

Found 31390 validated image filenames belonging to 2 classes.
Found 3488 validated image filenames belonging to 2 classes.


In [None]:
# Plotting example of the newly augmented cutmix images
#images = next(train_generator)
#image_examples = np.hstack(images[0][:2])
#plt.imshow(image_examples)

In [10]:
# Getting model function
def make_model(input_shape=(224, 224, 3), learning_rate=.001, dropout_rate=.5):
    """Build and compile the EfficientNetB4-based two-class classifier.

    The network applies a random horizontal flip, a random rotation and a
    1/255 rescaling to its input before the EfficientNetB4 backbone, so it is
    meant to be fed un-normalised pixel values.

    Args:
        input_shape: shape of one input image, ``(height, width, channels)``.
        learning_rate: learning rate passed to the Adam optimizer.
        dropout_rate: drop fraction of the dropout layer before the output.

    Returns:
        A compiled ``tf.keras.Model`` with a 2-unit softmax output, trained
        with categorical cross-entropy and reporting AUC under the name 'auc'.
    """
    # Input
    inputs = layers.Input(shape=input_shape)

    # Data augmentation and scaling as layers, so they run as part of the model
    x = layers.experimental.preprocessing.RandomFlip("horizontal")(inputs)
    x = layers.experimental.preprocessing.RandomRotation(0.1)(x)
    x = layers.experimental.preprocessing.Rescaling(1./255)(x)

    # EfficientNetB4 backbone without its classification head
    x = efn.EfficientNetB4(weights='imagenet', include_top=False)(x)

    # Dense block and output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(2, activation='softmax')(x)

    # Creating and compiling model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=[AUC(name='auc')])
    return model

In [11]:
# Training configuration

# Number of whole batches per epoch; floor division leaves out any trailing
# partial batch.
STEP_SIZE_TRAIN = train_generator1.n // train_generator1.batch_size
STEP_SIZE_VALID = val_generator.n // val_generator.batch_size

# Loss weights per class index: index 1 counts 50 times as much as index 0
# (presumably index 1 is the malignant class -- confirm via class_indices).
class_weight = {0: 1., 1: 50.}
class_weights = class_weight  # second name bound to the same dict

# Checkpoint callback: only writes the file when val_auc reaches a new maximum.
mcp_save = ModelCheckpoint(
    filepath='mw_gpu_preproc.hdf5',
    monitor='val_auc',
    mode='max',
    save_best_only=True,
)

# Build and compile the network.
model = make_model()

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b4_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5


In [12]:
# Fitting model
# Model.fit accepts generators directly; fit_generator is deprecated.
# The returned History object is kept so the training curves can be inspected.
history = model.fit(train_generator1,
                    validation_data=val_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_steps=STEP_SIZE_VALID,
                    callbacks=[mcp_save],
                    class_weight=class_weight,
                    verbose=2,
                    epochs=10)

Epoch 1/10
980/980 - 497s - loss: 0.2144 - auc: 0.9707 - val_loss: 0.2208 - val_auc: 0.9763
Epoch 2/10


KeyboardInterrupt: 

In [13]:
# Loading in test labels and images
test_df = pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
# Keep the bare image names (without extension) for the submission file; they
# are copied before the column is rewritten, so the CSV is only read once.
test_img_names = test_df['image_name'].values.copy()
test_df['image_name'] = test_df['image_name'] + '.png'

In [14]:
# No generator-side preprocessing: the train/validation generators feed raw
# pixels and the model rescales by 1/255 itself, so applying
# efn.preprocess_input here would normalise the test images a second time.
test_gen = ImageDataGenerator()
test_generator = test_gen.flow_from_dataframe(dataframe=test_df,
                                              x_col='image_name',
                                              directory="../input/siic-isic-224x224-images/test",
                                              class_mode=None,
                                              target_size=(224,224),
                                              batch_size=32,
                                              seed=1,
                                              shuffle=False)  # keep row order aligned with test_df

Found 10982 validated image filenames.


In [15]:
# Making predictions
# Model.predict accepts generators directly; predict_generator is deprecated.
predictions = model.predict(test_generator)
# Column 1 of the softmax output is used as the submitted score.
predictions1 = predictions[:,1]
# Submission dataset
sub = pd.DataFrame({"image_name": test_img_names, "target": predictions1})

sub.to_csv('efn4_overfitlol.csv', index=False)