In [1]:
# Loading in libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
# Read the training metadata and append ".png" to every image name so the
# column can be used as the file-name column by the image generators.
meta = (
    pd.read_csv("../input/siim-isic-melanoma-classification/train.csv")
    .assign(image_name=lambda frame: frame['image_name'] + '.png')
)

In [3]:
# Stratified split on `target`: 10% of the rows are held out for validation.
# `split` yields one (train_indices, test_indices) pair per requested split,
# so a single split is requested and `next` takes that pair. This keeps
# `train_index` / `test_index` as plain index arrays instead of tuples.
sss = StratifiedShuffleSplit(n_splits=1, test_size=.1, random_state=1)
train_index, test_index = next(sss.split(np.zeros(len(meta)), meta['target']))

# Getting train and validation dataframes
train_df, val_df = meta.iloc[train_index, :], meta.iloc[test_index, :]

In [4]:
# Image generators with default settings: no augmentation or preprocessing
# function is configured at this stage.
train_gen = ImageDataGenerator()
val_gen = ImageDataGenerator()

# Directory the training and validation images are read from
img_dir = "../input/siic-isic-224x224-images/train"

# Training batches: 50 shuffled RGB images, labels taken as-is from `target`
train_generator = train_gen.flow_from_dataframe(
    dataframe=train_df,
    directory=img_dir,
    x_col='image_name',
    y_col='target',
    class_mode='raw',
    color_mode='rgb',
    target_size=(224, 224),
    batch_size=50,
    shuffle=True,
    seed=1,
)

# Validation batches: 32 images, kept in dataframe order (no shuffling)
val_generator = val_gen.flow_from_dataframe(
    dataframe=val_df,
    directory=img_dir,
    x_col='image_name',
    y_col='target',
    class_mode='raw',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=1,
)

Found 29813 validated image filenames.
Found 3313 validated image filenames.


In [51]:
# Getting model function
def make_model(input_shape=(224, 224, 3), dropout_rate=.6, learning_rate=.000001):
    """Build and compile the ResNet50-based binary classifier.

    The network applies in-model augmentation (random horizontal flip and
    random rotation), rescales pixel values by 1/255, runs an
    ImageNet-initialised ResNet50 without its classification head, then
    dropout, global average pooling and a single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple of int, default (224, 224, 3)
        Shape of one input image, passed to `layers.Input`.
    dropout_rate : float, default 0.6
        Rate of the dropout layer applied to the backbone output.
    learning_rate : float, default 1e-6
        Learning rate of the Adam optimizer.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam, binary cross-entropy loss and an AUC
        metric named 'auc' (so the validation metric is logged as 'val_auc').
    """
    # Input
    inputs = layers.Input(shape=input_shape)

    # Augmentation lives inside the model (original intent: run it on the GPU)
    x = layers.experimental.preprocessing.RandomFlip("horizontal")(inputs)
    x = layers.experimental.preprocessing.RandomRotation(0.1)(x)
    # NOTE(review): Keras documents `resnet50.preprocess_input` as the input
    # preprocessing for ResNet50; the 1/255 rescaling is kept unchanged so
    # previously saved checkpoints stay consistent - confirm before switching.
    x = layers.experimental.preprocessing.Rescaling(1./255)(x)

    # ResNet50 backbone (ImageNet weights, no classification head)
    x = tf.keras.applications.ResNet50(weights='imagenet', include_top=False)(x)
    x = layers.Dropout(dropout_rate)(x)

    # Pooling and sigmoid output
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    # Creating and compiling model; `learning_rate` replaces the deprecated
    # `lr` keyword of the optimizer.
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=[AUC(name='auc')])
    return model

In [52]:
# Model fitting params
# Training steps per epoch: number of full batches (a trailing partial batch
# is not counted).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# Validation steps: ceiling division so the final partial batch is included
# and every validation sample is evaluated.
STEP_SIZE_VALID = -(-val_generator.n // val_generator.batch_size)
# Per-class weights (used as `class_weight` when fitting): class 1 is weighted
# 50 times as much as class 0.
model_weight = {0: 1., 1: 50.}
# Callback: writes 'mw_reznet.hdf5' only when 'val_auc' improves (higher is better)
mcp_save = ModelCheckpoint('mw_reznet.hdf5', save_best_only=True, monitor='val_auc', mode='max')

model = make_model()  # getting model architecture and compiling

In [47]:
# Resume from the saved checkpoint when one exists. On a fresh run the file
# has not been written yet, so the model keeps its initial weights instead of
# failing here.
if os.path.exists("mw_reznet.hdf5"):
    model.load_weights("mw_reznet.hdf5")

In [53]:
# Fitting model
# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
model.fit(train_generator, validation_data=val_generator, steps_per_epoch=STEP_SIZE_TRAIN,
          validation_steps=STEP_SIZE_VALID, callbacks=[mcp_save], class_weight=model_weight,
          verbose=2, epochs=10)

Epoch 1/10
596/596 - 184s - loss: 1.3449 - auc: 0.5968 - val_loss: 0.3407 - val_auc: 0.4748
Epoch 2/10
596/596 - 182s - loss: 1.1292 - auc: 0.7420 - val_loss: 0.8829 - val_auc: 0.7850
Epoch 3/10
596/596 - 182s - loss: 1.0209 - auc: 0.8013 - val_loss: 0.6383 - val_auc: 0.8157
Epoch 4/10
596/596 - 182s - loss: 0.9584 - auc: 0.8259 - val_loss: 0.6319 - val_auc: 0.8290
Epoch 5/10
596/596 - 182s - loss: 0.9184 - auc: 0.8414 - val_loss: 0.5638 - val_auc: 0.8407
Epoch 6/10
596/596 - 182s - loss: 0.8572 - auc: 0.8644 - val_loss: 0.5339 - val_auc: 0.8476
Epoch 7/10
596/596 - 182s - loss: 0.8309 - auc: 0.8729 - val_loss: 0.5039 - val_auc: 0.8525
Epoch 8/10
596/596 - 203s - loss: 0.8029 - auc: 0.8798 - val_loss: 0.4484 - val_auc: 0.8579
Epoch 9/10


KeyboardInterrupt: 

In [48]:
# Loading in test labels and images
# The CSV is read once. The original image names (without extension) are
# copied out first for the submission file, so the later change to the column
# cannot alter them; the dataframe column then gets the ".png" suffix.
test_df = pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
test_img_names = test_df['image_name'].values.copy()
test_df['image_name'] = test_df['image_name'] + '.png'

In [19]:
# Test images are served without labels (class_mode=None), in dataframe
# order (shuffle disabled) and in batches of 32.
test_gen = ImageDataGenerator()
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="../input/siic-isic-224x224-images/test",
    x_col='image_name',
    class_mode=None,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=1,
)

Found 10982 validated image filenames.


In [49]:
# Making predictions
# `Model.predict` accepts generators directly; `predict_generator` is
# deprecated. `ravel()` flattens the single-unit model output to one score
# per image.
predictions = model.predict(test_generator).ravel()

# Submission dataset: original image names (without extension) and scores
sub = pd.DataFrame({"image_name": test_img_names, "target": predictions})

sub.to_csv('rez_mw.csv', index=False)