# SkinLesion - Desafio ISIC 2019
## CNNs
### Imports

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.metrics import Precision, Recall, AUC

In [None]:
# Load the CSV index files for the train (plain and augmented), validation and test splits
train_df, aug_df, val_df, test_df = (
    pd.read_csv(csv_file)
    for csv_file in (
        r"isic2019_train.csv",
        r"isic2019_train_aug_full.csv",
        r"isic2019_val.csv",
        r"isic2019_test.csv",
    )
)

### Informações sobre os *datasets* de treino, validação e teste

In [None]:
# Train dataframe WITHOUT data augmentation
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
train_df.info()
print()
# Preview the same dataframe that was just summarized
train_df.head()

In [None]:
# Augmented train dataframe
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
aug_df.info()
print()
aug_df.head()

In [None]:
# Validation dataframe
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
val_df.info()
print()
val_df.head()

In [None]:
# Test dataframe
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
test_df.info()
print()
test_df.head()

### Geradores dos tensores

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Side length (in pixels) every image is resized to by the generators
IMG_SIZE = 260

# One-hot diagnosis columns used as targets by all three generators
LABEL_COLUMNS = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']

# Only rescales pixel values to [0, 1]; the same generator object feeds all three splits
train_dataGen = ImageDataGenerator(rescale=1./255)

# Training batches are shuffled (default) with a fixed seed for reproducibility
train_generator = train_dataGen.flow_from_dataframe(dataframe=aug_df, x_col='image_path', class_mode='raw',
                                                    seed=31415,
                                                    y_col=LABEL_COLUMNS,
                                                    target_size=(IMG_SIZE, IMG_SIZE), batch_size=32)

# Validation/test batches must NOT be shuffled: predictions are later compared against
# `test_generator.labels`, which is in dataframe order, so the generator has to yield
# samples in that same order.
val_generator = train_dataGen.flow_from_dataframe(dataframe=val_df, x_col='image_path', class_mode='raw',
                                                  shuffle=False,
                                                  y_col=LABEL_COLUMNS,
                                                  target_size=(IMG_SIZE, IMG_SIZE), batch_size=32)

test_generator = train_dataGen.flow_from_dataframe(dataframe=test_df, x_col='image_path', class_mode='raw',
                                                   shuffle=False,
                                                   y_col=LABEL_COLUMNS,
                                                   target_size=(IMG_SIZE, IMG_SIZE), batch_size=32)

### Dependências
#### Métricas

In [None]:
from tensorflow.keras import backend as K

# Specificity (true-negative rate) metric
def specificity(y_true, y_pred):
    """Return TN / (N + epsilon) computed over every element of the batch.

    Predictions are rounded after clipping to [0, 1], so the count of true
    negatives uses hard decisions; epsilon guards against division by zero
    when there are no negative targets.
    """
    negatives = 1 - y_true
    predicted_negatives = 1 - y_pred

    tn_count = K.sum(K.round(K.clip(negatives * predicted_negatives, 0, 1)))
    negative_count = K.sum(K.round(K.clip(negatives, 0, 1)))

    return tn_count / (negative_count + K.epsilon())

In [None]:
# F1 score
class F1_Score(tf.keras.metrics.Metric):
    """Streaming F1 score derived from Keras' Precision and Recall metrics.

    Both inner metrics use a 0.5 decision threshold and accumulate their
    statistics across batches; the F1 value stored in ``self.f1`` is recomputed
    from their running results on every update.
    """

    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.f1 = self.add_weight(name='f1', initializer='zeros')
        self.precision_fn = Precision(thresholds=0.5)
        self.recall_fn = Recall(thresholds=0.5)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Update precision/recall with the batch and refresh the stored F1."""
        # Forward sample_weight so weighted evaluation is not silently ignored
        p = self.precision_fn(y_true, y_pred, sample_weight=sample_weight)
        r = self.recall_fn(y_true, y_pred, sample_weight=sample_weight)
        # since f1 is a variable, we use assign; 1e-6 avoids division by zero
        self.f1.assign(2 * ((p * r) / (p + r + 1e-6)))

    def result(self):
        """Return the most recently computed F1 value."""
        return self.f1

    def reset_state(self):
        """Reset the F1 variable and the inner precision/recall metrics."""
        # Newer Keras versions expose `reset_state`, older ones only `reset_states`;
        # use whichever the inner metric provides.
        for metric in (self.precision_fn, self.recall_fn):
            reset = getattr(metric, 'reset_state', None) or metric.reset_states
            reset()
        self.f1.assign(0)

    def reset_states(self):
        """Legacy name of `reset_state`, kept for older Keras versions."""
        self.reset_state()

f1_score = F1_Score()


### Imports p/ arquiteturas desenvolvidas

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, Activation
from tensorflow.keras.layers import GlobalAvgPool2D

### ResNet-34

In [None]:
import tensorflow.keras as keras

# Residual units
class ResidualUnit(keras.layers.Layer):
    """Residual unit: two 3x3 conv + batch-norm stages with a skip connection.

    Args:
        filters: number of filters used by every convolution in the unit.
        strides: stride of the first convolution; when greater than 1 the skip
            path gets a 1x1 convolution (same stride) plus batch-norm so that
            both paths produce matching shapes.
        activation: activation identifier resolved via ``keras.activations.get``.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, filters, strides=1, activation='relu', **kwargs):
        super().__init__(**kwargs)
        # Kept so the layer can be re-created from its config (see get_config)
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [keras.layers.Conv2D(filters, 3, strides=strides, padding='same', use_bias=False),
                            keras.layers.BatchNormalization(),
                            self.activation,
                            keras.layers.Conv2D(filters, 3, strides=1, padding='same', use_bias=False),
                            keras.layers.BatchNormalization()]
        # Identity skip by default; projection only when the main path downsamples
        self.skip_layers = []

        if strides > 1:
            self.skip_layers = [keras.layers.Conv2D(filters, 1, strides=strides, padding='same', use_bias=False),
                                keras.layers.BatchNormalization()]

    def call(self, inputs):
        """Apply the main path and the skip path, add them, then activate."""
        Z = inputs

        for layer in self.main_layers:
            Z = layer(Z)

        skip_Z = inputs

        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)

        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the constructor arguments so models using this layer can be saved/reloaded."""
        config = super().get_config()
        config.update({'filters': self.filters,
                       'strides': self.strides,
                       'activation': keras.activations.serialize(self.activation)})
        return config



In [None]:
# ResNet-34 implementation as in Hands-On ML book
resnet34 = keras.models.Sequential()

# Convolutional layer
# The input shape follows IMG_SIZE so the model always matches the images
# produced by the generators (target_size=(IMG_SIZE, IMG_SIZE)).
resnet34.add(Conv2D(64, 7, strides=2, input_shape=[IMG_SIZE, IMG_SIZE, 3], padding='same', use_bias=False))
resnet34.add(BatchNormalization())
resnet34.add(Activation('relu'))
resnet34.add(MaxPooling2D(pool_size=3, strides=2, padding='same'))

# Residual units layers
# Stride 2 (downsampling) is used whenever the number of filters changes
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2

    resnet34.add(ResidualUnit(filters, strides=strides))

    prev_filters = filters

# Output layers
resnet34.add(GlobalAvgPool2D())
resnet34.add(Flatten())
# One output unit per label column fed by the generators
resnet34.add(Dense(8, activation='softmax'))

# Compile
# Metrics are explicitly named so their keys in `history.history` are stable
resnet34.compile(optimizer='adam', loss='categorical_crossentropy',
                 metrics=['accuracy', AUC(name="AUC"), Recall(name="Recall"), specificity, Precision(name="Precision"),
                          f1_score])

# Summary
resnet34.summary()

In [None]:
# Early stopping: monitor the validation loss with a patience of 10 epochs and
# ask Keras to restore the best weights observed.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

# Train for at most 50 epochs, validating against val_generator
history = resnet34.fit(
    train_generator,
    epochs=50,
    callbacks=[callback],
    validation_data=val_generator,
)

# Alternative: fixed-length training without early stopping
# history = resnet34.fit(train_generator, validation_data=val_generator, epochs=30)

In [None]:
# Evaluate on the test set. `batch_size` is not passed: the generator already
# yields batches, and Keras documents that it must not be specified for generator input.
results = resnet34.evaluate(test_generator)

### Rede neural convolucional

In [None]:
# Plain CNN: three convolutional stages followed by a small dense head
classifier = Sequential()

# 1st conv layer
# Input shape follows IMG_SIZE so it matches the images produced by the generators
classifier.add(Conv2D(filters=64, kernel_size=(7,7), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3), strides=3))
# pooling layer
classifier.add(MaxPooling2D(pool_size=(2,2)))

# 2nd conv layer
classifier.add(Conv2D(128, (3,3), activation='relu'))
classifier.add(Conv2D(128, (3,3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2,2)))

# 3rd conv layer
classifier.add(Conv2D(256, (3,3), activation='relu'))
classifier.add(Conv2D(256, (3,3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Flatten
classifier.add(Flatten())
classifier.add(Dense(units=64, activation='relu'))
classifier.add(Dropout(rate=0.5))

# Hidden layer
classifier.add(Dense(units=64, activation='relu'))
classifier.add(Dropout(rate=0.5))
classifier.add(Dense(units=32, activation='relu'))

# Output layer
# 8 units: one per label column yielded by the generators
# ('MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC'); any other size makes
# categorical_crossentropy fail on a target/prediction shape mismatch.
classifier.add(Dense(8, activation='softmax'))

# Compile network
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# classifier.compile(optimizer='adam', loss='categorical_crossentropy',
#                    metrics=['accuracy', Precision(), Recall(), AUC()])

classifier.summary()

In [None]:
# Fit the plain CNN for a fixed 50 epochs, with val_generator as validation data
history = classifier.fit(
    train_generator,
    epochs=50,
    validation_data=val_generator,
)

In [None]:
# Predict on the test set. `batch_size` is not passed: the generator already
# yields batches, and Keras documents that it must not be specified for generator input.
test_predictions = classifier.predict(test_generator)

In [None]:
# Show which metric names were recorded in the latest training history
history.history.keys()

In [None]:
from sklearn.metrics import classification_report

# Convert one-hot targets and softmax outputs to class indices before scoring.
# NOTE(review): this comparison assumes test_generator yields samples in the same
# order as `test_generator.labels` (i.e. without shuffling) — confirm.
true_classes = test_generator.labels.argmax(1)
predicted_classes = test_predictions.argmax(1)

print(classification_report(true_classes, predicted_classes, zero_division=0))

In [None]:
import matplotlib.pyplot as plt

# Confusion matrix
# The matrix must be computed before it is plotted (rows: true class, columns: predicted class)
confusion = tf.math.confusion_matrix(test_generator.labels.argmax(1), test_predictions.argmax(1))

plt.matshow(confusion)
plt.show()

#### Salvar/Carregar modelo ou pesos

In [None]:
# Save model's current weights (CAUTION: defaults to overwrite)
# model.save_weights('./models/efficientNetB3_topTrained')

# Load model (the whole model, including architecture and weights)
# model = tf.keras.models.load_model('./models/efficientNetB2_topTrained.tf')

# Load model weights
# model.load_weights('./models/efficientNetB3_topTrained')

### Resultados

#### ResNet-34
##### 1° Teste
- Usando:
  - Split estratificado;
  - **1° conjunto de data augmentation**;
  - *Early stop* (`patience=5`):
    - Parou na 12ª época.
- Resultados do teste:
  - 38s 2s/step;
  - Loss: 1.1328;
  - Accuracy: 0.6054;
  - Precision: 0.6930;
  - Recall: 0.5264;
  - AUC: 0.9116.

##### 2° Teste
- Usando:
  - Split estratificado;
  - **1° conjunto de data augmentation**;
  - Parada após 30 épocas.
- Resultados do teste:
  - 37s 2s/step;
  - Loss: 2.2576;
  - Accuracy: 0.6326;
  - Precision: 0.6376;
  - Recall: 0.6271;
  - AUC: 0.8746.

In [None]:
import matplotlib.pyplot as plt

# summarize history for accuracy, recall, precision and AUC
# Keys follow the metric names given at compile time ('Recall', 'Precision', 'AUC').
# Auto-generated names such as 'recall_1' change with every metric instance created
# in the kernel session, so they cannot be relied upon.
for metric_key, plot_title, y_label in [('accuracy', 'Model accuracy', 'Accuracy'),
                                        ('Recall', 'Model recall', 'Recall'),
                                        ('Precision', 'Model precision', 'Precision'),
                                        ('AUC', 'Model AUC', 'AUC')]:
    if metric_key not in history.history:
        # e.g. `history` comes from a model compiled without this metric
        print("Skipping '{}': not present in history.history".format(metric_key))
        continue

    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

##### ResNet-34: 3° Teste
- Usando:
  - Split estratificado;
  - **Ambos conjuntos de data augmentation**;
  - Parada após 19 épocas;
  - Teste executado com a época de melhor resultado (9).
- Resultados do teste:
  - 38s 2s/step;
  - Loss: 1.0280;
  - Accuracy: 0.6263;
  - Precision: 0.7632;
  - Recall: 0.5063;
  - AUC: 0.9226.

In [None]:
import matplotlib.pyplot as plt

# summarize history for accuracy, recall, precision and AUC
# Keys follow the metric names given at compile time ('Recall', 'Precision', 'AUC').
# Auto-generated names such as 'recall_2' change with every metric instance created
# in the kernel session, so they cannot be relied upon.
for metric_key, plot_title, y_label in [('accuracy', 'Model accuracy', 'Accuracy'),
                                        ('Recall', 'Model recall', 'Recall'),
                                        ('Precision', 'Model precision', 'Precision'),
                                        ('AUC', 'Model AUC', 'AUC')]:
    if metric_key not in history.history:
        # e.g. `history` comes from a model compiled without this metric
        print("Skipping '{}': not present in history.history".format(metric_key))
        continue

    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

##### ResNet-34: 4° Teste
- Usando:
  - Split estratificado;
  - DS **sem** data augmentation;
  - Early stopping:
    - Parada após 24 épocas;
    - Teste executado com a época de melhor resultado (14).
- Resultados do teste:
  - 35s 2s/step;
  - Loss: 1.0600;
  - Accuracy: 0.6247;
  - Precision: 0.7225;
  - Recall: 0.5517;
  - AUC: 0.9220.

In [None]:
import matplotlib.pyplot as plt

# One figure per metric: training curve first, then the matching 'val_' curve
for metric_key, plot_title, y_label in (('accuracy', 'Model accuracy', 'Accuracy'),
                                        ('Recall', 'Model recall', 'Recall'),
                                        ('Precision', 'Model precision', 'Precision'),
                                        ('AUC', 'Model AUC', 'AUC')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# Loss figure (keeps its own lowercase labels)
for loss_key in ('loss', 'val_loss'):
    plt.plot(history.history[loss_key])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

#### Segundo teste
* CNN mais robusta:
```
    classifier = Sequential()

    # 1st conv layer
    classifier.add(Conv2D(filters=64, kernel_size=(7,7), activation='relu', input_shape=(384, 384, 3), strides=3))
    # pooling layer
    classifier.add(MaxPooling2D(pool_size=(2,2)))

    # 2nd conv layer
    classifier.add(Conv2D(128, (3,3), activation='relu'))
    classifier.add(Conv2D(128, (3,3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2,2)))

    # 3rd conv layer
    classifier.add(Conv2D(256, (3,3), activation='relu'))
    classifier.add(Conv2D(256, (3,3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2,2)))

    # Flatten
    classifier.add(Flatten())
    classifier.add(Dense(units=64, activation='relu'))
    classifier.add(Dropout(rate=0.5))

    # Hidden layer
    classifier.add(Dense(units=64, activation='relu'))
    classifier.add(Dropout(rate=0.5))
    classifier.add(Dense(units=32, activation='relu'))

    # Output layer
    classifier.add(Dense(9, activation='softmax'))

    # Compile network
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
```
  * Com split estratificado;
  * Com o **1° conjunto** de transformações de data augmentation:
    * Acurácia do teste: 61,52%

In [None]:
import matplotlib.pyplot as plt

# Accuracy per epoch: training curve vs. validation curve
for history_key, curve_name in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    plt.plot(history.history[history_key], label=curve_name)

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is the validation loss ('val_loss'), not a test-set loss
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

  * Com o **2° conjunto** de transformações de data augmentation:
    * Acurácia do teste: 63,18%

In [None]:
import matplotlib.pyplot as plt

# Accuracy per epoch: training curve vs. validation curve
for history_key, curve_name in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    plt.plot(history.history[history_key], label=curve_name)

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is the validation loss ('val_loss'), not a test-set loss
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

#### Primeiro teste
* CNN simples, com duas camadas conv. seguidas de duas camadas densas;
  * Utilizada apenas para verificações iniciais.
* Sem split estratificado;
* Sem data augmentation;

In [None]:
# Accuracy per epoch: training curve vs. validation curve
for history_key, curve_name in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    plt.plot(history.history[history_key], label=curve_name)

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is the validation loss ('val_loss'), not a test-set loss
plt.legend(['train', 'validation'], loc='upper left')
plt.show()