## TRANSFER LEARNING: IMPROVING ALGORITHM
---

### Libraries

In [None]:
import pickle
import os

#basic
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

#tensorflow and keras
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, GlobalAveragePooling2D, Flatten, MaxPooling2D, Dropout, Resizing, Rescaling, RandomBrightness, RandomContrast, RandomCrop, RandomFlip, RandomRotation
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import Model
from keras.utils import load_img, img_to_array

#sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

#open cv
import cv2 as cv


from cascid.configs import config, pad_ufes
from cascid import database

### Constants

In [None]:
# Seed shared by the data split and the augmentation layers.
RANDOM_STATE = 42

# Dataset split fractions (they sum to 1.0).
TRAIN_SIZE, VALIDATION_SIZE, TEST_SIZE = 0.7, 0.15, 0.15

# Upper bound on training epochs and the (height, width, channels) of loaded images.
EPOCHS = 3000
IMAGE_SHAPE = (256, 256, 3)

# Per-experiment working directory; created on first run.
FERNANDO_PATH = config.DATA_DIR / 'experiments' / 'fernando'
FERNANDO_PATH.mkdir(parents=True, exist_ok=True)

# Artifacts stored under the experiment directory.
IMAGE_CACHE = FERNANDO_PATH / 'img_cache.pkl'
FEATURES_FILE = FERNANDO_PATH / 'features.pkl'
MODEL_PATH = FERNANDO_PATH / 'models' / 'deep_learning'

# Switches: recompute features, ignore an existing image cache, train the model.
COMPUTE_FEATURES = True
FORCE_IMAGE_CACHE = False
TRAIN_MODEL = True


### Loading data

In [None]:
# Fetch the metadata table and preview the first five records, one column per record.
df = database.get_db()
df.head(5).T

### Split train, validation and test

In [None]:
# One-hot encode the diagnostic labels as a dense array.
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,
# so try the new keyword first and fall back for older installations.
try:
    MulticlassEncoder = OneHotEncoder(sparse_output=False)
except TypeError:
    MulticlassEncoder = OneHotEncoder(sparse=False)
Y = MulticlassEncoder.fit_transform(df[["diagnostic"]].to_numpy())

# Split according to the TRAIN_SIZE / VALIDATION_SIZE / TEST_SIZE constants
# instead of hard-coded fractions. The validation fraction is expressed
# relative to what is left after the test set has been removed.
# NOTE: changing the split invalidates any image cache written for the previous
# split; rebuild it once (FORCE_IMAGE_CACHE = True) after changing these values.
x_train, x_test, y_train, y_test = train_test_split(
    df["img_id"].to_numpy(),
    Y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    test_size=VALIDATION_SIZE / (TRAIN_SIZE + VALIDATION_SIZE),
    random_state=RANDOM_STATE,
)

print(f"x_train shape: {x_train.shape}")
print(f"x_validation shape: {x_valid.shape}")
print(f"x_test shape: {x_test.shape}")

print(f"y_train shape: {y_train.shape}")
print(f"y_validation shape: {y_valid.shape}")
print(f"y_test shape: {y_test.shape}")

### Image Test

In [None]:
def load_image(name: str) -> np.ndarray:
    """Load one image from the PAD-UFES image directory as a uint8 RGB array.

    Args:
        name: File name of the image, relative to ``pad_ufes.IMAGES_DIR``.

    Returns:
        Array of shape ``(IMAGE_SHAPE[0], IMAGE_SHAPE[1], 3)`` with dtype
        ``uint8``. The image is resized without preserving aspect ratio.
    """
    # `grayscale` is deprecated in favour of `color_mode` and is rejected by
    # newer Keras releases; `color_mode='rgb'` already expresses the intent.
    pil_img = load_img(
        str(pad_ufes.IMAGES_DIR / name),
        color_mode='rgb',
        target_size=IMAGE_SHAPE[:2],
        interpolation='nearest',
        keep_aspect_ratio=False,
    )

    return img_to_array(pil_img, dtype=np.uint8)

# Visual sanity check: render one training image through the loader.
sample_image = load_image(x_train[7])
plt.imshow(sample_image)
plt.show()

### Image Loading and cache

In [None]:
# Automatic caching of image read operations (slow).
# The cache is reused only when it matches the current split sizes; a cache
# written for a different split would silently pair images with wrong labels.
_split_ids = {"train": x_train, "test": x_test, "valid": x_valid}

image_dict = None
if os.path.exists(IMAGE_CACHE) and not FORCE_IMAGE_CACHE:
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were written by this notebook.
    with open(IMAGE_CACHE, 'rb') as file:
        image_dict = pickle.load(file)
    cache_is_stale = any(
        split not in image_dict or len(image_dict[split]) != len(ids)
        for split, ids in _split_ids.items()
    )
    if cache_is_stale:
        print("Cache does not match the current split, rebuilding...")
        image_dict = None

if image_dict is None:
    print("Cache not found, doing read operations...")
    image_dict = {
        split: np.array([load_image(img_id) for img_id in ids])
        for split, ids in _split_ids.items()
    }
    with open(IMAGE_CACHE, 'wb') as file:
        pickle.dump(image_dict, file)
    print("Read operation done, cache file available at {}".format(IMAGE_CACHE))

# Return to original variables (image ids are replaced by pixel arrays)
x_train = image_dict["train"]
x_test = image_dict["test"]
x_valid = image_dict["valid"]

### Data augmentation

In [None]:
# Shared input stage: map uint8 pixel values in [0, 255] to floats in [0, 1].
rescaling_layers = [Rescaling(1./255)]
input_layer = keras.Sequential(rescaling_layers)

# Random augmentations; every layer is seeded with RANDOM_STATE.
augmentation_layers = [
    # Brightness shift drawn from -30% to +30% of the (0, 1) value range.
    RandomBrightness(factor=(-0.3, 0.3), value_range=(0.0, 1.0), seed=RANDOM_STATE),
    # Contrast factor of 0.5, i.e. contrast scaled by up to -50% / +50%.
    RandomContrast(factor=0.5, seed=RANDOM_STATE),
    # Flip horizontally, vertically, or both.
    RandomFlip(mode="horizontal_and_vertical", seed=RANDOM_STATE),
    # Rotate by -30% to +30% of a full turn (2*pi); gaps are filled with the nearest pixel.
    RandomRotation(
        factor=(-0.3, 0.3),
        fill_mode="nearest",
        interpolation="bilinear",
        seed=RANDOM_STATE,
    ),
]
augmentor = keras.Sequential(augmentation_layers)

### Model

In [None]:
def _extract_features(extractor, images, training, batch_size=32):
    """Run `extractor` over `images` in mini-batches and stack the results.

    Args:
        extractor: Callable Keras model mapping an image batch to feature vectors.
        images: Array of images, indexed along the first axis.
        training: Forwarded to the model call; True enables the random
            augmentation layers, False disables them.
        batch_size: Number of images per forward pass (bounds peak memory).

    Returns:
        NumPy array with one feature row per input image.
    """
    chunks = [
        extractor(images[start:start + batch_size], training=training).numpy()
        for start in range(0, len(images), batch_size)
    ]
    return np.concatenate(chunks, axis=0)


if COMPUTE_FEATURES:
    resnet = keras.applications.ResNet50(
        weights='imagenet',
        input_shape=IMAGE_SHAPE,
        pooling='avg',
        include_top=False,
    )
    resnet.trainable = False  # frozen backbone: used purely as a feature extractor
    # NOTE(review): the ImageNet weights were trained with
    # keras.applications.resnet50.preprocess_input rather than 0-1 rescaling;
    # confirm the Rescaling(1/255) input stage is intended.

    # Augmentation only on training
    feature_extractor_train = keras.Sequential([
        input_layer,
        augmentor,
        resnet
    ])
    # Test/Validation only get rescaled
    feature_extractor_test_valid = keras.Sequential([
        input_layer,
        resnet
    ])

    # Extract features for every sample. The previous `[:5]` debug slices
    # produced 5 rows per split, which cannot be paired with the full label
    # arrays. Each training image receives a single random augmentation
    # because features are computed once and stored.
    features = {
        "train": _extract_features(feature_extractor_train, x_train, training=True),
        "test": _extract_features(feature_extractor_test_valid, x_test, training=False),
        "valid": _extract_features(feature_extractor_test_valid, x_valid, training=False),
    }

    with open(FEATURES_FILE, 'wb') as file:
        pickle.dump(features, file)
else:
    # Reuse features written by a previous run of this cell.
    with open(FEATURES_FILE, 'rb') as file:
        features = pickle.load(file)

In [None]:
# Report the feature matrix shape of each split, one per line.
print(
    f'{features["train"].shape=}',
    f'{features["test"].shape=}',
    f'{features["valid"].shape=}',
    sep="\n",
)

In [None]:
# Classification head trained on the pre-computed feature vectors.
# `Input(shape=...)` is documented as taking a shape tuple; a bare int is
# rejected by newer Keras releases, so wrap the feature width in a 1-tuple.
# NOTE(review): Dense(32) and Dense(16) have no activation, so they are purely
# linear layers -- confirm whether activation='relu' was intended.
model = keras.Sequential([
    Input(shape=(features["train"].shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32),
    Dropout(0.2),
    Dense(16),
    Dropout(0.2),
    # One output unit per one-hot encoded class.
    Dense(y_train.shape[1], activation='softmax')
])

In [None]:
# Multi-class setup: one-hot targets with a softmax output, tracked by accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

### Train

In [None]:
# Stop training once validation accuracy has not improved for 100 epochs and
# roll the model back to the best weights seen.
best_model_checkpoint_early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    verbose=1,
    patience=100,
    restore_best_weights=True
)

# Persist the weights of the best model (by validation accuracy).
# The checkpoint location is derived from FERNANDO_PATH instead of an absolute
# path on one developer's machine, so the notebook runs on any checkout.
best_model_checkpoint = ModelCheckpoint(
    filepath=str(FERNANDO_PATH / 'transfer-learning' / 'test02' / 'model'),
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True
)


In [None]:
if TRAIN_MODEL:

    # Validate on the dedicated validation split rather than carving another
    # 20% out of the training features.
    training_history = model.fit(
        features["train"],
        y_train,
        epochs=EPOCHS,
        validation_data=(features["valid"], y_valid),
        batch_size=512,
        # Callbacks are currently disabled:
        #callbacks=[best_model_checkpoint, best_model_checkpoint_early_stopping]
    )

    model.save(MODEL_PATH)

    # Store the per-epoch metrics next to the saved model.
    # pickle.dump requires the target file object as its second argument.
    with open(MODEL_PATH / 'history.pkl', 'wb') as fl:
        pickle.dump(training_history.history, fl)

else:

    print("Load model")

    # Restore the model and its training metrics from a previous run so that
    # later cells can use `model` and `training_history.history` unchanged.
    model = keras.models.load_model(MODEL_PATH)
    training_history = keras.callbacks.History()
    with open(MODEL_PATH / 'history.pkl', 'rb') as fl:
        training_history.history = pickle.load(fl)



In [None]:
# Plot every recorded training metric against the epoch index.
history_frame = pd.DataFrame(training_history.history)
history_axes = history_frame.plot(figsize=(8, 5))
history_axes.grid(True)
history_axes.set_xlabel("Epochs")
history_axes.set_ylabel("Categorical Crossentropy (loss)")
history_axes.set_ylim((0, 10))
history_axes.set_title("Model History")
plt.show()


### Evaluating the model