In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers

import zipfile

from sklearn.utils.class_weight import compute_class_weight

Exemplos de parâmetros para data augmentation:
*  rotation_range: faixa de graus para rotações aleatórias;
*  width_shift_range e height_shift_range: fração da largura e da altura total usada nos deslocamentos aleatórios horizontal e vertical;
*  shear_range: ângulo de cisalhamento no sentido anti-horário em graus;
*  zoom_range: intervalo para zoom aleatório;
*  horizontal_flip: inverte aleatoriamente as entradas horizontalmente; e
*  fill_mode: tipo de preenchimento que será realizado fora dos limites de entrada.

In [None]:
# Aplicando o augmentation que será definido em todas as imagens do treino e validação
import cv2
import random

def modify_hue(image, max_shift=0.1):
    """Randomly rotate the hue of an RGB image.

    Args:
        image: RGB image array of shape (height, width, 3) in a dtype that
            ``cv2.cvtColor`` accepts (uint8 or float32).
        max_shift: Largest hue rotation, expressed as a fraction of the full
            hue circle. The actual shift is drawn uniformly from
            ``[-max_shift, max_shift]``.

    Returns:
        The hue-shifted image converted back to RGB.
    """
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    # OpenCV encodes hue as degrees / 2 (0..180) for 8-bit images and as
    # degrees (0..360) for floating-point images, so the shift has to be
    # scaled to the matching period.
    hue_period = 180.0 if image_hsv.dtype == np.uint8 else 360.0
    hue_shift = random.uniform(-max_shift, max_shift) * hue_period

    # Hue is circular: wrap around instead of clipping, otherwise every
    # out-of-range value collapses onto the same colour.
    shifted_hue = np.mod(image_hsv[:, :, 0].astype(np.float32) + hue_shift, hue_period)
    image_hsv[:, :, 0] = shifted_hue.astype(image_hsv.dtype)

    return cv2.cvtColor(image_hsv, cv2.COLOR_HSV2RGB)

from tensorflow.keras.applications.resnet50 import preprocess_input

def preprocess_image(image):
    """Apply the random hue change, then the ResNet50 input preprocessing.

    Args:
        image: Image array handed over by the data generator.

    Returns:
        The result of ``preprocess_input`` applied to the hue-modified image.
    """
    return preprocess_input(modify_hue(image))

# Dataset locations on the shared Google Drive.
TRAIN_DIR = '/content/drive/Shareddrives/IA901/dados melanoma/data/interim/treino'
VALIDATION_DIR = '/content/drive/Shareddrives/IA901/dados melanoma/data/interim/validacao'

# Augmentation settings shared by the training and validation generators.
# Both must end with the same `preprocess_image` step (hue jitter followed by
# the ResNet50 `preprocess_input`), otherwise the network is validated on
# inputs with a different scale and channel order than it was trained on.
augmentation_kwargs = dict(
    rotation_range=0,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest',
    preprocessing_function=preprocess_image,
)

# Use the tf.keras implementation for both generators instead of mixing the
# standalone `keras` package with `tf.keras`.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_kwargs)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
)

# NOTE: the validation images are randomly augmented as well, so validation
# metrics vary slightly from run to run.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_kwargs)

validation_generator = test_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    # Keep directory order so that `validation_generator.classes` lines up
    # with the output of `model.predict(validation_generator)`.
    shuffle=False,
)

# Preview one augmented training image.
# The generator yields ResNet50-preprocessed arrays (channels flipped to BGR and
# the ImageNet per-channel mean subtracted), so a direct cast to uint8 would
# wrap the negative values around. Undo the preprocessing before displaying.
batch_images, _ = train_generator[0]
imagenet_bgr_mean = np.array([103.939, 116.779, 123.68], dtype=np.float32)

image = batch_images[0] + imagenet_bgr_mean   # restore the 0..255 value range
image = image[..., ::-1]                      # BGR -> RGB
image = np.clip(image, 0, 255).astype('uint8')

plt.imshow(image)
plt.show()

In [None]:
# Compute one weight per class, total_samples / (n_classes * class_count), so
# that classes with fewer training images weigh more in the training loss.
class_counts = np.bincount(train_generator.classes)
classes = np.unique(train_generator.classes)
total_samples = class_counts.sum()
class_weights = total_samples / (classes.size * class_counts)

# Keras expects a {class_index: weight} mapping.
train_class_weights = {index: weight for index, weight in enumerate(class_weights)}
print(train_class_weights)

In [None]:
# SGD with Nesterov momentum and learning-rate decay.
sgd = tf.keras.optimizers.legacy.SGD(learning_rate=0.001, momentum=0.9, decay=0.01, nesterov=True)

# ImageNet-pretrained ResNet50 used as a feature extractor: `include_top=False`
# drops the 1000-way ImageNet softmax and `pooling='avg'` returns one globally
# averaged feature vector per image, so the new head is trained on features
# rather than on ImageNet class probabilities.
model_ResNet50 = tf.keras.Sequential([
    tf.keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
        pooling='avg',
    ),
    # Two mutually exclusive classes with one-hot labels
    # (class_mode='categorical'): softmax + categorical cross-entropy.
    tf.keras.layers.Dense(2, activation='softmax'),
])

model_ResNet50.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['AUC'],
)

In [None]:
# Reduce the number of trainable parameters by freezing the pre-trained
# backbone: its weights are excluded from training and never updated.
#
# Only the first layer of the Sequential model (the ResNet50 backbone) is
# frozen. Freezing a slice such as `layers[:5]` would cover every layer of this
# small Sequential model, including the classification head, and leave nothing
# to train.
#
# NOTE: `trainable` is changed after `compile()`; Keras requires calling
# `compile()` again before `fit()` for the change to take effect.
model_ResNet50.layers[0].trainable = False

model_ResNet50.summary()

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Reshape, GlobalAveragePooling2D, Dense
from tensorflow.keras.callbacks import Callback

# NOTE(review): kept from the original notebook, which reports an (unspecified)
# error without it. Eager execution disables graph compilation and slows
# training down - revisit once the underlying error is identified.
tf.config.run_functions_eagerly(True) # otherwise error

# Callbacks: stop when the validation loss has not improved for 10 epochs and
# keep only the best model (lowest validation loss) on disk.
cb_early_stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
cb_checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath='/content/drive/Shareddrives/IA901/dados melanoma/models/',
    monitor='val_loss',
    save_best_only=True,
)

callbacks_list = [cb_checkpointer, cb_early_stopper]

# Model architecture: ResNet50 backbone without its ImageNet classifier,
# followed by global average pooling and a small dense head.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(224, 224, 3)
)
# Global average pooling works directly on the (height, width, channels)
# feature map; no reshape is needed beforehand.
x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Two mutually exclusive classes with one-hot labels (class_mode='categorical'):
# softmax + categorical cross-entropy.
outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

# Compiling the model
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['AUC'])

# Training; class weights compensate for the class imbalance.
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    callbacks=callbacks_list,
    class_weight=train_class_weights
)

In [None]:
# Save the training history (per-epoch loss/metric lists) to its own file.
import pickle  # not imported anywhere else in the notebook

history_path = '/content/drive/Shareddrives/IA901/dados melanoma/models/history_resnet50.pickle'
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

In [None]:
from tensorflow.keras.models import load_model

model_path = '/content/drive/Shareddrives/IA901/dados melanoma/models/'
# Reload the checkpointed model (best validation loss) into `model`.
# It must not be assigned to `history`: the plotting cell reads
# `history.history[...]`, which a loaded model does not provide.
model = load_model(model_path)

In [None]:
# Plot the loss and AUC curves for training and validation.
metrics_by_epoch = history.history

# Keras may suffix the metric name (e.g. 'auc_1') when more than one AUC metric
# has been created in the session, so look the key up instead of hard-coding it.
auc_key = next((key for key in metrics_by_epoch if key.startswith('auc')), 'auc')
val_auc_key = 'val_' + auc_key

fig, (ax_loss, ax_auc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(metrics_by_epoch['loss'], label='Treino')
ax_loss.plot(metrics_by_epoch['val_loss'], label='Validação')
ax_loss.set_xlabel('Época')
ax_loss.set_ylabel('Erro')
ax_loss.set_title('Erro de Treinamento e de Validação')
ax_loss.legend()

ax_auc.plot(metrics_by_epoch[auc_key], label='Treino')
ax_auc.plot(metrics_by_epoch[val_auc_key], label='Validação')
ax_auc.set_xlabel('Época')
ax_auc.set_ylabel('AUC')
ax_auc.set_title('AUC de Treinamento e de Validação')
ax_auc.legend()

plt.show()


In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Evaluation and Metrics
# Collect predictions and labels batch by batch. Reading the labels from the
# same batches that are fed to the model keeps them aligned with the
# predictions even when the generator shuffles its samples.
batch_predictions = []
batch_labels = []
for batch_index in range(len(validation_generator)):
    batch_images, batch_targets = validation_generator[batch_index]
    batch_predictions.append(model.predict_on_batch(batch_images))
    batch_labels.append(batch_targets)

predictions = np.concatenate(batch_predictions)
y_true = np.concatenate(batch_labels).argmax(axis=1)  # one-hot -> class index
y_pred = predictions.argmax(axis=1)

# Pass the label set explicitly so the matrix keeps one row/column per class
# even if a class is never observed or never predicted.
confusion_mat = confusion_matrix(y_true, y_pred, labels=np.arange(predictions.shape[1]))
print("Confusion Matrix:")
print(confusion_mat)

# False positive rate, treating class index 0 as the negative class:
# FP / (FP + TN), i.e. the share of class-0 samples predicted as class 1.
negative_count = confusion_mat[0, 1] + confusion_mat[0, 0]
false_positive_rate = confusion_mat[0, 1] / negative_count if negative_count else float('nan')
print("False Positive Rate:", false_positive_rate)

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools

# Plot the confusion matrix computed in the evaluation cell. Reusing it (rather
# than deriving labels and predictions a second time) guarantees the figure
# shows exactly the matrix that was printed.
cm = confusion_mat

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.colorbar()

# Class labels, in class-index order.
class_names = list(validation_generator.class_indices.keys())

tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)

# Absolute count and share of all samples for every cell.
group_counts = ['{0:0.0f}'.format(value) for value in cm.flatten()]
group_percentages = ['{0:.2%}'.format(value) for value in cm.flatten() / np.sum(cm)]

labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_counts, group_percentages)]
labels = np.asarray(labels).reshape(cm.shape[0], cm.shape[1])

# Annotate each cell; use white text on dark cells so it stays readable.
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, labels[i, j], horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.xlabel('Predito')
plt.ylabel('Verdadeiro')
plt.title('Matriz de confusão')
plt.show()

