### BINARY CLASSIFIER: PAD-UFES
---

In [1]:
#basic
import pandas as pd
import numpy as np

#keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import \
    Input, Dense, Conv2D, GlobalAveragePooling2D, Flatten,\
    MaxPooling2D, Dropout, Resizing, Rescaling, RandomContrast,\
    RandomCrop, RandomFlip, RandomRotation, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import load_img, img_to_array

#sklearn
from sklearn.model_selection import train_test_split

#cascid
from cascid.configs import config, pad_ufes
from cascid import database

#utils
from utils import transform_diagnose_to_binary, read_data

2022-10-07 17:33:25.877943: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-07 17:33:26.187603: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-07 17:33:26.994835: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-10-07 17:33:26.994903: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

2022-10-07 17:33:28.340166: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 17:33:28.370845: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 17:33:28.371033: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


### Setting paths

In [3]:
FERNANDO_PATH = config.DATA_DIR / 'experiments' / 'fernando'
FERNANDO_PATH.mkdir(exist_ok=True, parents=True)

IMAGE_CACHE = FERNANDO_PATH / 'img_cache.pkl'
FEATURES_FILE = FERNANDO_PATH / 'features.pkl'
MODEL_PATH = FERNANDO_PATH / 'models' / 'deep_learning'

IMDIR = pad_ufes.IMAGES_DIR

### Globals

In [4]:
RANDOM_STATE = 42
TRAIN_SIZE = 0.7
VALIDATION_SIZE = 0.15
TEST_SIZE = 0.15
EPOCHS = 3000
IMAGE_SHAPE = (64, 64, 3)
IMAGES_ON_GPG = 64
BATCH_SIZE = 32

### Get images

In [5]:
pad_ufes_df = read_data(image_shape=IMAGE_SHAPE)

### Change to binary

In [6]:
diagnose_to_binary_dict = {
    "BCC": 1, "SCC": 1, "MEL": 1,
    "ACK": 0, "NEV": 0, "SEK": 0}
dataframe_to_binary = pad_ufes_df.copy()
dataframe_to_binary["diagnostic_binary"] = dataframe_to_binary["diagnostic"].apply(lambda diagnostic: 
    transform_diagnose_to_binary(diagnostic, diagnose_to_binary_dict))

In [7]:
dataframe_to_binary.columns

Index(['patient_id', 'lesion_id', 'smoke', 'drink', 'background_father',
       'background_mother', 'age', 'pesticide', 'gender',
       'skin_cancer_history', 'cancer_history', 'has_piped_water',
       'has_sewage_system', 'fitspatrick', 'region', 'diameter_1',
       'diameter_2', 'diagnostic', 'itch', 'grew', 'hurt', 'changed', 'bleed',
       'elevation', 'img_id', 'biopsed', 'image_array', 'diagnostic_binary'],
      dtype='object')

In [8]:
filtered_df = dataframe_to_binary[["image_array","diagnostic_binary"]].copy()
filtered_df.rename(columns = {"image_array":"x", "diagnostic_binary": "y"}, inplace = True)

In [9]:
filtered_df.head()

Unnamed: 0,x,y
0,"[[[0.7058823529411765, 0.6, 0.5176470588235295...",0
1,"[[[0.8235294117647058, 0.6745098039215687, 0.6...",1
2,"[[[0.592156862745098, 0.34901960784313724, 0.2...",0
3,"[[[0.8784313725490196, 0.7333333333333333, 0.6...",0
4,"[[[0.7058823529411765, 0.49019607843137253, 0....",0


### Train and test split

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(filtered_df["x"], filtered_df["y"], test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

### Transform to tensor data

In [11]:
with tf.device("/GPU:0"):
    x = tf.constant(filtered_df["x"].tolist())
    y = tf.constant(filtered_df["y"].tolist())

2022-10-07 17:34:23.048022: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-07 17:34:23.049004: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 17:34:23.049217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 17:34:23.049330: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

### Building model

In [12]:
input_shape = IMAGE_SHAPE
model = Sequential(
    [
        Input(input_shape),
        RandomContrast(factor=0.3,seed=RANDOM_STATE),
        RandomFlip(mode="horizontal_and_vertical",seed=RANDOM_STATE),
        RandomRotation(factor=(-0.3, 0.3), fill_mode="nearest", interpolation="bilinear", seed=RANDOM_STATE),    
        Conv2D(64, kernel_size=(7, 7), activation='relu', name="TopConv1"),
        Conv2D(64, kernel_size=(7, 7), activation='relu', name="TopConv2"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2), name="TopBatchNorm"),
        Conv2D(32, kernel_size=(5, 5), activation='relu', name="CenterConv1"),
        Conv2D(32, kernel_size=(5, 5), activation='relu', name="CenterConv2"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2), name="CenterBatchNorm"),
        Conv2D(16, kernel_size=(3, 3), activation='relu', name="BottomConv1"),
        Conv2D(16, kernel_size=(3, 3), activation='relu', name="BottomConv2"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2), name="BottomBatchNorm"),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32),
        Dropout(0.2),
        Dense(32),
        Dense(1, activation='sigmoid'),
    ]
 )




In [13]:
model.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"])   

In [14]:
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=30,
    restore_best_weights=True)

### Training

In [16]:
history = model.fit(
    x,
    y,
    callbacks=es,
    epochs=1000,
    validation_split=0.2,
    verbose=1,
    batch_size=BATCH_SIZE
    )

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [25]:
model.save(MODEL_PATH)






INFO:tensorflow:Assets written to: /home/fernandofincatti/.cascid_data/experiments/fernando/models/deep_learning/assets


INFO:tensorflow:Assets written to: /home/fernandofincatti/.cascid_data/experiments/fernando/models/deep_learning/assets


In [24]:
model.evaluate(
    np.asarray(X_test).astype(np.float32),
    np.asarray(y_test).astype(np.float32))

ValueError: setting an array element with a sequence.