### BINARY CLASSIFIER: PAD-UFES
---

In [1]:
#basic
import pandas as pd
import numpy as np

#keras
import tensorflow as tf
from tensorflow import keras

#sklearn
from sklearn.model_selection import train_test_split

#cascid
from cascid.configs import config, pad_ufes
from cascid import database

#utils
from utils import transform_diagnose_to_binary, read_data

2022-10-07 11:49:23.647389: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-07 11:49:23.759240: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-07 11:49:24.207913: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-10-07 11:49:24.207952: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

### Setting paths

In [2]:
FERNANDO_PATH = config.DATA_DIR / 'experiments' / 'fernando'
FERNANDO_PATH.mkdir(exist_ok=True, parents=True)

IMAGE_CACHE = FERNANDO_PATH / 'img_cache.pkl'
FEATURES_FILE = FERNANDO_PATH / 'features.pkl'
MODEL_PATH = FERNANDO_PATH / 'models' / 'deep_learning'

IMDIR = pad_ufes.IMAGES_DIR

### Globals

In [3]:
RANDOM_STATE = 42
TRAIN_SIZE = 0.7
VALIDATION_SIZE = 0.15
TEST_SIZE = 0.15
EPOCHS = 3000
IMAGE_SHAPE = (64, 64, 3)
IMAGES_ON_GPG = 64
BATCH_SIZE = 64

### Get images

In [4]:
pad_ufes_df = read_data(image_shape=IMAGE_SHAPE)

### Change to binary

In [5]:
diagnose_to_binary_dict = {
    "BCC": 1, "SCC": 1, "MEL": 1,
    "ACK": 0, "NEV": 0, "SEK": 0}
dataframe_to_binary = pad_ufes_df.copy()
dataframe_to_binary["diagnostic_binary"] = dataframe_to_binary["diagnostic"].apply(lambda diagnostic: 
    transform_diagnose_to_binary(diagnostic, diagnose_to_binary_dict))

In [6]:
dataframe_to_binary.columns

Index(['patient_id', 'lesion_id', 'smoke', 'drink', 'background_father',
       'background_mother', 'age', 'pesticide', 'gender',
       'skin_cancer_history', 'cancer_history', 'has_piped_water',
       'has_sewage_system', 'fitspatrick', 'region', 'diameter_1',
       'diameter_2', 'diagnostic', 'itch', 'grew', 'hurt', 'changed', 'bleed',
       'elevation', 'img_id', 'biopsed', 'image_array', 'diagnostic_binary'],
      dtype='object')

In [7]:
filtered_df = dataframe_to_binary[["image_array","diagnostic_binary"]].copy()
filtered_df.rename(columns = {"image_array":"x", "diagnostic_binary": "y"}, inplace = True)

In [8]:
filtered_df.head()

Unnamed: 0,x,y
0,"[[[0.7058823529411765, 0.6, 0.5176470588235295...",0
1,"[[[0.8235294117647058, 0.6745098039215687, 0.6...",1
2,"[[[0.592156862745098, 0.34901960784313724, 0.2...",0
3,"[[[0.8784313725490196, 0.7333333333333333, 0.6...",0
4,"[[[0.7058823529411765, 0.49019607843137253, 0....",0


### Transform to tensor data

In [9]:
tf_dataset = tf.data.Dataset.from_tensor_slices((
    filtered_df["x"].tolist(),
    filtered_df["y"].tolist()
)).batch(IMAGES_ON_GPG)

2022-10-07 11:50:16.415729: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 11:50:16.419812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 11:50:16.419947: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-07 11:50:16.420214: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [10]:
for element in tf_dataset.as_numpy_iterator():
  print(element)

2022-10-07 11:50:23.796175: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 225902592 exceeds 10% of free system memory.


(array([[[[0.70588235, 0.6       , 0.51764706],
         [0.75686275, 0.69411765, 0.64313725],
         [0.77254902, 0.7254902 , 0.69019608],
         ...,
         [0.79215686, 0.69803922, 0.61176471],
         [0.78039216, 0.68235294, 0.56470588],
         [0.76470588, 0.68627451, 0.58823529]],

        [[0.75686275, 0.6745098 , 0.61568627],
         [0.70980392, 0.64705882, 0.58823529],
         [0.74117647, 0.67843137, 0.61960784],
         ...,
         [0.81960784, 0.71764706, 0.62745098],
         [0.77647059, 0.68627451, 0.59607843],
         [0.78039216, 0.70588235, 0.60784314]],

        [[0.69803922, 0.62745098, 0.5372549 ],
         [0.75294118, 0.69411765, 0.61568627],
         [0.74117647, 0.69019608, 0.60392157],
         ...,
         [0.76862745, 0.68627451, 0.56470588],
         [0.71372549, 0.63529412, 0.50980392],
         [0.78039216, 0.68235294, 0.58039216]],

        ...,

        [[0.47843137, 0.34901961, 0.24705882],
         [0.43921569, 0.30196078, 0.21568627