# Small Model v1

In [None]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf


import pathlib
import pandas as pd

import pydicom # for DICOM images
from skimage.transform import resize

import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

In [None]:
# Record which TensorFlow release this notebook is running against.
print(tf.version.VERSION)

In [None]:
def set_seed(seed=31415):
    """Seed every random number generator this notebook can rely on.

    Args:
        seed: Integer used for Python's `random`, NumPy and TensorFlow.
    """
    # Local import: `random` is not part of the notebook's import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # PYTHONHASHSEED is read when an interpreter starts, so this assignment
    # only influences Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Ask TensorFlow for deterministic op implementations where available.
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
set_seed()

In [None]:
# Directory
# The dataset root can be overridden with the MELANOMA_DATA_DIR environment
# variable; the original local path is kept as the default so existing
# set-ups keep working unchanged.
directory = os.environ.get('MELANOMA_DATA_DIR', '/Users/renau/Desktop/DATA/data_proj_melanoma')

# Import the two CSV files
train_df = pd.read_csv(directory + '/train.csv')
test_df = pd.read_csv(directory + '/test.csv')

print('Train has {:,} rows and Test has {:,} rows.'.format(len(train_df), len(test_df)))

# Change column names. The assignment raises ValueError if a CSV does not
# have exactly as many columns as names supplied here.
new_names = ['dcm_name', 'ID', 'sex', 'age', 'anatomy', 'diagnosis', 'benign_malignant', 'target']
train_df.columns = new_names
# test_df receives only the first five names.
test_df.columns = new_names[:5]

In [None]:
# === DICOM ===
# One .dcm file per row, located under <directory>/train or <directory>/test.
train_df['path_dicom'] = f'{directory}/train/' + train_df['dcm_name'] + '.dcm'
test_df['path_dicom'] = f'{directory}/test/' + test_df['dcm_name'] + '.dcm'

# === JPEG ===
# One .jpg file per row, located under <directory>/jpeg/train or <directory>/jpeg/test.
path_train = f'{directory}/jpeg/train/' + train_df['dcm_name'] + '.jpg'
path_test = f'{directory}/jpeg/test/' + test_df['dcm_name'] + '.jpg'
train_df['path_jpeg'] = path_train
test_df['path_jpeg'] = path_test

In [None]:
def show_images(data, n = 5, rows=1, cols=5, title='Default'):
    """Plot the first `n` DICOM images referenced by `data` on a grid.

    Args:
        data: DataFrame with a 'path_dicom' column of file paths.
        n: Maximum number of images to display.
        rows: Number of rows in the subplot grid.
        cols: Number of columns in the subplot grid.
        title: Title drawn above the whole figure.

    Raises:
        ValueError: If the selected images do not fit in a rows x cols grid.
    """
    paths = data['path_dicom'].iloc[:n]
    # Fail before drawing anything rather than part-way through the grid.
    if len(paths) > rows * cols:
        raise ValueError(
            'Cannot place {} images on a {}x{} grid.'.format(len(paths), rows, cols)
        )

    plt.figure(figsize=(16,4))
    # The title belongs to the figure, so it is set once, outside the loop.
    plt.suptitle(title, fontsize = 16)

    for k, path in enumerate(paths):
        # dcmread is the current pydicom entry point; read_file is a deprecated alias.
        image = pydicom.dcmread(path).pixel_array

        # Shrink every image to a common 200x200 size for display.
        image = resize(image, (200, 200), anti_aliasing=True)

        plt.subplot(rows, cols, k+1)
        plt.imshow(image)
        plt.axis('off')

In [None]:
# Show Benign Samples (rows whose target is 0)
show_images(train_df.query('target == 0'), title='Benign Sample', n=5, rows=1, cols=5)

In [None]:
# Show Malignant Samples (rows whose target is 1)
show_images(train_df.query('target == 1'), title='Malignant Sample', n=5, rows=1, cols=5)

In [None]:
# Peek at the first five rows of the training metadata.
train_df.head(5)

In [None]:
# Pair each JPEG path with its target so the dataset yields (path, label) elements.
file_paths, labels = (train_df[column].values for column in ('path_jpeg', 'target'))
train_ds = tf.data.Dataset.from_tensor_slices((file_paths, labels))

In [None]:
# Sanity check: print the first three (path, target) pairs.
ex_ds = train_ds.take(3)
for path_tensor, target_tensor in ex_ds:
    print(path_tensor.numpy(), 'and target: ', target_tensor.numpy())

In [None]:
def read_image(path, label):
    """Load the file at `path` and decode it into a 3-channel uint8 image.

    The label is passed through untouched so the function can be used
    directly with `Dataset.map` on (path, label) elements.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_image(
        raw_bytes,
        channels=3,
        dtype=tf.uint8,
        expand_animations=False,
    )
    return pixels, label
dstry = train_ds.map(read_image)

In [None]:
# Sanity check: print the shape of the first three decoded images.
ex_ds = dstry.take(3)
for decoded_image, _label in ex_ds:
    print(decoded_image.numpy().shape)

In [None]:
# Hold-out split: the first `cut` elements are used for training and the
# remaining ones for validation.
cut = 24000
ds_train = dstry.take(cut)
ds_val = dstry.skip(cut)

def adapt_data(image, label):
    """Resize `image` to 128x128; the label is passed through unchanged."""
    image = tf.image.resize(image, [128,128])
    return image, label

def convert_to_float(image, label):
    """Convert `image` to float32; integer inputs are rescaled to [0, 1]."""
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image, label

# Order matters: convert_image_dtype only rescales when its input is an
# integer image. tf.image.resize returns float32 values still in the 0-255
# range, so converting after resizing would leave the pixels unscaled.
# Converting first feeds the network values in [0, 1].
ds_train = ds_train.map(convert_to_float).map(adapt_data).batch(64)
ds_val = ds_val.map(convert_to_float).map(adapt_data).batch(64)

# Sanity check: print image-batch and label-batch shapes for three batches.
ex_ds = ds_train.take(3)
for element in ex_ds:
    print(element[0].shape)
    print(element[1].shape)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Cache each split after its first full pass and let tf.data choose the
# prefetch buffer size.
ds_train, ds_val = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (ds_train, ds_val)
)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers


# Every convolution uses the same kernel size, activation and padding;
# only the number of filters changes from block to block.
conv_options = dict(kernel_size=3, activation='relu', padding='same')

model = keras.Sequential([
    # Block One: takes 128x128 RGB input
    layers.Conv2D(32, input_shape=[128,128, 3], **conv_options),
    layers.MaxPool2D(),

    # Block Two
    layers.Conv2D(64, **conv_options),
    layers.MaxPool2D(),

    # Block Three: two stacked convolutions before pooling
    layers.Conv2D(128, **conv_options),
    layers.Conv2D(128, **conv_options),
    layers.MaxPool2D(),

    # Head: a single sigmoid unit for the binary target
    layers.Flatten(),
    layers.Dense(6, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid'),
])


In [None]:
# Adam with epsilon raised to 0.01; binary cross-entropy matches the
# single sigmoid output of the model.
adam = tf.keras.optimizers.Adam(epsilon=0.01)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_accuracy'])

In [None]:
# Train for 100 epochs, evaluating on the validation split after each one.
history = model.fit(ds_train, epochs=100, validation_data=ds_val)

In [None]:
import pandas as pd
# One row per epoch, one column per recorded metric.
history_frame = pd.DataFrame(history.history)
# Draw the loss curves and the accuracy curves as two separate figures.
for curve_pair in (['loss', 'val_loss'], ['binary_accuracy', 'val_binary_accuracy']):
    history_frame[curve_pair].plot();

In [None]:
# The model ends in a single sigmoid unit, so predict() returns one
# probability per image with shape (N, 1).
probabilities = model.predict(ds_val)

# argmax over an axis of length 1 is always 0, which would label every
# sample benign. Threshold the probability instead, using the same 0.5
# cut-off that the 'binary_accuracy' metric applies by default.
predictions = tf.cast(tf.squeeze(probabilities, axis=-1) > 0.5, tf.int64)

# m = samples predicted malignant (1), b = samples predicted benign (0).
m = int(tf.reduce_sum(predictions))
b = int(predictions.shape[0]) - m
print("Prédictions ;\n - Malin(s): ", m, "\n - Bénins: ", b)