In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import ResNet50V2

tf.__version__

'2.5.0'

In [2]:
!git clone https://github.com/CartagenaMinas/Melanomas.git

fatal: destination path 'Melanomas' already exists and is not an empty directory.


In [3]:
import os
os.listdir("Melanomas")

['Keras_baseline.ipynb', 'data', 'exploracion.ipynb', '.git']

In [4]:
PATH="Melanomas/data"
os.listdir(PATH)

['jpeg224',
 'test.csv',
 'sample_submission.csv',
 'train.csv',
 'train_split.csv',
 'val_split.csv',
 'subset.csv']

In [5]:
import pandas as pd
train=pd.read_csv(f"{PATH}/subset.csv")
val=pd.read_csv(f"{PATH}/val_split.csv")
train.shape,val.shape

((2220, 8), (10932, 8))

In [6]:
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_0533349,IP_5208504,female,45.0,lower extremity,unknown,benign,0
1,ISIC_8814612,IP_0414408,male,50.0,torso,unknown,benign,0
2,ISIC_6515241,IP_6245507,male,45.0,lower extremity,unknown,benign,0
3,ISIC_5075261,IP_2117218,male,40.0,upper extremity,unknown,benign,0
4,ISIC_2624460,IP_1969685,male,50.0,torso,unknown,benign,0


In [16]:
BATCH_SIZE = 64
AUTO = tf.data.experimental.AUTOTUNE

def decode(filename, label):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    return img, label

def augment(img, label):
    # augmentation
    #img = tf.image.flip_left_right(img)
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_flip_up_down(img)
    return img, label

def load_ds(df):
    options = tf.data.Options()
    options.experimental_deterministic = False
    imgs, labels = df["image_name"].values, df["target"].values
    imgs = [f'{PATH}/jpeg224/train/{img}.jpg' for img in imgs]
    ds = tf.data.Dataset.from_tensor_slices((imgs, labels))
    ds = ds.with_options(options)
    ds = ds.map(decode, num_parallel_calls=AUTO)
    #ds = ds.cache()
    ds = ds.map(augment, num_parallel_calls=AUTO)
    ds = ds.shuffle(2048)
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(buffer_size=AUTO)
    return ds

In [17]:
train_ds=load_ds(train)
val_ds=load_ds(val)

In [18]:
IMAGE_SIZE = (224, 224, 3)

encoder = ResNet50V2(
    include_top=False,
    input_shape=IMAGE_SIZE,
    weights='imagenet'
)
encoder.trainable = False

inputs = keras.Input(shape=IMAGE_SIZE)
x = keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
x = encoder(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
outputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
rescaling_1 (Rescaling)      (None, 224, 224, 3)       0         
_________________________________________________________________
resnet50v2 (Functional)      (None, 7, 7, 2048)        23564800  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 2049      
Total params: 23,566,849
Trainable params: 2,049
Non-trainable params: 23,564,800
_________________________________________________________________


In [20]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(name="auc")]
)

In [21]:
filepath="./checkpoints/checkpoint"
cb=tf.keras.callbacks.ModelCheckpoint(
    filepath=filepath,
    monitor="val_auc",
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode="max"
)

In [22]:
model.fit(
    train_ds,
    epochs=10,
    validation_data=val_ds,
    validation_steps=10,
    callbacks=[cb])

Epoch 1/10

Epoch 00001: val_auc improved from -inf to 0.55514, saving model to ./checkpoints/checkpoint
Epoch 2/10

Epoch 00002: val_auc improved from 0.55514 to 0.61368, saving model to ./checkpoints/checkpoint
Epoch 3/10

Epoch 00003: val_auc improved from 0.61368 to 0.73946, saving model to ./checkpoints/checkpoint
Epoch 4/10

Epoch 00004: val_auc did not improve from 0.73946
Epoch 5/10

Epoch 00005: val_auc did not improve from 0.73946
Epoch 6/10

Epoch 00006: val_auc improved from 0.73946 to 0.81483, saving model to ./checkpoints/checkpoint
Epoch 7/10

Epoch 00007: val_auc did not improve from 0.81483
Epoch 8/10

Epoch 00008: val_auc did not improve from 0.81483
Epoch 9/10

Epoch 00009: val_auc improved from 0.81483 to 0.81498, saving model to ./checkpoints/checkpoint
Epoch 10/10

Epoch 00010: val_auc did not improve from 0.81498


<tensorflow.python.keras.callbacks.History at 0x7f605e29e3d0>

In [23]:
#Cargar los Pesos
model.load_weights(filepath)
model.save("model.h5")



In [24]:
model.evaluate(val_ds)



[0.07765401899814606, 0.7894809246063232]

In [25]:
m=tf.keras.metrics.AUC()
aucs=[]

for imgs, labels in val_ds:
  imgs_lr=tf.image.flip_left_right(imgs)
  imgs_ud=tf.image.flip_up_down(imgs)
  preds=(model.predict(imgs)+model.predict(imgs_lr)+model.predict(imgs_ud))/3
  _=m.update_state(labels,preds)#Calcula la metrica
  aucs.append(m.result().numpy())


In [26]:
import numpy as np

np.mean(aucs)

0.7777581

In [27]:
test=pd.read_csv(f"{PATH}/test.csv")
test.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity


In [28]:
BATCH_SIZE=256

def decode_test(name):
  img=tf.io.read_file(name)#lo lee en byts
  img=tf.image.decode_jpeg(img,channels=3)#lo carga como imagen
  img=tf.cast(img,tf.float32)#/255#cambia la presicion
  return img

def load_test_ds(df):
  imgs=df["image_name"].values#Creo una lista 
  imgs=[f"{PATH}/jpeg224/test/{name}.jpg" for name in imgs]#Creo una lista con mis rutas
  ds=tf.data.Dataset.from_tensor_slices(imgs)
  ds=ds.map(decode_test)
  ds=ds.batch(BATCH_SIZE)
  return ds

In [29]:
test_ds=load_test_ds(test)

In [30]:
preds = []

for imgs in test_ds:
  imgs_lr=tf.image.flip_left_right(imgs)
  imgs_ud=tf.image.flip_up_down(imgs)
  _preds=(model.predict(imgs)+model.predict(imgs_lr)+model.predict(imgs_ud))/3
  preds += _preds.ravel().tolist()

In [31]:
preds

[0.005456258077174425,
 0.003947441000491381,
 0.005934633780270815,
 0.0004585131537169218,
 0.0030168790835887194,
 0.012647464871406555,
 0.0033391450997442007,
 0.20865564048290253,
 0.009170307777822018,
 0.012854748405516148,
 0.011033720336854458,
 0.0013582626124843955,
 0.026287948712706566,
 0.000691301713231951,
 0.06317060440778732,
 0.0050403159111738205,
 0.0026162590365856886,
 0.0011795168975368142,
 0.04333004355430603,
 0.15258543193340302,
 0.018319470807909966,
 0.0019559829961508512,
 0.036599770188331604,
 0.006841955706477165,
 0.18828296661376953,
 0.012596399523317814,
 0.0009397279936820269,
 0.003729534102603793,
 0.004165541846305132,
 0.0020914613269269466,
 0.013763323426246643,
 0.0015143383061513305,
 0.11745921522378922,
 0.043167468160390854,
 0.044464025646448135,
 0.0018778688972815871,
 0.006840792950242758,
 0.07051756978034973,
 0.002778249094262719,
 0.007330911699682474,
 0.0022420065943151712,
 0.08376555889844894,
 0.08401497453451157,
 0.0034

In [32]:
submission=pd.DataFrame({
    "image_name":test["image_name"].values,"target":preds
})
submission.head()

Unnamed: 0,image_name,target
0,ISIC_0052060,0.005456
1,ISIC_0052349,0.003947
2,ISIC_0058510,0.005935
3,ISIC_0073313,0.000459
4,ISIC_0073502,0.003017


In [33]:
submission.to_csv("sumissionDA4.csv",index=False)