In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import ResNet50V2
from sklearn.model_selection import StratifiedKFold 
import numpy as np
from tensorflow.keras.models import load_model

 
tf.__version__

'2.5.0'

In [2]:
!git clone https://github.com/CartagenaMinas/Melanomas.git

Cloning into 'Melanomas'...
remote: Enumerating objects: 43603, done.[K
remote: Total 43603 (delta 0), reused 0 (delta 0), pack-reused 43603[K
Receiving objects: 100% (43603/43603), 219.76 MiB | 33.86 MiB/s, done.
Resolving deltas: 100% (3/3), done.
Checking out files: 100% (44116/44116), done.


In [3]:
import os
os.listdir("Melanomas")

['Keras_baseline.ipynb', 'data', 'exploracion.ipynb', '.git']

In [4]:
PATH="Melanomas/data"
os.listdir(PATH)

['jpeg224',
 'test.csv',
 'sample_submission.csv',
 'train.csv',
 'train_split.csv',
 'val_split.csv',
 'subset.csv']

In [5]:
import pandas as pd
train=pd.read_csv(f"{PATH}/subset.csv")
#train=pd.read_csv(f"{PATH}/train.csv")
val=pd.read_csv(f"{PATH}/val_split.csv")
train.shape,val.shape

((2220, 8), (10932, 8))

In [6]:
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_0533349,IP_5208504,female,45.0,lower extremity,unknown,benign,0
1,ISIC_8814612,IP_0414408,male,50.0,torso,unknown,benign,0
2,ISIC_6515241,IP_6245507,male,45.0,lower extremity,unknown,benign,0
3,ISIC_5075261,IP_2117218,male,40.0,upper extremity,unknown,benign,0
4,ISIC_2624460,IP_1969685,male,50.0,torso,unknown,benign,0


In [7]:
BATCH_SIZE=64
AUTO=tf.data.experimental.AUTOTUNE

def decode(name,label):
  img=tf.io.read_file(name)#lo lee en byts
  img=tf.image.decode_jpeg(img,channels=3)#lo carga como imagen
  img=tf.cast(img,tf.float32)#/255#cambia la presicion
  return img,label

def load_ds(df):
  options=tf.data.Options()
  options.experimental_deterministic=False#Para que lo cargue en orden aleatorio
  imgs, labels=df["image_name"].values,df["target"].values#Creo una lista 
  imgs=[f"{PATH}/jpeg224/train/{name}.jpg" for name in imgs]#Creo una lista con mis rutas
  ds=tf.data.Dataset.from_tensor_slices((imgs,labels))
  ds=ds.with_options(options)
  ds=ds.map(decode,num_parallel_calls=AUTO)#uso todos los procesadores
  ds=ds.cache()#Guarda la imagen en memoria, lo puedo usar si tengo mucha ram
  ds=ds.shuffle(2048)
  ds=ds.batch(BATCH_SIZE)
  ds=ds.prefetch(buffer_size=AUTO) #Mientras la gpu calcula la cpu carga imagenes
  return ds



In [8]:
FOLDS=3
aucs=[]
skf=StratifiedKFold(n_splits=FOLDS, random_state=42, shuffle=True)
for f, (train_index, val_index) in enumerate(skf.split(X=np.zeros(len(train)), y=train["target"])):
  print("Fold: ", f+1)
  #print(train_index.shape,val_index.shape)

  train_fold=train.iloc[train_index]
  val_flod=train.iloc[val_index]

  train_ds=load_ds(train_fold)
  val_ds=load_ds(val_flod)

  IMAGE_SIZE= (224,224,3)

  encoder=ResNet50V2(
      include_top=False,
      input_shape=IMAGE_SIZE,
      weights="imagenet"
  )
  encoder.trainable=False #No aplicar los gradientes calculados

  inputs=keras.Input(shape=IMAGE_SIZE)
  x=keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
  x=encoder(x,training=False)#No calcula los gradientes
  x=keras.layers.GlobalAveragePooling2D()(x)
  outputs=keras.layers.Dense(1,activation="sigmoid")(x)
  model=keras.Model(inputs,outputs)
  #model.summary()

  model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(name="auc")])
  filepath="./checkpoints/checkpoint"
  cb=tf.keras.callbacks.ModelCheckpoint(
      filepath=filepath,
      monitor="val_auc",
      verbose=1,
      save_best_only=True,
      save_weights_only=True,
      mode="max")
  model.fit(
    train_ds,
    epochs=10,
    validation_data=val_ds,
    #validation_steps=10,
    callbacks=[cb])
  
  #Cargar los Pesos
  model.load_weights(filepath)
  model.save(f"model_fols{f+1}.h5")

  _, auc = model.evaluate(val_ds)
  aucs.append(auc)



Fold:  1
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10

Epoch 00001: val_auc improved from -inf to 0.39366, saving model to ./checkpoints/checkpoint
Epoch 2/10

Epoch 00002: val_auc improved from 0.39366 to 0.43022, saving model to ./checkpoints/checkpoint
Epoch 3/10

Epoch 00003: val_auc improved from 0.43022 to 0.56581, saving model to ./checkpoints/checkpoint
Epoch 4/10

Epoch 00004: val_auc improved from 0.56581 to 0.60168, saving model to ./checkpoints/checkpoint
Epoch 5/10

Epoch 00005: val_auc improved from 0.60168 to 0.65141, saving model to ./checkpoints/checkpoint
Epoch 6/10

Epoch 00006: val_auc did not improve from 0.65141
Epoch 7/10

Epoch 00007: val_auc improved from 0.65141 to 0.65332, saving model to ./checkpoints/checkpoint
Epoch 8/10

Epoch 00008: val_auc improved from 0.65332 to 0.67506, saving model to ./checkpoints/checkpoint
Epoch 9/10

Epoch 00009: val_au



Fold:  2
Epoch 1/10

Epoch 00001: val_auc improved from -inf to 0.50243, saving model to ./checkpoints/checkpoint
Epoch 2/10

Epoch 00002: val_auc improved from 0.50243 to 0.76801, saving model to ./checkpoints/checkpoint
Epoch 3/10

Epoch 00003: val_auc improved from 0.76801 to 0.83203, saving model to ./checkpoints/checkpoint
Epoch 4/10

Epoch 00004: val_auc improved from 0.83203 to 0.84975, saving model to ./checkpoints/checkpoint
Epoch 5/10

Epoch 00005: val_auc improved from 0.84975 to 0.86531, saving model to ./checkpoints/checkpoint
Epoch 6/10

Epoch 00006: val_auc improved from 0.86531 to 0.87885, saving model to ./checkpoints/checkpoint
Epoch 7/10

Epoch 00007: val_auc improved from 0.87885 to 0.88266, saving model to ./checkpoints/checkpoint
Epoch 8/10

Epoch 00008: val_auc improved from 0.88266 to 0.88890, saving model to ./checkpoints/checkpoint
Epoch 9/10

Epoch 00009: val_auc improved from 0.88890 to 0.89144, saving model to ./checkpoints/checkpoint
Epoch 10/10

Epoch 000

In [9]:
aucs

[0.6750608086585999, 0.8914400339126587, 0.6570204496383667]

In [10]:
np.mean(aucs),np.std(aucs)

(0.7411737640698751, 0.10650924068750381)

In [11]:
test=pd.read_csv(f"{PATH}/test.csv")
test.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity


In [12]:
BATCH_SIZE=256

def decode_test(name):
  img=tf.io.read_file(name)#lo lee en byts
  img=tf.image.decode_jpeg(img,channels=3)#lo carga como imagen
  img=tf.cast(img,tf.float32)#/255#cambia la presicion
  return img

def load_test_ds(df):
  imgs=df["image_name"].values#Creo una lista 
  imgs=[f"{PATH}/jpeg224/test/{name}.jpg" for name in imgs]#Creo una lista con mis rutas
  ds=tf.data.Dataset.from_tensor_slices(imgs)
  ds=ds.map(decode_test,num_parallel_calls=AUTO)
  ds=ds.batch(BATCH_SIZE)
  return ds

In [13]:
test_ds=load_test_ds(test)

In [16]:
preds=[]
for f in range(1,FOLDS+1):
  print(f"Folds {f}")
  model_fold=load_model(f"model_fols{f}.h5")
  probas=model_fold.predict(test_ds)
  preds.append(probas)

Folds 1
Folds 2
Folds 3


In [17]:
preds

[array([[0.00737797],
        [0.00311597],
        [0.0080645 ],
        ...,
        [0.01365974],
        [0.00314299],
        [0.02379647]], dtype=float32), array([[0.00101387],
        [0.00850243],
        [0.00898517],
        ...,
        [0.02939772],
        [0.00753286],
        [0.12454367]], dtype=float32), array([[0.01013358],
        [0.00437738],
        [0.00794915],
        ...,
        [0.03528728],
        [0.00218613],
        [0.0432093 ]], dtype=float32)]

In [18]:
preds_mean =np.mean(preds,axis=0)
preds_mean

array([[0.00617514],
       [0.00533193],
       [0.00833294],
       ...,
       [0.02611492],
       [0.00428733],
       [0.06384981]], dtype=float32)

In [19]:
submission=pd.DataFrame({
    "image_name":test["image_name"].values,"target":preds_mean.ravel()
})
submission.head()

Unnamed: 0,image_name,target
0,ISIC_0052060,0.006175
1,ISIC_0052349,0.005332
2,ISIC_0058510,0.008333
3,ISIC_0073313,0.001475
4,ISIC_0073502,0.010302


In [21]:
submission.to_csv("sumissionCV.csv",index=False)