In [1]:
import sys, os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16 as PretrainedModel, preprocess_input
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folders of the two image splits.
train_path = './data/train'
valid_path = './data/test'

# Every .jpg located exactly one directory level below each split root.
train_image_files = glob(f'{train_path}/*/*.jpg')
valid_image_files = glob(f'{valid_path}/*/*.jpg')

In [2]:
# Entries directly under the training root, printed for inspection.
folders = glob(f'{train_path}/*')
print(folders)

['./data/train\\melanoma', './data/train\\no_melanoma']


In [3]:
# Spatial size of the network input; kept as a list because other cells
# concatenate it with list fragments to build full shapes.
IMAGE_SIZE = [150, 150]

# Pre-trained base loaded with ImageNet weights and without its top layers.
ptm = PretrainedModel(
    include_top=False,
    weights='imagenet',
    input_shape=[*IMAGE_SIZE, 3],
)

# Freeze the pre-trained weights.
ptm.trainable = False

In [4]:
# Number of classes, taken as the count of entries under the training root.
K = len(folders)

# Flatten the base model's output so every image maps to one feature vector.
# No classification layer is attached to this model.
flat_output = Flatten()(ptm.output)

model = Model(inputs=ptm.input, outputs=flat_output)

In [5]:
# Batch size shared by both directory iterators.
batch_size = 16

# A single generator serves both splits; its only transformation is the
# VGG16 input preprocessing function.
gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Options common to the training and validation iterators.
flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    class_mode='binary',
)

# Iterator over the training set.
train_generator = gen.flow_from_directory(train_path, **flow_options)

# Iterator over the validation set.
valid_generator = gen.flow_from_directory(valid_path, **flow_options)

Found 6916 images belonging to 2 classes.
Found 1729 images belonging to 2 classes.


In [None]:
# Take the sample counts from the iterators themselves rather than from a
# separate '*.jpg' glob: the extraction loops fill these arrays from the
# iterators, so the two counts must agree or the slice assignment overflows
# (the directory iterator may also pick up files the glob pattern misses).
N_train = train_generator.samples
N_valid = valid_generator.samples

# Length of the flattened feature vector, read from the model's static output
# shape (batch dimension first) instead of running a prediction on random data.
D = model.output_shape[1]

# Pre-allocated feature matrices (one row per image) and label vectors.
X_train = np.zeros((N_train, D))
Y_train = np.zeros(N_train)
X_valid = np.zeros((N_valid, D))
Y_valid = np.zeros(N_valid)

In [None]:
# Fill X_train / Y_train with the extracted features and labels, one batch at a time.
# The iterator never stops on its own, so the loop ends once N_train rows are written.
i = 0
for x, y in train_generator:
    # Get features with a single forward pass per batch. The previous extra
    # `D = model.predict(x)` doubled the work and overwrote the feature dimension D.
    features = model.predict(x)

    # Number of samples in this batch (the last batch of a pass can be smaller).
    sz = len(y)
    X_train[i:i+sz] = features
    Y_train[i:i+sz] = y

    i += sz
    # Occasional progress output.
    if i % 1280 == 0:
        print(i)

    if i >= N_train:
        print('breaking now')
        break

print(i)

In [None]:
# Cache the training features and labels on disk.
np.save(file='X_train_VGG.npy', arr=X_train)
np.save(file='Y_train_VGG.npy', arr=Y_train)

In [None]:
# Fill X_valid / Y_valid with the extracted features and labels, one batch at a time.
# The iterator never stops on its own, so the loop ends once N_valid rows are written.
i = 0
for x, y in valid_generator:
    # Get features with a single forward pass per batch. The previous extra
    # `D = model.predict(x)` doubled the work and overwrote the feature dimension D.
    features = model.predict(x)

    # Number of samples in this batch (the last batch of a pass can be smaller).
    sz = len(y)
    X_valid[i:i+sz] = features
    Y_valid[i:i+sz] = y

    i += sz
    # Occasional progress output.
    if i % 1280 == 0:
        print(i)

    if i >= N_valid:
        print('breaking now')
        break

print(i)

In [None]:
# Cache the validation features and labels on disk.
np.save(file='X_valid_VGG.npy', arr=X_valid)
np.save(file='Y_valid_VGG.npy', arr=Y_valid)

In [6]:
# Reload the cached feature matrices and label vectors from disk.
X_train, Y_train, X_valid, Y_valid = (
    np.load(npy_file)
    for npy_file in (
        'X_train_VGG.npy',
        'Y_train_VGG.npy',
        'X_valid_VGG.npy',
        'Y_valid_VGG.npy',
    )
)

In [7]:
# Metrics reported for every training and validation epoch.
# NOTE(review): the labels come from flow_from_directory with class_mode='binary';
# if class indices follow sorted folder names, 'no_melanoma' is label 1 and is the
# positive class for tp/fp/precision/recall -- confirm with train_generator.class_indices.
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'),
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
]

# Classifier input width = length of the flattened feature vector, read from the
# extractor's static output shape instead of predicting on random data.
D = model.output_shape[1]
# The cached features must have been produced by an extractor of the same width.
assert X_train.shape[1] == D, 'cached features do not match the extractor output size'

# Neural network for binary classification: four Dense+Dropout stages of
# decreasing width followed by a single sigmoid unit.
in3 = Input(shape=(D,))
x = in3
for units in (2048, 1024, 512, 256):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.2)(x)
x = Dense(1, activation='sigmoid')(x)

mela_model = Model(in3, x)

mela_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=METRICS
)

In [8]:
# Train the classifier on the cached features, evaluating on the validation
# features after each epoch; r3 keeps the object returned by fit().
r3 = mela_model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_valid, Y_valid),
    epochs=20,
    batch_size=128,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [9]:
# Read the sample file and turn each ID into an image file name by appending '.jpg'.
test = pd.read_csv('sample.csv').assign(ID=lambda frame: frame['ID'] + '.jpg')
test.head()

Unnamed: 0,ID,riesgo
0,c890196d33d6b569680c6f089bc83b95.jpg,0.5
1,93ead62bbb22ed4ef466d308fd32476e.jpg,0.5
2,752ba789481175a51f5f986edae665ac.jpg,0.5
3,d6428bef7cf365baaeab1584f65ac80b.jpg,0.5
4,baf3e9e642184e461cf9f7f5b9c0d06e.jpg,0.5


In [10]:
# Folder containing the image files named in test['ID'].
path = './test/test/'

# cv2.resize expects (width, height), while IMAGE_SIZE is passed to Keras as
# target_size; derive the resize target from it instead of hard-coding 150x150.
resize_to = (IMAGE_SIZE[1], IMAGE_SIZE[0])

# Sigmoid output of the classifier for every listed image, in file order.
prob = []
for file in list(test['ID']):
    img = cv2.imread(path+file)
    # cv2.imread signals an unreadable or missing file by returning None
    # rather than raising, so fail here with a message that names the file.
    if img is None:
        raise FileNotFoundError('Could not read image: ' + path + file)
    # OpenCV loads BGR; convert to RGB before the Keras preprocessing.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    res = cv2.resize(img, dsize=resize_to)
    x = image.img_to_array(res)
    x = np.expand_dims(x, axis=0)  # add the batch dimension
    x = preprocess_input(x)

    # Extract the flattened features, then score them with the classifier.
    feat = model.predict(x)
    mela = mela_model.predict(feat)
    prob.append(mela[0][0])

In [11]:
# Reload the sample file (IDs without the '.jpg' suffix) and store the
# predicted probabilities in its 'riesgo' column.
test_2 = pd.read_csv('sample.csv').assign(riesgo=prob)
test_2.tail(5)

Unnamed: 0,ID,riesgo
9933,a2ad05042cc442972546c81ebf564d17,0.999545
9934,1cfc9c0844fc8c39c6db211fb57a4b00,1.0
9935,4e4deb3e76c2f32011f0c54a83864571,1.0
9936,d4d51aaef0b34007705be379219f56f3,1.0
9937,48ca1384ceab3f5261dedf80196cef9b,1.0


In [12]:
# Store the complement of the classifier output as the risk score.
# Computed from `prob` rather than from the column itself so that re-running
# this cell does not flip the values back.
# NOTE(review): presumably the sigmoid output is P(no_melanoma) because the
# 'no_melanoma' folder gets label 1 -- confirm with train_generator.class_indices.
test_2['riesgo'] = 1 - np.asarray(prob, dtype=np.float64)

In [13]:
# Write the predictions to disk without the DataFrame index.
test_2.to_csv(path_or_buf='test_VGG.csv', index=False)

In [14]:
# Persist the trained classifier to an HDF5 file.
mela_model.save(filepath='vgg_model_150x150.h5')