In [6]:
import json

import numpy as np
import pandas as pd
import seaborn as sn
import keras
from keras import backend as K
from keras import utils
from keras.callbacks import ModelCheckpoint
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.models import load_model
from keras.models import model_from_json
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split

# NOTE(review): generator, plot_acc_loss and plt are presumably provided by this
# star import -- confirm, and prefer importing those names explicitly.
from CNN_functions import *

**Defino las constantes**

In [7]:
# Input/output locations: image folder and the CSV that indexes the images
img_path = '../input/biopsy_images/'
dataset_path = "../output/df_imagenes_completo.csv"

# Image geometry fed to the network: height, width and channel count
img_rows = img_cols = 224
number_channels = 3

In [8]:
# Shape of a single input image and number of output labels
input_shape = (img_rows, img_cols, number_channels)
num_classes = 2

# BatchNormalization has to normalise over the channel axis, whose position
# depends on the backend's image data format.
chanDim = 1 if K.image_data_format() == "channels_first" else -1

# NN topology: three convolutional stages followed by a dense classifier head.
model = Sequential()

# Each stage is (filters of its successive conv layers, pooling window).
conv_stages = [
    ((32,), (3, 3)),
    ((64, 64), (2, 2)),
    ((128, 128), (2, 2)),
]

# Only the very first layer declares the input shape.
first_layer_kwargs = {"input_shape": input_shape}
for stage_filters, pool_window in conv_stages:
    for n_filters in stage_filters:
        # Conv -> ReLU -> BatchNorm unit
        model.add(Conv2D(n_filters, (3, 3), padding="same", **first_layer_kwargs))
        first_layer_kwargs = {}
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
    # Every stage ends with spatial down-sampling and light dropout
    model.add(MaxPooling2D(pool_size=pool_window))
    model.add(Dropout(0.25))

# Classifier head: one wide hidden layer, then a softmax over the labels
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation("softmax"))

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

In [9]:
# Load the CSV listed in dataset_path; later cells read its `image` and `label` columns
df = pd.read_csv(dataset_path)

In [10]:
# Encode the class names as integers: MSI -> 0, MSS -> 1
label_codes = {"MSI": 0, "MSS": 1}
df["label"] = df["label"].replace(label_codes)
# Class balance after encoding
df["label"].value_counts()

1    117273
0     75039
Name: label, dtype: int64

In [11]:
# Hold out roughly 400 images (0.2 % of the dataset) for validation.
# NOTE: despite its name, train_ratio is the fraction passed as test_size,
# i.e. the held-out share, not the training share.
train_ratio = 0.002

# A fixed random_state makes the split (and every metric computed on it)
# reproducible after a kernel restart; stratify keeps the MSI/MSS proportions
# of the small validation set equal to those of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    df.image,
    df.label,
    test_size=train_ratio,
    stratify=df.label,
    random_state=42,
)

In [12]:
# Checkpointing: after each epoch, write the model to disk only when
# val_accuracy improves on the best value seen so far. The epoch number and
# the validation accuracy are embedded in the file name.
filepath = 'Checkpoint_{epoch:02d}_{val_accuracy:.2f}'
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Training parameters
batchs = 40             # images per batch
epochs = 60
img_number = len(df)    # total number of images in the dataset

# Both generators must use the same batch size that the step counts below are
# derived from. Scaling the batch size by the split ratio would request
# fractional batches (39.92 and 0.08 images).
# NOTE(review): assumes the third argument of generator() is the batch size,
# as in the eval-mode call further down -- confirm in CNN_functions.
trainGen = generator(X_train, y_train, batchs, img_path)
testGen = generator(X_test, y_test, batchs, img_path)

# Integer number of full batches per pass, taken from the actual split sizes
# (at least one step so that a tiny validation set is still evaluated).
train_steps = max(1, len(X_train) // batchs)
val_steps = max(1, len(X_test) // batchs)

# Fit model; the returned History object holds the per-epoch metrics.
history = model.fit_generator(
    trainGen,
    steps_per_epoch=train_steps,
    validation_data=testGen,
    validation_steps=val_steps,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks_list)

Epoch 1/60




In [None]:
# model.history is the History callback object recorded by the last fit call;
# it is not subscriptable. The per-epoch metric lists live in its `.history`
# dict, keyed by metric name.
training_log = model.history.history

acc = training_log['accuracy']
val_acc = training_log['val_accuracy']
loss = training_log['loss']
val_loss = training_log['val_loss']

# Training vs. validation accuracy and loss curves
plot_acc_loss(acc, val_acc, loss, val_loss)

In [None]:
# Fresh generator over the held-out images, switched to evaluation mode
testGen = generator(
    X_test,
    y_test,
    batchs,
    img_path,
    mode="eval",
)

In [None]:
# Number of batches needed to cover EVERY held-out image (ceiling division).
# Flooring the count would skip the last partial batch and leave fewer
# predictions than there are labels in y_test, breaking the metrics below.
eval_steps = -(-len(X_test) // batchs)
predictions = model.predict_generator(testGen, steps=eval_steps, verbose=1)

# If the generator pads or wraps the final batch, drop the surplus rows so
# that predictions line up one-to-one with y_test.
# NOTE(review): assumes the eval-mode generator yields X_test in order,
# without shuffling or dropping the final partial batch -- confirm in
# CNN_functions.generator.
predictions = predictions[:len(y_test)]

In [None]:
# Predicted class = column index with the highest softmax probability
predIdxs = predictions.argmax(axis=1)

In [None]:
# Probability assigned to the positive class (label 1 = MSS)
mss_scores = predictions[:, 1]

# The ROC curve must be built from continuous scores. Feeding it the hard
# argmax labels gives a single operating point and a curve that disagrees
# with the AUC, which is computed from the probabilities.
roc = roc_curve(y_test.values, mss_scores)
fpr, tpr = roc[0], roc[1]
auc_value = roc_auc_score(y_test.values, mss_scores)

# Quick look at ground truth vs. predictions for the first few images
display(pd.DataFrame({
    "gt":y_test.values,
    "predicted":predIdxs,
    "proba MSI":predictions[:,0],
    "proba MSS":mss_scores
}).head())

plt.plot(fpr, tpr)
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.title("ROC curve")
plt.legend(["AUC: {}".format(round(auc_value, 2))])

In [None]:
# Confusion matrix of ground-truth labels against predicted labels
cm = confusion_matrix(y_test.values, predIdxs)
cm_df = pd.DataFrame(cm)

# Show the confusion matrix as an annotated heatmap
plt.figure(figsize=(7, 5))
sn.set(font_scale=1)  # label size
annotation_style = {"size": 12}  # font size of the cell counts
sn.heatmap(cm_df, annot=True, fmt="d", annot_kws=annotation_style)
plt.show()

In [None]:
# Save the model architecture to JSON.
# model.to_json() already returns a JSON string; it is stored JSON-encoded
# once more, so a reader must json.load() the file to get that string back.
model_json = model.to_json()
with open("model_CNN.json", mode="w") as json_file:
    json_file.write(json.dumps(model_json))
print("Saved model to disk")

In [None]:
# TODO: set this to the best checkpoint written during training
# (files are named 'Checkpoint_{epoch:02d}_{val_accuracy:.2f}').
best_checkpoint_path = ""

# Rebuild the architecture from the JSON file written by the save cell.
# NOTE(review): the file name is aligned with the one saved above
# ("model_CNN.json"); the previous 'model_prueba.json' is never produced by
# this notebook -- confirm this is the intended file.
with open("model_CNN.json", "r") as f:
    model_json = json.load(f)
loaded_model = model_from_json(model_json)

# Fail with a clear message instead of an obscure loader error when the
# checkpoint path has not been filled in yet.
if not best_checkpoint_path:
    raise ValueError(
        "Set best_checkpoint_path to the checkpoint to restore before running this cell"
    )
loaded_model.load_weights(best_checkpoint_path)
print("Loaded model from disk")