In [1]:
import pandas as pd
import numpy as np
from scipy.ndimage import zoom, rotate
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import Sequence
import keras.backend as K

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

In [4]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
set_session(tf.Session(config=config))

In [5]:
df = pd.read_csv("./csvs/final_data.csv")

In [6]:
agg_func = {'calcification': 'mean', 'internalStructure': 'mean', 'lobulation': 'mean', 'malignancy': 'mean',
            'margin': 'mean', 'path': 'first', 'sphericity': 'mean', 'spiculation': 'mean', 'subtlety': 'mean', 'texture': 'mean',
            'xf': 'first', 'xi': 'first', 'yf': 'first', 'yi': 'first', 'zf': 'first', 'zi': 'first'}

In [7]:
df = df.groupby("final_id").agg(agg_func)

In [8]:
df[['calcification', 'internalStructure', 'lobulation', 'malignancy', 'margin', 'sphericity', 'spiculation', 'subtlety', 'texture']] = df[['calcification', 'internalStructure', 'lobulation', 'malignancy', 'margin', 'sphericity', 'spiculation', 'subtlety', 'texture']]/df[['calcification', 'internalStructure', 'lobulation', 'malignancy', 'margin', 'sphericity', 'spiculation', 'subtlety', 'texture']].max()

In [9]:
df = df[['path', 'calcification', 'internalStructure', 'lobulation', 'malignancy',
       'margin', 'sphericity', 'spiculation', 'subtlety', 'texture']]

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df.index.tolist(), df, stratify=df.malignancy, test_size=0.2, random_state=22)

ids_train = X_train
ids_test = X_test

In [11]:
tags_train = (df.loc[ids_train].malignancy.values > 0.5).astype(np.int)

In [12]:
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(tags_train),
                                                 tags_train)

In [13]:
n_classes = 1
chanels = 1
batch_size = 70
vol_size = np.array([24, 24, 24])

path_base = './nodules/{}.npz'

In [14]:
def normalize(npzarray):
    maxHU = 400.
    minHU = -1000.
    npzarray = (npzarray - minHU) / (maxHU - minHU)
    npzarray = np.clip(npzarray, 0, 1)
    return npzarray

In [15]:
def get_data(nodule_id):
    tag = int(df.loc[nodule_id].malignancy > 0.5)
    
    file = np.load(path_base.format(nodule_id))
    vol = file['patch']
    file.close()
    
    vol = normalize(vol)
    vol = zoom(vol, vol_size/np.array(vol.shape), order=0)
    return vol, tag

In [16]:
#degrees = [0, 90, -90]
degrees = [0, 90, -90, 80, -80, 70, -70]
axis = [0, 1, 2]

In [17]:
def get_random_params():
    d = np.random.choice(degrees)
    a = np.random.choice(axis, size=2, replace=False)
    return d, a

In [18]:
class Sequence_data(Sequence):

    def __init__(self, data, batch_size, is_training):
        # recive una lista de rutas de donde están los volumenes como numpy arrays y el batch_size
        self.data = data
        self.batch_size = batch_size
        self.is_training = is_training

    def __len__(self):
        return int(np.ceil(len(self.data) / float(self.batch_size)))

    def __getitem__(self, idx):
        # lista de rutas para el batch actual
        batch_x = self.data[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        X = []
        y = []
        
        # se cargan los volumenes y las mascaras en sus respectivos arrays
        for path in batch_x:
            data, tag = get_data(path)
            d, a = get_random_params()
            if self.is_training:
                data = rotate(data, d, a, reshape=False)
            X.append(data)
            
            temp_y = [0, 0]
            temp_y[tag] = 1
            
            y.append(temp_y)
            
        X = np.array(X)
        y = np.array(y)
        
        try:
            resahped = X.reshape((*X.shape, chanels)), y#.reshape(-1, n_classes)
        except Exception as ex:
            print('ojooooooo')
            print(batch_x, ex)
        return resahped

In [19]:
np.random.shuffle(ids_train)
np.random.shuffle(ids_test)

In [20]:
seq_train = Sequence_data(ids_train, batch_size, True)
seq_test = Sequence_data(ids_test, batch_size, False)

In [21]:
from keras.models import Model
from keras.layers import Dense, Dropout, BatchNormalization, GlobalAveragePooling3D, Flatten
from keras.layers import MaxPool3D, Conv3D, Input
from keras.optimizers import SGD

In [22]:
def get_model(size = (128, 128, 64)):
    
    width, height, depth = size

    inputs = Input((width, height, depth, 1))

    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = MaxPool3D(pool_size=2)(x)
    
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = MaxPool3D(pool_size=2)(x)
    
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    x = Conv3D(filters=128, kernel_size=3, activation="relu")(inputs)
    x = BatchNormalization()(x)
    
    x = Flatten()(x)
    outputs = Dense(units=2, activation="softmax")(x)

    # Define the model.
    model = Model(inputs, outputs, name="3dcnn")
    return model

In [23]:
model = get_model(vol_size)

Instructions for updating:
Colocations handled automatically by placer.


In [24]:
#model.save('./cancer_model.hdf5')

In [25]:
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.0001), metrics=['accuracy'])

In [26]:
ES = EarlyStopping(patience=20, min_delta=0.00001, restore_best_weights=True)
MCP = ModelCheckpoint(filepath="./weights_cancer/weights_2.{epoch:02d}.hdf5", save_best_only=True, save_weights_only=True)
callbacks = [ES, MCP]

In [27]:
history = model.fit_generator(seq_train, validation_data=seq_test,  class_weight=class_weights , steps_per_epoch=int(len(ids_train)/batch_size), epochs=200, callbacks=callbacks)

Instructions for updating:
Use tf.cast instead.
Epoch 1/200




Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200


In [28]:
pred = model.predict_generator(seq_test)



In [29]:
tags = (df.loc[ids_test].malignancy.values > 0.5).astype(np.int)

In [30]:
print(classification_report(tags, (pred.argmax(-1) > 0.5).astype(np.int).flatten()))

             precision    recall  f1-score   support

          0       0.55      0.56      0.56       227
          1       0.68      0.67      0.67       311

avg / total       0.62      0.62      0.62       538



In [38]:
print(accuracy_score(tags, (pred.argmax(-1) > 0.5).astype(np.int).flatten()))

0.7063197026022305
