In [1]:
import pandas as pd
import numpy as np
import pydicom 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import Concatenate, Dense, Input, concatenate, BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Conv3D, MaxPool3D, GlobalMaxPooling3D

from sklearn.preprocessing import MinMaxScaler

Using TensorFlow backend.


In [2]:
def color_change(arr):
    arr = arr.copy()
    factor = 3*np.random.uniform()
    return arr**factor  

def translation(arr):
    direction = [round(3*np.random.uniform()),round(50*np.random.uniform()),round(128*np.random.uniform())]
    result = np.zeros(arr.shape)
    for i in range(arr.shape[0]-direction[0]):
        for j in range(arr.shape[1]-direction[1]):
            for k in range(arr.shape[2]-direction[2]):
                result[i+direction[0],j+direction[1],k+direction[2]] = arr[i,j,k]
    return result

def symmetry(arr):
    result = np.zeros(arr.shape)
    for i in range(arr.shape[0]):
        result[i] = arr[i].T
    return result

def rand_crop(arr):
    x, y = round(64*np.random.uniform()), round(64*np.random.uniform())
    arr = arr.copy()
    result = arr[:, x: x+128 , y: y+128]
    tmp_ = []
    for i in range(result.shape[0]):
        tmp_.append(np.resize(result[i], (arr.shape[1], arr.shape[2])))
    return tmp_

def gaussian_noise(arr):
    noise = np.random.normal(size = arr.shape[0]*arr.shape[1]*arr.shape[2])
    return 0.05*noise.reshape(arr.shape) + arr

In [3]:
def load_best_slices(path):
    ind = []
    for file in os.listdir(path):
        if file.split('.')[1] == 'npy':
            ind.append(file.split('.')[0])
        
    df = pd.DataFrame(index = ind, columns= ['CT'])
    for ind in df.index:
        df.loc[ind].CT = np.load(path + ind + '.npy')
    
    return df

df = load_best_slices('lung_chunks_256/')

In [4]:
df = df.reset_index()
df = df.rename(columns = {'index' : 'Patient'})

In [5]:
features = pd.read_csv('features.csv')
df = df.merge(features, on= 'Patient')

In [6]:
linear_data_all = pd.read_csv('patient_slope_intercept.csv', index_col=0)
result = pd.DataFrame(index = df.Patient, columns = ['slope'])
    
for ind in result.index:
    result.loc[ind].slope = linear_data_all.loc[ind].slope

df = df.merge(result, on='Patient')
df.iloc[:,2:] = df.iloc[:,2:].astype(np.float32)

In [7]:
dataset = df.values[:,1:]

In [8]:
scaler_features = MinMaxScaler()
dataset[:,1:] = scaler_features.fit_transform(dataset[:,1:])

scale = MinMaxScaler()
scale.min_, scale.scale_ = scaler_features.min_[-1], scaler_features.scale_[-1]

In [9]:
def custom_loss_function():

    def loss_function(y_true, y_pred):
        y_true = (y_true - scale.min_)/scale.scale_
        y_pred = (y_pred - scale.min_)/scale.scale_
        diff = tf.abs(y_pred-y_true)
        
        return tf.reduce_mean(diff**2, axis=-1)**.5
    
    return loss_function

def custom_metric_function():

    def metric_function(y_true, y_pred):
        y_true = (y_true - scale.min_)/scale.scale_
        y_pred = (y_pred - scale.min_)/scale.scale_
        diff = abs(y_pred-y_true)
        diff = tf.where(diff > 7.5, 7.5, diff)
        diff = tf.where(diff < 0.5, 0.5, diff)
        
        return tf.reduce_mean(diff, axis=-1)
    
    return metric_function

In [10]:
dataset_color_change = dataset[:,:].copy()
# dataset_translation = dataset[:,:].copy()
# dataset_symmetry = dataset[:,:].copy()
# dataset_crop = dataset[:,:].copy()
dataset_gaussian_noise = dataset[:,:].copy()

In [11]:
for i in range(dataset.shape[0]):
    dataset_color_change[i,0] = color_change(dataset_color_change[i,0])
    
# for i in range(dataset.shape[0]):
#     dataset_symmetry[i,0] = symmetry(dataset_symmetry[i,0])

# for i in range(dataset.shape[0]):
#     dataset_translation[i,0] = translation(dataset_translation[i,0])

# for i in range(dataset.shape[0]):
#     dataset_crop[i,0] = rand_crop(dataset_crop[i,0])

for i in range(dataset.shape[0]):
    dataset_gaussian_noise[i,0] = gaussian_noise(dataset_gaussian_noise[i,0])

In [12]:
#dataset = np.concatenate((dataset, dataset_color_change, dataset_symmetry, dataset_translation, dataset_crop, dataset_gaussian_noise), axis = 0)

dataset = np.concatenate((dataset, dataset_color_change, dataset_gaussian_noise), axis = 0)

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(dataset[:,:-1], dataset[:,-1], test_size=0.2)

In [14]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[i,0]) #.astype(np.float32))

lungs_train = np.array(lungs_train)    
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 7, 256, 256, 1)

lungs_test = []
for i in range(X_test[:,0].shape[0]):
    lungs_test.append(X_test[i,0]) #.astype(np.float32))

lungs_test = np.array(lungs_test)    
lungs_test = lungs_test.reshape(X_test[:,0].shape[0], 7, 256, 256, 1)

In [15]:
features_train = []
for i in range(X_train[:,1:].shape[0]):
    features_train.append(X_train[i,1:].astype(np.float32))
    
features_test = []
for i in range(X_test[:,1:].shape[0]):
    features_test.append(X_test[i,1:].astype(np.float32))

In [16]:
features_train = np.array(np.array(features_train))
features_train = features_train.reshape(features_train.shape[0], 8)

features_test = np.array(np.array(features_test))
features_test = features_test.reshape(features_test.shape[0], 8)

In [17]:
def create_model():


    inp_conv = Input(shape = (7,256,256,1), name='lungs')

    model_conv = Conv3D(16, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='relu')(inp_conv)
    model_conv = Conv3D(32, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='sigmoid')(model_conv)

    model_conv = GlobalMaxPooling3D()(model_conv)
    #model_conv = Flatten()(model_conv)
    model_conv = Dropout(0.25)(model_conv)

    model_conv =  Dense(16, activation='relu')(model_conv)
    outp_conv = Dense(8, activation='sigmoid')(model_conv)

    inp_feat = Input(shape = (8,), name='features')
    model_feat = Dense(32, activation='relu')(inp_feat)
    outp_feat = Dense(8, activation='sigmoid')(model_feat)

    model_conc = concatenate([outp_conv, outp_feat])
    model_conc = Dense(32, activation='relu')(model_conc)
    output = Dense(1, activation='linear')(model_conc)

    model = Model(inputs=[inp_conv, inp_feat], outputs=output, name="cnn_nn_model")
    
    model.compile(loss=custom_loss_function(), metrics=[custom_metric_function()], optimizer='adam')
    
    return model

In [18]:
model = create_model()

In [19]:
model.summary()

Model: "cnn_nn_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lungs (InputLayer)              (None, 7, 256, 256,  0                                            
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 7, 254, 254,  160         lungs[0][0]                      
__________________________________________________________________________________________________
conv3d_2 (Conv3D)               (None, 7, 252, 252,  4640        conv3d_1[0][0]                   
__________________________________________________________________________________________________
global_max_pooling3d_1 (GlobalM (None, 32)           0           conv3d_2[0][0]                   
_______________________________________________________________________________________

In [20]:
model.fit(
    {"lungs": lungs_train, "features": features_train},
    y_train,
    epochs=2, #experimenta mudar este número
    batch_size=14,
    validation_data = ({"lungs": lungs_test, "features": features_test}, y_test),
    mul
)

Train on 422 samples, validate on 106 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7fefd8465fd0>

In [21]:
y_pred = scale.inverse_transform(np.array(model.predict([lungs_test,features_test])).reshape(-1,1))
y_true = scale.inverse_transform(np.array(y_test).reshape(-1,1)) 

In [22]:
for i in zip(y_true.tolist(),y_pred.tolist()):
    print(i)

([-5.530886650085451], [-3.4708447456359863])
([-3.3800275325775173], [-6.185254096984863])
([-1.0402375459671034], [-3.424771308898926])
([0.597787022590637], [-3.4471042156219482])
([-2.2209293842315683], [-6.468137264251709])
([0.3500638306140918], [-5.113730430603027])
([14.6826114654541], [-5.150026321411133])
([-3.94448208808899], [-3.9274041652679443])
([0.7165697813034049], [-3.8931472301483154])
([-6.038142204284668], [-3.0957353115081787])
([4.677616596221924], [-4.13248872756958])
([0.2653380334377305], [-5.6801838874816895])
([-16.46481704711914], [-5.895273685455322])
([-13.420945167541504], [-6.027400970458984])
([-3.873737335205078], [-5.738102436065674])
([4.515500545501709], [-2.9733831882476807])
([-0.3305682241916653], [-3.1789376735687256])
([-8.28763484954834], [-5.0095133781433105])
([-1.1424745321273815], [-3.777806043624878])
([-8.543078422546387], [-6.344699382781982])
([-1.5106828212738046], [-5.6081156730651855])
([-12.683186531066895], [-6.814350128173828])


In [23]:
print(abs(y_true - y_pred).min())
print(abs(y_true - y_pred).max())
print(abs(y_true - y_pred).mean())
print(abs(y_pred - y_true))

0.00145626068115412
25.094757795333862
4.55395464975176
[[2.06004190e+00]
 [2.80522656e+00]
 [2.38453376e+00]
 [4.04489124e+00]
 [4.24720788e+00]
 [5.46379426e+00]
 [1.98326378e+01]
 [1.70779228e-02]
 [4.60971701e+00]
 [2.94240689e+00]
 [8.81010532e+00]
 [5.94552192e+00]
 [1.05695434e+01]
 [7.39354420e+00]
 [1.86436510e+00]
 [7.48888373e+00]
 [2.84836945e+00]
 [3.27812147e+00]
 [2.63533151e+00]
 [2.19837904e+00]
 [4.09743285e+00]
 [5.86883640e+00]
 [7.07507253e+00]
 [4.18493104e+00]
 [1.16304350e+00]
 [2.26229143e+00]
 [8.40014362e+00]
 [5.02349377e+00]
 [4.39486003e+00]
 [3.12188339e+00]
 [2.07318640e+00]
 [2.21338272e-02]
 [5.49391079e+00]
 [2.65748852e+00]
 [3.18299007e+00]
 [2.89582968e-01]
 [2.67921031e+00]
 [3.39758854e+00]
 [3.87479424e+00]
 [2.44180679e+00]
 [3.00048351e-01]
 [1.61558867e+00]
 [9.73503113e-01]
 [7.73896646e+00]
 [5.83810258e+00]
 [7.58676529e-02]
 [4.98496747e+00]
 [1.17460990e+00]
 [3.40282965e+00]
 [4.06512141e+00]
 [3.42112913e+00]
 [4.14880872e+00]
 [5.5703

In [24]:
#model.save_weights('model_cnn_3d_minimal_v2')

In [25]:
#model_test.load_weights('model_cnn_3d_minimal_v1000')

In [28]:
#model.layers[1].get_weights()[1]

In [29]:
# model.layers[1].set_weights(model.layers[1].get_weights())