In [1]:
import pandas as pd
import numpy as np
import pydicom 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import Concatenate, Dense, Input, concatenate, BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Conv3D, MaxPool3D, AveragePooling3D

from sklearn.preprocessing import MinMaxScaler

Using TensorFlow backend.


In [2]:
def load_best_slices(path):
    ind = []
    for file in os.listdir(path):
        if file.split('.')[1] == 'npy':
            ind.append(file.split('.')[0])
        
    df = pd.DataFrame(index = ind, columns= ['CT'])
    for ind in df.index:
        df.loc[ind].CT = np.load(path + ind + '.npy')
    
    return df

df = load_best_slices('lung_chunks/')

In [3]:
def custom_loss_function():

    def loss_function(y_true, y_pred):
        y_true = tf.convert_to_tensor(y_true)
        diff = tf.abs(y_pred-y_true)
        
        return tf.reduce_mean(diff**4, axis=-1)**.25
    
    return loss_function

def custom_metric_function():

    def metric_function(y_true, y_pred):
        
        diff = abs(y_pred-y_true)
        diff = tf.where(diff > 7.5, 7.5, diff)
        diff = tf.where(diff < 0.5, 0.5, diff)
        
        return tf.reduce_mean(diff, axis=-1)
    
    return metric_function

In [4]:
df = df.reset_index()
df = df.rename(columns = {'index' : 'Patient'})

In [5]:
features = pd.read_csv('features.csv')
df = df.merge(features, on= 'Patient')

In [6]:
linear_data_all = pd.read_csv('patient_slope_intercept.csv', index_col=0)
result = pd.DataFrame(index = df.Patient, columns = ['slope'])
    
for ind in result.index:
    result.loc[ind].slope = linear_data_all.loc[ind].slope

df = df.merge(result, on='Patient')
df.iloc[:,2:] = df.iloc[:,2:].astype(np.float32)

In [7]:
df.head()

Unnamed: 0,Patient,CT,Percent,Age,FirstWeek,FirstFVC,Height,Male,Ex-smoker,Never smoked,slope
0,ID00283637202278714365037,"[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",54.522751,60.0,0.0,1965.0,93.974174,1.0,0.0,1.0,0.681112
1,ID00370637202296737666151,"[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",63.806259,75.0,12.0,2308.0,120.020798,1.0,1.0,0.0,-0.330568
2,ID00089637202204675567570,"[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",60.143166,63.0,9.0,2571.0,124.963547,1.0,0.0,1.0,-11.805201
3,ID00291637202279398396106,"[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",100.252426,72.0,2.0,4051.0,207.042831,1.0,1.0,0.0,-9.081413
4,ID00086637202203494931510,"[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",117.628563,65.0,-5.0,3367.0,221.294769,0.0,0.0,1.0,-0.191625


In [8]:
dataset = df.values[:,1:]

In [9]:
scaler_features = MinMaxScaler()
dataset[:,1:-1] = scaler_features.fit_transform(dataset[:,1:-1])

In [11]:
# def my_scaler(array):
#     max_ = array.max()
#     min_ = array.min()
#     return 0.5*(array-min_)/(max_-min_) + 0.25

# def un_scaler(array, original_array):
#     max_ = original_array.max()
#     min_ = original_array.min()
#     return (max_-min_)*2*(array - 0.25) + min_

#y_scaled = my_scaler(result)
# y = un_scaler(y_scaled, result)

In [12]:
#scaled_dataset = np.copy(dataset)
#scaled_dataset[:,-1] = my_scaler(scaled_dataset[:,-1])

#un_scaler(****, dataset[:,-1])

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(dataset[:,:-1], dataset[:,-1], test_size=0.2)

In [None]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[:,0][0])
lungs_train = tf.convert_to_tensor(lungs_train)
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 7, 512, 512, 1)

In [26]:
type(lungs_train[0,0,0,0,0])

numpy.float32

In [15]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[:,0][0]) #.astype(np.float32))

lungs_train = np.array(lungs_train)    
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 7, 512, 512, 1)

lungs_test = []
for i in range(X_test[:,0].shape[0]):
    lungs_test.append(X_test[:,0][0]) #.astype(np.float32))

lungs_test = np.array(lungs_test)    
lungs_test = lungs_test.reshape(X_test[:,0].shape[0], 7, 512, 512, 1)

In [19]:
features_train = []
for i in range(X_train[:,1:].shape[0]):
    features_train.append(X_train[i,1:].astype(np.float32))
    
features_test = []
for i in range(X_test[:,1:].shape[0]):
    features_test.append(X_test[i,1:].astype(np.float32))

In [20]:
features_train = np.array(np.array(features_train))
features_train = features_train.reshape(features_train.shape[0], 8)

features_test = np.array(np.array(features_test))
features_test = features_test.reshape(features_test.shape[0], 8)

In [21]:
inp_conv = Input(shape = (7,512,512,1), name='lungs')

model_conv = Conv3D(16, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='relu')(inp_conv)
model_conv = Conv3D(32, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='relu')(model_conv)
model_conv = BatchNormalization()(model_conv)
model_conv = MaxPool3D(pool_size=(1,3,3))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Conv3D(32, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='relu')(model_conv)
model_conv = Conv3D(16, kernel_size=(1,3,3), strides=(1,1,1), padding='valid', activation='relu')(model_conv)
model_conv = BatchNormalization()(model_conv)
model_conv = AveragePooling3D(pool_size=(1,5,5))(model_conv)
model_conv = MaxPool3D(pool_size=(5,2,2))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Flatten()(model_conv)
model_conv = Dense(16, activation='relu')(model_conv)
outp_conv = Dense(8, activation='sigmoid')(model_conv)

inp_feat = Input(shape = (8,), name='features')
model_feat = Dense(16, activation='relu')(inp_feat)
outp_feat = Dense(8, activation='sigmoid')(model_feat)

model_conc = concatenate([outp_conv, outp_feat])
model_conc = Dense(32, activation='relu')(model_conc)
model_conc = Dense(16, activation='relu')(model_conc)
model_conc = Dense(8, activation='relu')(model_conc)
output = Dense(1, activation='linear')(model_conc)

model = Model(inputs=[inp_conv, inp_feat], outputs=output, name="cnn_nn_model")

In [22]:
model.summary()

Model: "cnn_nn_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lungs (InputLayer)              (None, 7, 512, 512,  0                                            
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 7, 510, 510,  160         lungs[0][0]                      
__________________________________________________________________________________________________
conv3d_2 (Conv3D)               (None, 7, 508, 508,  4640        conv3d_1[0][0]                   
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 7, 508, 508,  128         conv3d_2[0][0]                   
_______________________________________________________________________________________

In [1]:
model.compile(loss=custom_loss_function(), metrics=[custom_metric_function()], optimizer='adam')

NameError: name 'model' is not defined

In [None]:
model.fit(
    {"lungs": lungs_train, "features": features_train},
    y_train,
    epochs=200, #experimenta mudar este número
    batch_size=14,
    validation_data = ({"lungs": lungs_test, "features": features_test}, y_test)
)

Train on 140 samples, validate on 36 samples
Epoch 1/200


In [None]:
model.predict(([lungs_test, features_test]))

In [None]:
#diz-me que obtemos um número pequenino aqui para eu ficar feliz :D 
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).max())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).min())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).mean())

In [None]:
df[['Patient','slope']]

In [1]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [None]:
lungs_total = []
for i in range(scaled_dataset[:,0].shape[0]):
    lungs_total.append(scaled_dataset[:,0][0].astype(np.float32))

lungs_total = np.array(lungs_total)    
lungs_total = lungs_total.reshape(scaled_dataset[:,0].shape[0], 512, 512, 1)

In [None]:
new_slope = un_scaler(model.predict([lungs_total, scaled_dataset[:,1:-1]]), dataset[:,-1])

In [None]:
df

In [None]:
df['pred_slope'] = new_slope

In [None]:
df[['Patient', 'slope','pred_slope']].head(20)

In [None]:
print(df.slope.mean())
print(df.pred_slope.mean())