In [1]:
import pandas as pd
import numpy as np
import pydicom 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import Concatenate, Dense, Input, concatenate
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, AveragePooling2D, Flatten

from sklearn.preprocessing import MinMaxScaler

Using TensorFlow backend.


In [2]:
def load_best_slices(path):
    ind = []
    for file in os.listdir(path):
        if file.split('.')[1] == 'txt':
            ind.append(file.split('.')[0])
        
    df = pd.DataFrame(index = ind, columns= ['CT'])
    for ind in df.index:
        df.loc[ind].CT = np.loadtxt(path + ind + '.txt')
    
    return df

df = load_best_slices('best_lung_slice/')

In [50]:
def custom_loss_function(original_array):
    original_array = original_array.astype('float32')

    def loss_function(y_true, y_pred):
        
        pred = un_scaler(y_pred, original_array)
        true = un_scaler(y_true, original_array)
        diff = abs(pred-true)
        
        return tf.reduce_mean(diff**4, axis=-1)**.25
    
    return loss_function

def custom_metric_function(original_array):
    original_array = original_array.astype('float32')

    def metric_function(y_true, y_pred):
        
        pred = un_scaler(y_pred, original_array)
        true = un_scaler(y_true, original_array)
        diff = abs(pred-true)
        diff = tf.where(diff > 7.5, 7.5, diff)
        diff = tf.where(diff < 0.5, 0.5, diff)
        
        return tf.reduce_mean(diff, axis=-1)
    
    return metric_function

In [3]:
df = df.reset_index()
df = df.rename(columns = {'index' : 'Patient'})

In [4]:
features = pd.read_csv('features.csv')
df = df.merge(features, on= 'Patient')

In [5]:
linear_data_all = pd.read_csv('patient_slope_intercept.csv', index_col=0)
result = pd.DataFrame(index = df.Patient, columns = ['slope'])
    
for ind in result.index:
    result.loc[ind].slope = linear_data_all.loc[ind].slope

df = df.merge(result, on='Patient')
df.iloc[:,2:] = df.iloc[:,2:].astype('float32')

In [6]:
#df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 176 entries, 0 to 175
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Patient       176 non-null    object 
 1   CT            176 non-null    object 
 2   Percent       176 non-null    float32
 3   Age           176 non-null    float32
 4   FirstWeek     176 non-null    float32
 5   FirstFVC      176 non-null    float32
 6   Height        176 non-null    float32
 7   Male          176 non-null    float32
 8   Ex-smoker     176 non-null    float32
 9   Never smoked  176 non-null    float32
 10  slope         176 non-null    float32
dtypes: float32(9), object(2)
memory usage: 10.3+ KB


In [7]:
dataset = df.values[:,1:]

In [9]:
scaler_features = MinMaxScaler()
dataset[:,1:-1] = scaler_features.fit_transform(dataset[:,1:-1])

In [10]:
def my_scaler(array):
    max_ = array.max()
    min_ = array.min()
    y = (array-min_)/(max_-min_)
    return 0.5*y + 0.25

def un_scaler(array, original_array):
    max_ = original_array.max()
    min_ = original_array.min()
    y = 2*(array - 0.25)
    return (max_-min_)*y + min_

#y_scaled = my_scaler(result)
# y = un_scaler(y_scaled, result)

In [11]:
scaled_dataset = np.copy(dataset)
scaled_dataset[:,-1] = my_scaler(scaled_dataset[:,-1])

#un_scaler(****, dataset[:,-1])

In [12]:
#just to set the values of the lung matrices between 0 and 1
scaled_dataset[:,0] = scaled_dataset[:,0]/10

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_dataset[:,:-1], scaled_dataset[:,-1], test_size=0.2)

In [14]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[:,0][0].astype(np.float32))

lungs_train = np.array(lungs_train)    
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 512, 512, 1)

lungs_test = []
for i in range(X_test[:,0].shape[0]):
    lungs_test.append(X_test[:,0][0].astype(np.float32))

lungs_test = np.array(lungs_test)    
lungs_test = lungs_test.reshape(X_test[:,0].shape[0], 512, 512, 1)

In [15]:
features_train = []
for i in range(X_train[:,1:].shape[0]):
    features_train.append(X_train[i,1:].astype(np.float32))
    
features_test = []
for i in range(X_test[:,1:].shape[0]):
    features_test.append(X_test[i,1:].astype(np.float32))

In [16]:
features_train = np.array(np.array(features_train))
features_train = features_train.reshape(features_train.shape[0], 8)

features_test = np.array(np.array(features_test))
features_test = features_test.reshape(features_test.shape[0], 8)

In [37]:
inp_conv = Input(shape = (512,512,1), name='lungs')

model_conv = Conv2D(16, kernel_size=(2,2), strides=(1,1), padding='valid', activation='relu')(inp_conv)
model_conv = Conv2D(16, kernel_size=(2,2), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = AveragePooling2D(pool_size=(3,3))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
#model_conv = MaxPool2D(pool_size=(2,2))(model_conv)
model_conv = Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
#model_conv = MaxPool2D(pool_size=(2,2))(model_conv)
model_conv = AveragePooling2D(pool_size=(3,3))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = AveragePooling2D(pool_size=(3,3))(model_conv)
model_conv = MaxPool2D(pool_size=(2,2))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Flatten()(model_conv)
model_conv = Dense(16, activation='relu')(model_conv)
outp_conv = Dense(8, activation='sigmoid')(model_conv)

inp_feat = Input(shape = (8,), name='features')
model_feat = Dense(16, activation='relu')(inp_feat)
outp_feat = Dense(8, activation='relu')(model_feat)

model_conc = concatenate([outp_conv, outp_feat])
model_conc = Dense(32, activation='relu')(model_conc)
model_conc = Dense(16, activation='relu')(model_conc)
model_conc = Dense(8, activation='relu')(model_conc)
output = Dense(1, activation='sigmoid')(model_conc)

model = Model(inputs=[inp_conv, inp_feat], outputs=output, name="cnn_nn_model")

In [38]:
model.summary()

Model: "cnn_nn_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lungs (InputLayer)              (None, 512, 512, 1)  0                                            
__________________________________________________________________________________________________
conv2d_13 (Conv2D)              (None, 511, 511, 16) 80          lungs[0][0]                      
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 510, 510, 16) 1040        conv2d_13[0][0]                  
__________________________________________________________________________________________________
average_pooling2d_7 (AveragePoo (None, 170, 170, 16) 0           conv2d_14[0][0]                  
_______________________________________________________________________________________

In [51]:
model.compile(loss=custom_loss_function(dataset[:,-1]), metrics=[custom_metric_function(dataset[:,-1])], optimizer='adam')

In [52]:
model.fit(
    {"lungs": lungs_train, "features": features_train},
    y_train,
    epochs=2, #experimenta mudar este número
    batch_size=28,
    validation_data = ({"lungs": lungs_test, "features": features_test}, y_test)
)

Train on 140 samples, validate on 36 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7fa96822e4d0>

In [32]:
un_scaler(y_test.reshape(-1,1), dataset[:,-1])

array([[3.970163106918335],
       [0.5981338620185888],
       [-4.808091163635254],
       [-4.51304912567139],
       [0.6563821434974706],
       [-3.410314321517948],
       [-1.0813171863555908],
       [-8.287634849548343],
       [-2.3353023529052734],
       [-0.8837943077087331],
       [-3.444329261779785],
       [-5.494368553161628],
       [-6.741822719573978],
       [-1.0402375459671056],
       [-7.182352542877194],
       [-9.379955291748047],
       [1.6565740108489955],
       [-3.443578004837036],
       [-18.381778717041016],
       [14.682611465454102],
       [-8.515748977661133],
       [-5.733318328857422],
       [-4.208603382110596],
       [-9.673158645629883],
       [-1.1616004705429077],
       [-8.91279411315918],
       [-20.45368194580078],
       [-12.683186531066891],
       [-2.3159203529357875],
       [-8.189459800720211],
       [-2.3561151027679443],
       [1.8965998888015747],
       [11.225947380065918],
       [-8.139245033264157],
       [

In [47]:
un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1])

array([[-3.3177586 ],
       [-4.535124  ],
       [-4.3442783 ],
       [-3.937502  ],
       [-4.491392  ],
       [-4.2014294 ],
       [-4.212549  ],
       [-4.5679245 ],
       [-4.3799763 ],
       [-5.142086  ],
       [-4.0574055 ],
       [-3.5885086 ],
       [-4.232712  ],
       [-2.9177704 ],
       [-4.319725  ],
       [-4.8080845 ],
       [-4.1323986 ],
       [-4.771208  ],
       [-4.0353928 ],
       [ 0.33620834],
       [-4.547024  ],
       [-4.320818  ],
       [-3.2156162 ],
       [-4.203663  ],
       [-4.5287914 ],
       [-4.7370777 ],
       [-4.5242653 ],
       [-5.0828457 ],
       [-2.9377995 ],
       [-2.7172756 ],
       [-2.5633144 ],
       [-2.98872   ],
       [-3.9713345 ],
       [-4.1734676 ],
       [-5.091236  ],
       [-4.775284  ]], dtype=float32)

In [46]:
#diz-me que obtemos um número pequenino aqui para eu ficar feliz :D 
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).max())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).min())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).mean())

15.92941665649414
0.2071993350982666
4.635238316324023


In [None]:
un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])

In [None]:
print(abs(model.predict([lungs_test, features_test])  -y_test.reshape(-1,1)).max())
print(abs(model.predict([lungs_test, features_test])  -y_test.reshape(-1,1)).min())
print(abs(model.predict([lungs_test, features_test])  -y_test.reshape(-1,1)).mean())

In [None]:
#last version

In [None]:
tf.compat.v1.enable_eager_execution()

In [None]:
tf.executing_eagerly() 

In [None]:
tf.square(dataset[:,-1].astype('float32'))

In [None]:
un_scaler(1.5,dataset[:,-1])