In [82]:
import pandas as pd
import numpy as np
import pydicom 
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import Concatenate, Dense, Input, concatenate, BatchNormalization
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, AveragePooling2D, Flatten

from sklearn.preprocessing import MinMaxScaler

In [2]:
def load_best_slices(path):
    ind = []
    for file in os.listdir(path):
        if file.split('.')[1] == 'txt':
            ind.append(file.split('.')[0])
        
    df = pd.DataFrame(index = ind, columns= ['CT'])
    for ind in df.index:
        df.loc[ind].CT = np.loadtxt(path + ind + '.txt')
    
    return df

df = load_best_slices('best_lung_slice/')

In [3]:
def custom_loss_function(original_array):
    original_array = original_array.astype('float32')

    def loss_function(y_true, y_pred):
        
        pred = un_scaler(y_pred, original_array)
        true = un_scaler(y_true, original_array)
        diff = abs(pred-true)
        
        return tf.reduce_mean(diff**4, axis=-1)**.25
    
    return loss_function

def custom_metric_function(original_array):
    original_array = original_array.astype('float32')

    def metric_function(y_true, y_pred):
        
        pred = un_scaler(y_pred, original_array)
        true = un_scaler(y_true, original_array)
        diff = abs(pred-true)
        diff = tf.where(diff > 7.5, 7.5, diff)
        diff = tf.where(diff < 0.5, 0.5, diff)
        
        return tf.reduce_mean(diff, axis=-1)
    
    return metric_function

In [4]:
df = df.reset_index()
df = df.rename(columns = {'index' : 'Patient'})

In [5]:
features = pd.read_csv('features.csv')
df = df.merge(features, on= 'Patient')

In [6]:
linear_data_all = pd.read_csv('patient_slope_intercept.csv', index_col=0)
result = pd.DataFrame(index = df.Patient, columns = ['slope'])
    
for ind in result.index:
    result.loc[ind].slope = linear_data_all.loc[ind].slope

df = df.merge(result, on='Patient')
df.iloc[:,2:] = df.iloc[:,2:].astype('float32')

In [7]:
#df.info()

In [8]:
dataset = df.values[:,1:]

In [9]:
scaler_features = MinMaxScaler()
dataset[:,1:-1] = scaler_features.fit_transform(dataset[:,1:-1])

In [10]:
def my_scaler(array):
    max_ = array.max()
    min_ = array.min()
    return 0.5*(array-min_)/(max_-min_) + 0.25

def un_scaler(array, original_array):
    max_ = original_array.max()
    min_ = original_array.min()
    return (max_-min_)*2*(array - 0.25) + min_

#y_scaled = my_scaler(result)
# y = un_scaler(y_scaled, result)

In [11]:
scaled_dataset = np.copy(dataset)
scaled_dataset[:,-1] = my_scaler(scaled_dataset[:,-1])

#un_scaler(****, dataset[:,-1])

In [12]:
#just to set the values of the lung matrices between 0 and 1
scaled_dataset[:,0] = scaled_dataset[:,0]/10

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_dataset[:,:-1], scaled_dataset[:,-1], test_size=0.2)

In [14]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[:,0][0].astype(np.float32))

lungs_train = np.array(lungs_train)    
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 512, 512, 1)

lungs_test = []
for i in range(X_test[:,0].shape[0]):
    lungs_test.append(X_test[:,0][0].astype(np.float32))

lungs_test = np.array(lungs_test)    
lungs_test = lungs_test.reshape(X_test[:,0].shape[0], 512, 512, 1)

In [15]:
features_train = []
for i in range(X_train[:,1:].shape[0]):
    features_train.append(X_train[i,1:].astype(np.float32))
    
features_test = []
for i in range(X_test[:,1:].shape[0]):
    features_test.append(X_test[i,1:].astype(np.float32))

In [16]:
features_train = np.array(np.array(features_train))
features_train = features_train.reshape(features_train.shape[0], 8)

features_test = np.array(np.array(features_test))
features_test = features_test.reshape(features_test.shape[0], 8)

In [None]:
inp_conv = Input(shape = (512,512,1), name='lungs')

model_conv = Conv2D(16, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(inp_conv)
model_conv = Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = BatchNormalization()(model_conv)
model_conv = MaxPool2D(pool_size=(3,3))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = Conv2D(16, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = BatchNormalization()(model_conv)
model_conv = MaxPool2D(pool_size=(5,5))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Flatten()(model_conv)
model_conv = Dense(16, activation='relu')(model_conv)
outp_conv = Dense(8, activation='sigmoid')(model_conv)

inp_feat = Input(shape = (8,), name='features')
model_feat = Dense(16, activation='relu')(inp_feat)
outp_feat = Dense(8, activation='sigmoid')(model_feat)

model_conc = concatenate([outp_conv, outp_feat])
model_conc = Dense(32, activation='relu')(model_conc)
model_conc = Dense(16, activation='relu')(model_conc)
model_conc = Dense(8, activation='relu')(model_conc)
output = Dense(1, activation='linear')(model_conc)

model = Model(inputs=[inp_conv, inp_feat], outputs=output, name="cnn_nn_model")

In [95]:
model.summary()

Model: "cnn_nn_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lungs (InputLayer)              (None, 512, 512, 1)  0                                            
__________________________________________________________________________________________________
conv2d_59 (Conv2D)              (None, 510, 510, 16) 160         lungs[0][0]                      
__________________________________________________________________________________________________
conv2d_60 (Conv2D)              (None, 508, 508, 32) 4640        conv2d_59[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_28 (MaxPooling2D) (None, 169, 169, 32) 0           conv2d_60[0][0]                  
_______________________________________________________________________________________

In [96]:
model.compile(loss=custom_loss_function(dataset[:,-1]), metrics=[custom_metric_function(dataset[:,-1])], optimizer='adam')

In [97]:
model.fit(
    {"lungs": lungs_train, "features": features_train},
    y_train,
    epochs=20, #experimenta mudar este número
    batch_size=14,
    validation_data = ({"lungs": lungs_test, "features": features_test}, y_test)
)

Train on 140 samples, validate on 36 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7fa8de748cd0>

In [104]:
un_scaler(y_test.reshape(-1,1), dataset[:,-1])

array([[-8.515748977661133],
       [-12.683186531066891],
       [-14.362868309021],
       [-3.6220192909240723],
       [-2.0348134040832484],
       [1.3588894605636561],
       [2.557149648666389],
       [-4.263502120971683],
       [-4.49784183502198],
       [-1.564164280891422],
       [-5.87359523773193],
       [0.7165697813034058],
       [-6.531894207000736],
       [-0.8007971048355067],
       [-1.179050207138058],
       [-2.280987024307251],
       [-8.287634849548343],
       [7.331258296966546],
       [-9.379955291748047],
       [-0.037748798727985644],
       [-13.420945167541504],
       [-2.9042332172393763],
       [-5.494368553161628],
       [-2.2209293842315674],
       [-9.381847381591797],
       [1.9887955188751292],
       [0.6811119318008423],
       [-10.831542015075684],
       [-1.1424745321273804],
       [-9.756552696228027],
       [-4.0704088211059535],
       [-3.873737335205071],
       [-5.947910785675056],
       [-2.3561151027679443],
      

In [105]:
un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1])

array([[-2.8373394 ],
       [-1.6953373 ],
       [-2.769106  ],
       [-2.961214  ],
       [-2.7317123 ],
       [-2.5205536 ],
       [-2.6501007 ],
       [-2.7623196 ],
       [-1.7681274 ],
       [-2.85433   ],
       [-2.8107014 ],
       [-2.6672134 ],
       [-2.55266   ],
       [-2.6114388 ],
       [-2.3147774 ],
       [-2.610054  ],
       [-2.7095146 ],
       [-0.40749168],
       [-2.729311  ],
       [-2.8739872 ],
       [-2.1503086 ],
       [-2.0188713 ],
       [-2.7315388 ],
       [-2.066389  ],
       [-2.753189  ],
       [-0.6066551 ],
       [-2.2042904 ],
       [-2.8530369 ],
       [-2.957264  ],
       [-2.761135  ],
       [-2.5255356 ],
       [-1.9230652 ],
       [-1.3874893 ],
       [-2.5870247 ],
       [-1.8782673 ],
       [-2.0428581 ]], dtype=float32)

In [106]:
#diz-me que obtemos um número pequenino aqui para eu ficar feliz :D 
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).max())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).min())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).mean())

11.593762397766117
0.1545403003692627
3.749740740077363


In [65]:
df[['Patient','slope']]

Unnamed: 0,Patient,slope
0,ID00329637202285906759848,-1.040238
1,ID00225637202259339837603,-7.460479
2,ID00364637202296074419422,-15.030663
3,ID00130637202220059448013,-8.003377
4,ID00115637202211874187958,-6.403378
...,...,...
171,ID00214637202257820847190,-1.564164
172,ID00032637202181710233084,-14.362868
173,ID00089637202204675567570,-11.805201
174,ID00132637202222178761324,-0.658933


In [98]:
lungs_total = []
for i in range(scaled_dataset[:,0].shape[0]):
    lungs_total.append(scaled_dataset[:,0][0].astype(np.float32))

lungs_total = np.array(lungs_total)    
lungs_total = lungs_total.reshape(scaled_dataset[:,0].shape[0], 512, 512, 1)

In [101]:
new_slope = un_scaler(model.predict([lungs_total, scaled_dataset[:,1:-1]]), dataset[:,-1])

In [102]:
df

Unnamed: 0,Patient,CT,Percent,Age,FirstWeek,FirstFVC,Height,Male,Ex-smoker,Never smoked,slope
0,ID00329637202285906759848,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",66.249413,69.0,39.0,2805.0,140.940613,1.0,1.0,0.0,-1.040238
1,ID00225637202259339837603,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",80.172195,77.0,13.0,1583.0,113.047203,0.0,0.0,1.0,-7.460479
2,ID00364637202296074419422,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",91.171425,64.0,37.0,3191.0,155.947617,1.0,1.0,0.0,-15.030663
3,ID00130637202220059448013,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",69.852028,65.0,11.0,1690.0,111.074600,0.0,0.0,1.0,-8.003377
4,ID00115637202211874187958,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",77.749298,77.0,15.0,2548.0,134.062927,1.0,1.0,0.0,-6.403378
...,...,...,...,...,...,...,...,...,...,...,...
171,ID00214637202257820847190,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",83.702881,69.0,3.0,2869.0,144.156372,1.0,1.0,0.0,-1.564164
172,ID00032637202181710233084,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",119.629135,63.0,30.0,5045.0,245.212402,1.0,1.0,0.0,-14.362868
173,ID00089637202204675567570,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",60.143166,63.0,9.0,2571.0,124.963547,1.0,0.0,1.0,-11.805201
174,ID00132637202222178761324,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",60.164040,69.0,6.0,2582.0,129.735703,1.0,1.0,0.0,-0.658933


In [103]:
df['pred_slope'] = new_slope

In [109]:
df[['Patient', 'slope','pred_slope']].head(20)

Unnamed: 0,Patient,slope,pred_slope
0,ID00329637202285906759848,-1.040238,-2.842218
1,ID00225637202259339837603,-7.460479,-2.12105
2,ID00364637202296074419422,-15.030663,-2.989416
3,ID00130637202220059448013,-8.003377,-2.246799
4,ID00115637202211874187958,-6.403378,-2.64587
5,ID00052637202186188008618,-23.2875,-2.237402
6,ID00026637202179561894768,-2.967859,-2.705202
7,ID00183637202241995351650,0.405563,-2.806787
8,ID00229637202260254240583,-3.410314,-2.826359
9,ID00367637202296290303449,-8.287635,-2.685555


In [111]:
print(df.slope.mean())
print(df.pred_slope.mean())

-4.489635944366455
-2.371462106704712
