In [1]:
import pandas as pd
import numpy as np
import pydicom 
import os
import matplotlib.pyplot as plt

import keras
from keras.models import Model, Sequential
from keras.layers import Concatenate, Dense, Input, concatenate
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, AveragePooling2D, Flatten

from sklearn.preprocessing import MinMaxScaler

Using TensorFlow backend.


In [2]:
def load_best_slices(path):
    ind = []
    for file in os.listdir(path):
        if file.split('.')[1] == 'txt':
            ind.append(file.split('.')[0])
        
    df = pd.DataFrame(index = ind, columns= ['CT'])
    for ind in df.index:
        df.loc[ind].CT = np.loadtxt(path + ind + '.txt')
    
    return df

df = load_best_slices('best_lung_slice/')

In [3]:
df = df.reset_index()
df = df.rename(columns = {'index' : 'Patient'})

In [4]:
features = pd.read_csv('features.csv')
df = df.merge(features, on= 'Patient')

In [5]:
linear_data_all = pd.read_csv('patient_slope_intercept.csv', index_col=0)
result = pd.DataFrame(index = df.Patient, columns = ['slope'])
    
for ind in result.index:
    result.loc[ind].slope = linear_data_all.loc[ind].slope

df = df.merge(result, on='Patient')

In [6]:
df.head()

Unnamed: 0,Patient,CT,Percent,Age,FirstWeek,FirstFVC,Height,Male,Ex-smoker,Never smoked,slope
0,ID00329637202285906759848,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",66.24941,69,39,2805,140.940609,1,1,0,-1.04024
1,ID00225637202259339837603,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",80.172195,77,13,1583,113.047204,0,0,1,-7.46048
2,ID00364637202296074419422,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",91.171429,64,37,3191,155.94761,1,1,0,-15.0307
3,ID00130637202220059448013,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",69.852029,65,11,1690,111.074597,0,0,1,-8.00338
4,ID00115637202211874187958,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",77.749298,77,15,2548,134.062927,1,1,0,-6.40338


In [7]:
dataset = df.values[:,1:]

In [8]:
scaler_features = MinMaxScaler()
dataset[:,1:-1] = scaler_features.fit_transform(dataset[:,1:-1])

In [9]:
def my_scaler(array):
    max_ = array.max()
    min_ = array.min()
    y = (array-min_)/(max_-min_)
    return 0.5*y + 0.25

def un_scaler(array, original_array):
    max_ = original_array.max()
    min_ = original_array.min()
    y = 2*(array - 0.25)
    return (max_-min_)*y + min_

#y_scaled = my_scaler(result)
# y = un_scaler(y_scaled, result)

In [10]:
scaled_dataset = np.copy(dataset)
scaled_dataset[:,-1] = my_scaler(scaled_dataset[:,-1])

#un_scaler(****, dataset[:,-1])

In [11]:
#just to set the values of the lung matrices between 0 and 1
scaled_dataset[:,0] = scaled_dataset[:,0]/10

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_dataset[:,:-1], scaled_dataset[:,-1], test_size=0.2)

In [13]:
lungs_train = []
for i in range(X_train[:,0].shape[0]):
    lungs_train.append(X_train[:,0][0].astype(np.float32))

lungs_train = np.array(lungs_train)    
lungs_train = lungs_train.reshape(X_train[:,0].shape[0], 512, 512, 1)

lungs_test = []
for i in range(X_test[:,0].shape[0]):
    lungs_test.append(X_test[:,0][0].astype(np.float32))

lungs_test = np.array(lungs_test)    
lungs_test = lungs_test.reshape(X_test[:,0].shape[0], 512, 512, 1)

In [14]:
features_train = []
for i in range(X_train[:,1:].shape[0]):
    features_train.append(X_train[i,1:].astype(np.float32))
    
features_test = []
for i in range(X_test[:,1:].shape[0]):
    features_test.append(X_test[i,1:].astype(np.float32))

In [15]:
features_train = np.array(np.array(features_train))
features_train = features_train.reshape(features_train.shape[0], 8)

features_test = np.array(np.array(features_test))
features_test = features_test.reshape(features_test.shape[0], 8)

In [16]:
inp_conv = Input(shape = (512,512,1), name='lungs')

model_conv = Conv2D(16, kernel_size=(2,2), strides=(1,1), padding='valid', activation='relu')(inp_conv)
model_conv = Conv2D(32, kernel_size=(2,2), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = MaxPool2D(pool_size=(2,2))(model_conv)
model_conv = AveragePooling2D(pool_size=(3,3))(model_conv)
model_conv = Dropout(0.25)(model_conv)
model_conv = Conv2D(16, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = Conv2D(32, kernel_size=(3,3), strides=(1,1), padding='valid', activation='relu')(model_conv)
model_conv = MaxPool2D(pool_size=(2,2))(model_conv)
model_conv = AveragePooling2D(pool_size=(3,3))(model_conv)
model_conv = Dropout(0.2)(model_conv)
model_conv = Flatten()(model_conv)
model_conv = Dense(30, activation='relu')(model_conv)
outp_conv = Dense(8, activation='sigmoid')(model_conv)

inp_feat = Input(shape = (8,), name='features')
model_feat = Dense(16, activation='relu')(inp_feat)
outp_feat = Dense(8, activation='relu')(model_feat)

model_conc = concatenate([outp_conv, outp_feat])
model_conc = Dense(10, activation='relu')(model_conc)
model_conc = Dense(7, activation='relu')(model_conc)
model_conc = Dense(5, activation='relu')(model_conc)
output = Dense(1, activation='sigmoid')(model_conc)

model = Model(inputs=[inp_conv, inp_feat], outputs=output, name="cnn_nn_model")

In [17]:
model.summary()

Model: "cnn_nn_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lungs (InputLayer)              (None, 512, 512, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 511, 511, 16) 80          lungs[0][0]                      
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 510, 510, 32) 2080        conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 255, 255, 32) 0           conv2d_2[0][0]                   
_______________________________________________________________________________________

In [18]:
model.compile(loss='mean_squared_error', metrics=['mean_squared_error'], optimizer='adam')

In [None]:
model.fit(
    {"lungs": lungs_train, "features": features_train},
    y_train,
    epochs=20, #experimenta mudar este número
    batch_size=52,
    validation_data = ({"lungs": lungs_test, "features": features_test}, y_test)
)

Train on 140 samples, validate on 36 samples
Epoch 1/20


In [None]:
un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1])

In [None]:
#diz-me que obtemos um número pequenino aqui para eu ficar feliz :D 
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).mean())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).max())
print(abs(un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])).min())

In [None]:
un_scaler(model.predict(([lungs_test, features_test])), dataset[:,-1]) - un_scaler(y_test.reshape(-1,1), dataset[:,-1])

In [None]:
print(abs(model.predict([lungs_test, features_test]) - dataset[:,-1] -y_test.reshape(-1,1)).max())
print(abs(model.predict([lungs_test, features_test]) - dataset[:,-1] -y_test.reshape(-1,1)).min())
print(abs(model.predict([lungs_test, features_test]) - dataset[:,-1] -y_test.reshape(-1,1)).mean())

In [None]:
#last version