In [1]:

import numpy as np
import pydicom
import os
import sys
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import Sequence

# Report the library versions this notebook ran against.
print(tf.__version__)
print(keras.__version__)
# Expose only CUDA device index 1 to this process.
# NOTE(review): this is set after TensorFlow has already been imported; it is
# usually set before the import — confirm it takes effect as intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
import warnings
# 'once' shows each distinct warning only the first time it is raised.
warnings.filterwarnings(action='once')
import cv2


2.8.1
2.8.0
Num GPUs Available:  1


In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import random
from keras import layers
# Seed the three random number generators used in this notebook (NumPy,
# Python's `random`, TensorFlow), each with its own fixed value.
np.random.seed(1)
random.seed(2)
tf.random.set_seed(4)

# --- Image variants ---------------------------------------------------------
# Each value is the channel index that DataSlabGenerator._load_dicom selects
# from a PNG read from disk.
CROPPED, MASKED, ORIGINAL = 0, 1, 2
IMG_TYPE = MASKED

# --- Locations --------------------------------------------------------------
IMAGE_PATH = '/home/shared/nps/imgs/'
MODEL_DIR = '/home/shared/model_checkpoint_paige/singlescan-3channel/'

# --- Training ---------------------------------------------------------------
EPOCHS = 9
BATCH_SIZE = 16
FOLDS = 10

In [3]:
# Read the per-image index, keep only rows flagged as containing lung, shuffle
# the rows, and coerce `ycoord` to float (the string 'True' is mapped to 1.0).
all_ids = pd.read_csv('all_ids_updated.csv')
has_lung = all_ids['contains_lung'] == True
all_ids = all_ids.loc[has_lung].sample(frac=1).reset_index(drop=True)
all_ids['ycoord'] = all_ids['ycoord'].replace('True', '1.0').astype('float')

  all_ids = pd.read_csv('all_ids_updated.csv')


In [4]:
# Load the stored train / test / validation splits, discarding the index
# column that was written out alongside the data.
train_df, test_df, val_df = [
    pd.read_csv(csv_name).drop(columns='Unnamed: 0')
    for csv_name in ('train_df_upd.csv', 'test_df_upd.csv', 'val_df_upd.csv')
]

In [5]:
# fold_df = pd.DataFrame({'StudyInstanceUID': all_ids.StudyInstanceUID.unique()}).sample(frac=1)

In [6]:
# fold_df = fold_df.reset_index(drop=True)
# fold_df['fold'] = fold_df.index % 10

In [7]:
# fold_df.to_csv('folds.csv', index=False)

In [6]:
# Build one row per slab of SLAB_SIZE images that are consecutive in `ycoord`
# within the same study.
all_ids.ycoord = all_ids.ycoord.astype('float')

# Number of images stacked into one slab.
SLAB_SIZE = 3

# Order images by study and then by ycoord so that shift(i) refers to the
# image i rows earlier in that ordering.
all_ids_slab = all_ids.sort_values(['StudyInstanceUID', 'ycoord'])
# Study UID of the row SLAB_SIZE - 1 positions earlier; used below to reject
# slabs that would straddle two studies.
all_ids_slab['endStudyUID'] = all_ids_slab.StudyInstanceUID.shift(SLAB_SIZE - 1)

all_ids_slab['pe_in_slab'] = all_ids_slab.pe_present_on_image
for i in range(1, SLAB_SIZE):
    # pe_<i> / SOP_<i>: label and SOPInstanceUID of the image i rows earlier.
    all_ids_slab['pe_' + str(i)] = all_ids_slab.pe_present_on_image.shift(i)
    all_ids_slab['SOP_' + str(i)] = all_ids_slab.SOPInstanceUID.shift(i)
    all_ids_slab['pe_in_slab'] = all_ids_slab['pe_in_slab'] + all_ids_slab['pe_' + str(i)]

# NOTE(review): this overwrites the sum accumulated in the loop above, so the
# final label depends only on pe_1 (the image one row earlier). Presumably the
# slab is meant to be labelled by that image alone — confirm, and if so the
# accumulation above is dead code.
all_ids_slab['pe_in_slab'] = all_ids_slab.pe_1 == 1

# Keep only rows whose slab lies entirely inside one study.
all_ids_slab = all_ids_slab[all_ids_slab.StudyInstanceUID == all_ids_slab.endStudyUID]

all_ids_slab = all_ids_slab.reset_index(drop=True)

In [7]:
# Attach the stored fold assignment to every slab row. The join uses pandas'
# defaults: inner join on the columns the two frames have in common.
fold_df = pd.read_csv('folds.csv')
all_ids_slab = all_ids_slab.merge(fold_df)

In [8]:
def get_model() -> keras.models.Model:
    """Build the CoAtNet-0 based binary classifier for 256x256x3 slabs.

    Architecture, in layer order:
    input -> CoAtNet0 (ImageNet weights, 512-unit ReLU head) -> Dropout(0.3)
    -> Dense(64) -> Dropout(0.2) -> Dense(1, sigmoid).

    Other cells in this notebook pick intermediate outputs by position
    (``model.layers[-2]`` / ``model.layers[-3]``), so the number and order of
    the layers after the backbone must not change.

    :return: the uncompiled Keras model; its summary is printed as a side effect
    """
    # Imported here so the notebook can be loaded without this optional package.
    from keras_cv_attention_models import coatnet

    inputs = keras.Input(shape=(256,256,3))

    coat = coatnet.CoAtNet0(pretrained='imagenet',
                            num_classes=512,
                            classifier_activation='relu',
                            input_shape=(256,256,3),
                            drop_connect_rate = 0.1,
                            dropout=0.4)
    x = coat(inputs)
    x = keras.layers.Dropout(0.3)(x)
    # No activation: this 64-d output is what the feature-extraction cells read.
    x = keras.layers.Dense(64)(x)
    x = keras.layers.Dropout(0.2)(x)
    output = keras.layers.Dense(1, activation="sigmoid")(x)
    model = keras.models.Model(inputs=inputs, outputs=output)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

def get_generators(
    all_df,
    fold,
    img_type,
    batch_size
):
    """Create the train / valid / test generators for one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` are held out and feed both the
    'valid' and the 'test' generator; every other row feeds the 'train'
    generator. Images are read from the module-level ``IMAGE_PATH``.

    :param all_df: slab table with a ``fold`` column
    :param fold: fold number to hold out
    :param img_type: image variant forwarded to the generators
    :param batch_size: batch size forwarded to the generators
    :return: tuple ``(train_generator, valid_generator, test_generator)``
    """
    training_rows = all_df.loc[all_df.fold != fold].reset_index(drop=True)
    held_out_rows = all_df.loc[all_df.fold == fold].reset_index(drop=True)

    def _build(frame, set_type, shuffle):
        # All three generators share every setting except these three.
        return DataSlabGenerator(frame,
                                 IMAGE_PATH,
                                 img_type=img_type,
                                 verbose=False,
                                 n_channels=3,
                                 set_type=set_type,
                                 batch_size=batch_size,
                                 shuffle=shuffle)

    # Built in the same order as before so random draws happen in the same order.
    train_generator = _build(training_rows, 'train', True)
    valid_generator = _build(held_out_rows, 'valid', False)
    test_generator = _build(held_out_rows, 'test', False)

    return train_generator, valid_generator, test_generator

def train(all_df, model_name, fold):
    """Train a fresh model on every fold except ``fold`` and save the results.

    Writes two files into ``MODEL_DIR``:
    ``<model_name>_fold_<NN>.h5`` (best checkpoint by validation loss) and
    ``<model_name>_fold_<NN>.csv`` (per-epoch training history).

    Reads the module-level ``IMG_TYPE``, ``BATCH_SIZE``, ``EPOCHS`` and
    ``MODEL_DIR`` at call time.

    :param all_df: slab table with a ``fold`` column
    :param model_name: prefix for the checkpoint and history file names
    :param fold: fold number held out for validation
    """
    model = get_model()

    train_generator, valid_generator, _ = get_generators(all_df,
                                                         fold,
                                                         IMG_TYPE,
                                                         BATCH_SIZE)

    # The cosine schedule is attached to the optimizer so it is evaluated once
    # per optimizer step. It used to be wrapped in the LearningRateScheduler
    # callback, which calls the schedule with the *epoch index*; with
    # decay_steps=100000 and a handful of epochs that left the learning rate
    # effectively constant at its initial value.
    lr_schedule = keras.optimizers.schedules.CosineDecay(0.0001, 100000)
    opt = keras.optimizers.Adam(learning_rate=lr_schedule)

    # Make sure the output directory exists before Keras tries to write to it.
    os.makedirs(MODEL_DIR, exist_ok=True)
    checkpoint_path = os.path.join(MODEL_DIR, f"{model_name}_fold_{fold:02d}.h5")

    # Keeps only the best model so far, judged by ModelCheckpoint's default
    # monitor (validation loss).
    check = keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        save_best_only=True,
        verbose=1,
    )
    # Stop as soon as validation loss fails to improve for one epoch.
    es = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=1,
    )

    model.compile(loss="binary_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    hist_full = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=valid_generator,
        callbacks=[check, es],
    )

    df_hist = pd.DataFrame(hist_full.history)
    df_hist.to_csv(os.path.join(MODEL_DIR, f"{model_name}_fold_{fold:02d}.csv"))

In [9]:
import turku_aug_funcs

class DataSlabGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of 3-slice image slabs and their labels.

    Every row of the input frame describes one slab. The PNG files named by the
    row's ``SOPInstanceUID``, ``SOP_1`` and ``SOP_2`` columns are read from
    ``image_path`` and stacked into channels 0, 1 and 2 of one image; the label
    is the row's ``pe_in_slab`` value.

    Behaviour per ``set_type``:

    * ``'train'`` - positives plus an equally sized random sample of negatives,
      re-drawn after every epoch; random augmentation is applied to each slab.
    * ``'valid'`` - the same balanced sampling, drawn once at construction.
    * ``'test'``  - every row, in the order given (before optional shuffling).
    """

    # Row columns naming the PNGs stacked into channels 0, 1 and 2.
    _SLICE_COLUMNS = ('SOPInstanceUID', 'SOP_1', 'SOP_2')
    _SET_TYPES = ('test', 'valid', 'train')

    def __init__(self, all_df, image_path, img_type, set_type, n_channels=1,
                 batch_size=32, dim=256, num_pos=None, n_classes=2, shuffle=True,
                 verbose=False, to_fit=True,
                 ):
        """Initialization
        :param all_df: frame with one row per slab; needs the columns
            StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID, SOP_1, SOP_2
            and pe_in_slab (plus fold when verbose is True)
        :param image_path: directory prefix of the PNG files (concatenated
            directly with the file name, so it must end with a separator)
        :param img_type: channel index selected from each PNG that is read
        :param set_type: 'train', 'valid' or 'test'
        :param n_channels: stored but not used; slabs always have 3 channels
        :param batch_size: batch size at each iteration
        :param dim: height and width of the (square) images
        :param num_pos: accepted for compatibility, not used
        :param n_classes: stored but not used
        :param shuffle: True to shuffle the row order at construction and after
            every epoch
        :param verbose: True to print / plot debugging information
        :param to_fit: True to return X and y, False to return X only
        :raises ValueError: if set_type is not one of the supported values
        """
        # __init__ must not return a value, so the former `return False`
        # surfaced as an unrelated TypeError; fail explicitly instead.
        if set_type not in self._SET_TYPES:
            raise ValueError(
                f"Invalid set type {set_type!r}, must be test, valid or train")

        self.image_path = image_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path_dicom = image_path
        self.verbose = verbose
        self.img_type = img_type
        # Note: this silences warnings for the whole process, not just this object.
        warnings.filterwarnings(action='ignore')
        self.all_df = all_df
        self.set_type = set_type

        if self.set_type == 'test':
            self.labels = self.all_df
        else:
            self.labels = self._balanced_sample()

        if self.verbose:
            print(self.set_type, self.labels.fold.unique())

        self._reset_order()

    def _balanced_sample(self):
        """Return all positive rows plus as many random negatives, shuffled."""
        pos = self.all_df[self.all_df.pe_in_slab == True]
        neg = self.all_df[self.all_df.pe_in_slab == False]
        # Cap at the available negatives; sample() raises if asked for more
        # rows than exist.
        neg = neg.sample(n=min(len(pos), len(neg)))
        return pd.concat([pos, neg]).sample(frac=1).reset_index(drop=True)

    def _reset_order(self):
        """Rebuild the positional row order, shuffling it when requested."""
        self.list_IDs = np.arange(len(self.labels))
        if self.shuffle == True:
            np.random.shuffle(self.list_IDs)

    def get_df(self):
        """Return the frame of rows currently served by this generator."""
        return self.labels

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of batches per epoch. For 'test' the last, possibly
            smaller, batch is included so that every row is served; for
            'train' and 'valid' an incomplete last batch is dropped.
        """
        n_batches = len(self.list_IDs) / self.batch_size
        if self.set_type == 'test':
            return int(np.ceil(n_batches))
        return int(np.floor(n_batches))

    def __getitem__(self, index):
        """Generate one batch of data
        :param index: index of the batch
        :return: X and y when fitting. X only when predicting. X is scaled to
            [0, 1] and has shape (rows, dim, dim, 3); y has shape (rows, 1)
        :raises IndexError: if index is outside ``range(len(self))``
        """
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range")

        indexes = self.list_IDs[index * self.batch_size:((index+1) * self.batch_size)]
        # Only the final 'test' batch can be shorter than batch_size.
        n_rows = len(indexes)
        X = np.zeros([n_rows, self.dim, self.dim, 3])
        y = np.zeros([n_rows, 1])
        for i, row_id in enumerate(indexes):
            X[i], y[i] = self._load_dicom(row_id)

        if self.verbose == True:
            # Imported lazily: plotting is only needed for debugging.
            import matplotlib.pyplot as plt
            # squeeze=False keeps `ax` two-dimensional even for a single row.
            fig, ax = plt.subplots(n_rows, 1, figsize=[12, 12*(n_rows/2)],
                                   squeeze=False)
            for i in range(n_rows):
                # imshow expects float RGB data in [0, 1].
                ax[i, 0].imshow(X[i] / 255)
                ax[i, 0].axis('off')

        X = X/255
        if self.to_fit:
            return (X, y)
        else:
            return (X)

    def on_epoch_end(self):
        """Updates indexes after each epoch

        For 'train' a new random set of negatives is drawn first.
        """
        if self.set_type == 'train':
            self.labels = self._balanced_sample()
        self._reset_order()

    def _read_channel(self, row, sop_uid):
        """Read one PNG of the row's series and return the selected channel.

        :raises FileNotFoundError: if the image cannot be read
        """
        path = (self.image_path + row.StudyInstanceUID + '_' + row.SeriesInstanceUID
                + '_' + sop_uid + '.png')
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read slice image: {path}")
        return image[:, :, self.img_type]

    def _augment(self, frames):
        """Apply the random training augmentations to one (dim, dim, 3) slab.

        In order: optional 3x3 Gaussian blur, optional zoom by a factor in
        [0.95, 1.10], rotation by an integer angle in [-10, 10] degrees, and
        optional additive Gaussian noise (sigma 1) clipped to [0, 255].
        """
        if random.random() > 0.5:
            frames = cv2.GaussianBlur(frames, (3, 3), 0)

        if random.random() > 0.2:
            factor = random.randint(-5,10)/100 + 1
            if factor > 1:
                # Zoom in: crop the centre and scale it back up.
                radius = round((1.0/factor) * self.dim/2)
                mini = int(self.dim/2 - radius)
                maxi = int(self.dim/2 + radius)
                tmp_frame = frames[mini:maxi, mini:maxi, :]
                frames = cv2.resize(tmp_frame, (self.dim, self.dim))
            if factor < 1:
                # Zoom out: paste onto a larger black canvas and scale down.
                diam = int((1.0/factor) * self.dim)
                mini = int(round(diam/2) - self.dim/2)
                maxi = int(self.dim/2 + round(diam/2))
                tmp_frame = np.zeros((diam, diam, 3))
                tmp_frame[mini:maxi, mini:maxi, :] = frames
                frames = cv2.resize(tmp_frame, (self.dim, self.dim))

        angle = random.randint(-10,10)
        if angle != 0:
            M = cv2.getRotationMatrix2D((self.dim/2, self.dim/2), angle, 1)
            frames = cv2.warpAffine(frames, M, (self.dim, self.dim))

        if random.random() > 0.5:
            gaus_noise = np.random.normal(0, 1, np.shape(frames))
            frames = frames + gaus_noise
            frames = np.clip(frames, 0, 255)

        return frames

    def _load_dicom(self, index):
        """Load the slab at positional row ``index`` of ``self.labels``.

        :param index: positional row index
        :return: tuple (frames, y); frames has shape (1, dim, dim, 3) with
            values on the 0-255 scale, y has shape (1, 1)
        """
        row = self.labels.iloc[index]

        frames = np.zeros((self.dim, self.dim, 3))
        for channel, column in enumerate(self._SLICE_COLUMNS):
            frames[:, :, channel] = self._read_channel(row, row[column])

        if self.set_type == 'train':
            frames = self._augment(frames)

        # Use the configured size rather than a hard-coded 256.
        frames = np.reshape(frames, (self.dim, self.dim, 3))

        if self.verbose == True:
            # Imported lazily: plotting is only needed for debugging.
            import matplotlib.pyplot as plt
            print(np.shape(frames))
            plt.imshow(frames[:,:,0])
            plt.show()

        frames = frames[None, ...]
        y = np.array([int(row.pe_in_slab)])
        y = y[None, ...]

        return frames, y

  from scipy.ndimage.filters import gaussian_filter
  from scipy.ndimage.interpolation import map_coordinates


In [10]:
# Override the defaults from the configuration cell for the runs below.
# train() reads both globals at call time.
EPOCHS=3
IMG_TYPE = CROPPED

In [None]:
# Train the 'coat-net-2-kfold' model with folds 6 and 7 held out in turn.
for fold in (6, 7):
    print(fold)
    train(all_ids_slab, 'coat-net-2-kfold', fold)

  tf_version = distutils.version.LooseVersion(tf.__version__)
  from .autonotebook import tqdm as notebook_tqdm


6


2022-12-08 14:38:32.721863: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-08 14:38:34.509538: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 11470 MB memory:  -> device: 0, name: NVIDIA GeForce GTX TITAN X, pci bus id: 0000:06:00.0, compute capability: 5.2


>>>> Load pretrained from: /home/jupyter-paige/.keras/models/coatnet0_224_imagenet.h5
>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                   

2022-12-08 14:39:13.464397: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8401

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.




In [None]:
# Train the 'coat-net-diff-kfold' model with fold 2 held out.
fold = 2
print(fold)
train(all_ids_slab, 'coat-net-diff-kfold', fold)

In [None]:
# Train 'coat-net-diff-kfold' on folds 1-9, skipping fold 2.
EPOCHS = 6
IMG_TYPE = CROPPED
for fold in range(1, 10):
    if fold == 2:
        continue
    train(all_ids_slab, 'coat-net-diff-kfold', fold)

>>>> Load pretrained from: /home/jupyter-paige/.keras/models/coatnet0_224_imagenet.h5
>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_3 (Dropout)         (None, 512)               0         
                                                 

In [11]:
# Re-read the per-image index (lung rows only, shuffled, `ycoord` as float)
# and derive a narrower copy without the exam-level label columns.
all_ids = pd.read_csv('all_ids_updated.csv')
lung_mask = all_ids['contains_lung'] == True
all_ids = all_ids.loc[lung_mask].sample(frac=1).reset_index(drop=True)
all_ids['ycoord'] = all_ids['ycoord'].replace('True', '1.0').astype('float')

all_ids_small = all_ids.drop(
    ['negative_exam_for_pe', 'qa_motion',
     'qa_contrast', 'flow_artifact', 'rv_lv_ratio_gte_1', 'rv_lv_ratio_lt_1',
     'leftsided_pe', 'chronic_pe', 'true_filling_defect_not_pe',
     'rightsided_pe', 'acute_and_chronic_pe', 'central_pe', 'indeterminate',
     'contains_lung'],
    axis=1,
)

  all_ids = pd.read_csv('all_ids_updated.csv')


In [16]:
# get_model() takes no arguments, so the former `get_model(3)` call raised a
# TypeError on a fresh kernel.
model = get_model()
model.load_weights('/home/shared/model_checkpoint_paige/singlescan-3channel/coat-net-kfold_fold_02.h5')
# Truncate the classifier at its second-to-last layer (the Dropout that
# follows the 64-unit Dense layer in get_model) to expose 64-d features.
model_partial = keras.models.Model(inputs=model.input, outputs=[model.layers[-2].output])
model_partial.summary()

>>>> Load pretrained from: /home/jupyter-paige/.keras/models/coatnet0_224_imagenet.h5
>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_6 (Dropout)         (None, 512)               0         
                                                 

In [24]:
import datetime
import gc

In [15]:
# Load the ten per-fold 'coat-net-2-kfold' checkpoints. For each one also
# build a truncated model ending at layers[-3] (the 64-unit Dense layer in
# get_model), used later to extract features.
models, model_partials, model_parts = [], [], []
for fold_idx in range(10):
    fold_model = get_model()
    models.append(fold_model)
    fold_model.load_weights(MODEL_DIR + 'coat-net-2-kfold_fold_0' + str(fold_idx) + '.h5')
    feature_model = keras.models.Model(inputs=fold_model.input,
                                       outputs=[fold_model.layers[-3].output])
    model_partials.append(feature_model)

>>>> Load pretrained from: /home/jupyter-paige/.keras/models/coatnet0_224_imagenet.h5
>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_32"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_25 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_24 (Dropout)        (None, 512)               0         
                                                

>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_36"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_29 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_28 (Dropout)        (None, 512)               0         
                                                                 
 dense_28 (Dense)            (None, 64)                32832     
  

>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_33 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_32 (Dropout)        (None, 512)               0         
                                                                 
 dense_32 (Dense)            (None, 64)                32832     
  

>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_44"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_37 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_36 (Dropout)        (None, 512)               0         
                                                                 
 dense_36 (Dense)            (None, 64)                32832     
  

>>>> Reload mismatched weights: 224 -> (256, 256)
>>>> Reload layer: stack_3_block_1_mhsa_pos_emb
>>>> Reload layer: stack_3_block_2_mhsa_pos_emb
>>>> Reload layer: stack_3_block_3_mhsa_pos_emb
>>>> Reload layer: stack_3_block_4_mhsa_pos_emb
>>>> Reload layer: stack_3_block_5_mhsa_pos_emb
>>>> Reload layer: stack_4_block_1_mhsa_pos_emb
>>>> Reload layer: stack_4_block_2_mhsa_pos_emb
Model: "model_48"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_41 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 coatnet0 (Functional)       (None, 512)               22930490  
                                                                 
 dropout_40 (Dropout)        (None, 512)               0         
                                                                 
 dense_40 (Dense)            (None, 64)                32832     
  

In [13]:
# Narrow copy of the slab table: drops the exam-level label columns and the
# helper columns created while the slabs were built.
all_ids_slab_small = all_ids_slab.drop(
    ['qa_motion',
     'qa_contrast', 'flow_artifact', 'rv_lv_ratio_gte_1', 'rv_lv_ratio_lt_1',
     'leftsided_pe', 'chronic_pe', 'true_filling_defect_not_pe',
     'rightsided_pe', 'acute_and_chronic_pe', 'central_pe', 'indeterminate',
     'contains_lung', 'endStudyUID', 'pe_1', 'pe_2'],
    axis=1,
)

In [21]:
# Work out which exams still need features: every study that does not yet have
# a <StudyInstanceUID>.npy file in the fold-0 output directory.
directory = '/home/shared/nps/coat_2_00/'

lisdir = os.listdir(directory)
print(len(lisdir))

files = pd.DataFrame({'file_name':lisdir})
# Strip only a trailing literal ".npy". The previous un-escaped, un-anchored
# pattern was interpreted as a regular expression in which "." matches any
# character, and it could match anywhere in the name.
files['StudyInstanceUID'] = files['file_name'].str.replace(r'\.npy$', '', regex=True)

remaining = all_ids_slab[~all_ids_slab.StudyInstanceUID.isin(files.StudyInstanceUID.unique())]
remaining = remaining.sort_values(by='StudyInstanceUID', ascending=False)

# First 600 outstanding studies, in descending StudyInstanceUID order.
unique_ids = remaining.StudyInstanceUID.unique()[:600]


hello?
5561


In [None]:
import datetime
import gc

# For the last 30% of studies (sorted by StudyInstanceUID): run every slab of
# the exam through all ten fold models and save, per model, the 64-d features
# and the predictions as one .npy file per exam.
all_ids_slab = all_ids_slab.sort_values(by='StudyInstanceUID')
unique_ids = all_ids_slab.StudyInstanceUID.unique()
unique_ids = unique_ids[int(np.floor(len(unique_ids)*0.7)):]

IMG_TYPE = CROPPED
IMAGE_PATH = '/home/shared/nps/imgs/'
BATCH_SIZE = 64

n_models = 10

for i, exam_id in enumerate(unique_ids):
    exam = all_ids_slab_small[all_ids_slab_small.StudyInstanceUID == exam_id]
    exam = exam.sort_values(by='ycoord')
    # batch_size=1 and shuffle=False: item j is exactly row j of `exam`.
    exam_generator = DataSlabGenerator(exam,
                                   IMAGE_PATH,
                                   img_type = IMG_TYPE,
                                   set_type='test',
                                   verbose=False,
                                   shuffle=False,
                                   n_channels=3,
                                   batch_size=1)
    n_slabs = len(exam)
    x_test = np.zeros([n_slabs,256,256,3])
    for j in range(n_slabs):
        x_test[j], _ = exam_generator[j]

    feats = np.zeros([n_models, n_slabs, 64])
    results = np.zeros([n_models, n_slabs, 1])
    # One strided loop covers the full batches and the final partial batch.
    # The previous version handled the remainder with the loop variable `j`
    # left over from an earlier loop; for exams with fewer than BATCH_SIZE
    # slabs that index pointed past the end of the data, so all-zero features
    # and predictions were saved.
    for start in range(0, n_slabs, BATCH_SIZE):
        stop = min(start + BATCH_SIZE, n_slabs)
        batch = x_test[start:stop]
        for k in range(n_models):
            feats[k, start:stop] = np.array(model_partials[k](batch, training=False))
            results[k, start:stop] = np.array(models[k](batch, training=False))

    for k in range(n_models):
        exam['features'] = feats[k].tolist()
        exam['preds'] = results[k].tolist()
        # Saved as a list of row dicts, one file per exam per fold model.
        np.save('/home/shared/nps/coat_2_0'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i %10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

0 time is:  2022-12-10 12:03:17.464163
10 time is:  2022-12-10 12:10:51.079062
20 time is:  2022-12-10 12:18:08.251402
30 time is:  2022-12-10 12:24:05.074339
40 time is:  2022-12-10 12:30:13.503649
50 time is:  2022-12-10 12:37:33.751026
60 time is:  2022-12-10 12:44:32.096984
70 time is:  2022-12-10 12:51:40.803704
80 time is:  2022-12-10 12:58:44.706362
90 time is:  2022-12-10 13:05:18.434465
100 time is:  2022-12-10 13:11:52.986013
110 time is:  2022-12-10 13:17:58.081326
120 time is:  2022-12-10 13:24:18.855944
130 time is:  2022-12-10 13:31:20.379284
140 time is:  2022-12-10 13:37:30.124312
150 time is:  2022-12-10 13:44:06.336379
160 time is:  2022-12-10 13:51:31.224850
170 time is:  2022-12-10 13:58:32.214167
180 time is:  2022-12-10 14:04:53.171459
