In [1]:

import os
import sys
import warnings

# Select the GPUs before TensorFlow is imported so the restriction is in place
# no matter when the framework first touches CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,2"

import numpy as np
import pandas as pd
import pydicom

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import Sequence

import cv2

print(tf.__version__)
print(keras.__version__)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Show each distinct warning only once for the rest of the session.
warnings.filterwarnings(action='once')


2.8.1
2.8.0
Num GPUs Available:  2


In [2]:
import argparse
import os
import random as rn
import sys
from typing import Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow import keras
import random

# import config
# from pe_logger import PELogger
# from plots.plot_results import (save_accuracy_plot, save_loss_plot,
#                                 save_pr_curve, save_roc_curve)
from inception_resnet_v2_gray import InceptionResNetV2Gray
# from training.slice_data_generator import SliceDataGenerator

# Set seeding based on Keras documentation
# `rn` is the standard `random` module (imported above both as `rn` and as
# `random`), so rn.seed also seeds the `random.*` calls used for augmentation.
np.random.seed(1)
rn.seed(2)
tf.random.set_seed(3)

# logger = PELogger().get_logger()

# CONFIG = config.config()
# MODEL_DIR = CONFIG["model"]["model_dir"]
# PLOT_DIR = CONFIG["model"]["plot_dir"]

# Network input size in pixels (width, height).
IMAGE_W = 386
IMAGE_H = 386
# Width of the penultimate Dense layer built in get_model.
ENCODING_DIM = 64
# NOTE(review): the three *_COL names below are not referenced by any visible
# cell; the visible code reads `df.fold` and `pe_present_on_image` directly.
FILENAME_COL = "png_filename"
LABEL_COL = "label"
FOLD_COL = "pat_fold"
# Defaults; later cells reassign EPOCHS and BATCH_SIZE before training/inference.
EPOCHS = 3
BATCH_SIZE = 32
# Pretrained backbone weights file (path relative to the notebook's working directory).
PRETRAINED_WEIGHTS = "./pretrained/InceptionResNetV2_NIH15_Px256.h5"
# Directory where train() writes checkpoints and history CSVs; the model-loading
# cells build paths by plain string concatenation, so keep the trailing '/'.
MODEL_DIR = '/home/shared/model_checkpoint_paige/singlescan-3channel/'


In [3]:
def get_model(num_channels: int) -> keras.models.Model:
    """Build the slice-level binary classifier on a grayscale Inception-ResNet-V2 backbone.

    The backbone is created without its top, taking ``(IMAGE_H, IMAGE_W, 1)``
    inputs and ``PRETRAINED_WEIGHTS`` as its ``weights`` argument. On top of it:
    global average pooling -> Dense(512, relu) -> Dropout(0.2) ->
    Dense(ENCODING_DIM) -> Dense(1, sigmoid).

    The inference cells read the ENCODING_DIM layer through ``model.layers[-2]``,
    so the last two layers must stay in this order.

    Args:
        num_channels: kept for interface compatibility; currently unused, the
            network is always built for a single input channel.

    Returns:
        An uncompiled ``keras.models.Model`` mapping a slice to a probability.
    """
    backbone = InceptionResNetV2Gray(
        input_shape=(IMAGE_H, IMAGE_W, 1),
        include_top=False,
        weights=PRETRAINED_WEIGHTS,
    )
    # Build the head directly on the backbone's own graph. The previous
    # separate keras.Input / backbone(inputs) call was overwritten on the
    # next line and never reached the returned model.
    x = keras.layers.GlobalAveragePooling2D()(backbone.output)
    x = keras.layers.Dense(512, activation="relu")(x)
    x = keras.layers.Dropout(0.2)(x)
    x = keras.layers.Dense(ENCODING_DIM)(x)
    x = keras.layers.Dense(1, activation="sigmoid")(x)
    return keras.models.Model(inputs=backbone.input, outputs=x)


def get_generators(
    df,
    fold,
):
    """Create the training and validation generators for one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation frame; all
    other rows form the training frame. Both generators read images from the
    module-level ``IMAGE_PATH`` / ``IMG_TYPE`` and use ``BATCH_SIZE`` as it is
    set when this function is called.

    Returns:
        ``(train_generator, valid_generator)``.
    """
    train_df = df[df["fold"] != fold].reset_index(drop=True)
    test_df = df[df["fold"] == fold].reset_index(drop=True)

    # Settings common to both generators.
    common = dict(
        img_type=IMG_TYPE,
        verbose=False,
        n_channels=1,
        batch_size=BATCH_SIZE,
        dim=386,
    )

    # The training generator is built first, as before, so the global random
    # state is consumed in the same order.
    train_generator = DataSliceGenerator(
        train_df, IMAGE_PATH, set_type='train', shuffle=True, **common
    )
    valid_generator = DataSliceGenerator(
        test_df, IMAGE_PATH, set_type='valid', shuffle=False, **common
    )
    return train_generator, valid_generator


def train(
    df: pd.DataFrame,
    model_prefix: str,
    num_channels: int,
    pre_train_top: bool,
    fold: int,
):
    """Train a slice based (2D) binary classification model using Inception ResNet V2 backbone.

    The best model for the given fold (lowest validation loss) is written to
    ``MODEL_DIR/{model_prefix}_fold_{fold:02d}.h5`` and the per-epoch history
    to ``MODEL_DIR/hist_{model_prefix}_fold_{fold:02d}.csv``.

    Args:
        df: dataframe of slices with labels and a ``fold`` column; rows of
            ``fold`` are used for validation, all others for training.
        model_prefix: prefix to use when storing trained models.
        num_channels: forwarded to ``get_model``.
        pre_train_top: currently unused; every layer is trained from the
            first epoch.
        fold: number indicating cross-validation fold.
    """
    # Model first, generators second: keeps the order in which the seeded
    # random generators are consumed.
    model = get_model(num_channels)
    train_generator, valid_generator = get_generators(df, fold)

    opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, decay=0.01)

    os.makedirs(MODEL_DIR, exist_ok=True)
#     os.makedirs(PLOT_DIR, exist_ok=True)

    checkpoint_path = os.path.join(MODEL_DIR, f"{model_prefix}_fold_{fold:02d}.h5")
    check = keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor="val_loss",  # the callback's default, spelled out
        save_best_only=True,
        verbose=1,
    )

    # Fine-tune the whole network, backbone included.
    for layer in model.layers:
        layer.trainable = True

    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
    if fold == 0:
        model.summary()

    hist_full = model.fit(
        train_generator,
        epochs=EPOCHS,
        shuffle=False,  # the generator shuffles its own indices
        validation_data=valid_generator,
        callbacks=[check],
    )

    # One row per epoch.
    df_hist = pd.DataFrame(hist_full.history)
    df_hist.to_csv(os.path.join(MODEL_DIR, f"hist_{model_prefix}_fold_{fold:02d}.csv"))




In [4]:
# Load the slice index, keep only slices flagged as containing lung, and
# shuffle the rows.
all_ids = pd.read_csv('all_ids_updated.csv')
all_ids = (
    all_ids.loc[all_ids['contains_lung'] == True]
    .sample(frac=1)
    .reset_index(drop=True)
)
# `ycoord` contains the string 'True' in places; map it to 1.0 before casting.
all_ids['ycoord'] = all_ids['ycoord'].replace('True', '1.0').astype('float')

  all_ids = pd.read_csv('all_ids_updated.csv')


In [5]:
# Attach the cross-validation fold assignment; the join uses every column the
# two frames have in common.
fold_df = pd.read_csv('folds.csv')
all_ids = all_ids.merge(fold_df)

In [7]:
# Settings for this training run; these replace the values from the config cell.
BATCH_SIZE, FOLDS, EPOCHS = 16, 10, 3
MODEL_DIR = '/home/shared/model_checkpoint_paige/singlescan-3channel/'

# Only folds 8 and 9 are trained here.
for fold in range(8, FOLDS):
    print(fold)
    train(
        df=all_ids,
        model_prefix='turku-incepres',
        num_channels=1,
        pre_train_top=True,
        fold=fold,
    )

8


2022-12-12 12:11:10.011097: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-12 12:11:10.517637: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 11470 MB memory:  -> device: 0, name: NVIDIA GeForce GTX TITAN X, pci bus id: 0000:09:00.0, compute capability: 5.2


Epoch 1/3


2022-12-12 12:11:33.943706: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8401

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


   36/10738 [..............................] - ETA: 2:17:12 - loss: 0.7885 - accuracy: 0.5434


KeyboardInterrupt



In [24]:
# Evaluate the fold-0 model on its own validation split.
# Needs `models` to have been filled by the model-loading cell.
train_generator, valid_generator = get_generators(df=all_ids, fold=0)
opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, decay=0.01)

fold0_model = models[0]
fold0_model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
fold0_model.evaluate(valid_generator)

 21/264 [=>............................] - ETA: 3:09 - loss: 0.3507 - accuracy: 0.8475

KeyboardInterrupt: 

In [4]:
import turku_aug_funcs

class DataSliceGenerator(Sequence):
    """Keras ``Sequence`` serving batches of single-channel PNG slices and their labels.

    Each row of ``all_df`` describes one slice. Its image is read from
    ``image_path + StudyInstanceUID + '_' + SeriesInstanceUID + '_' + SOPInstanceUID + '.png'``
    and its label is the row's ``pe_present_on_image`` value.

    Row selection per ``set_type``:
      * ``'test'``  - every row of ``all_df``.
      * ``'valid'`` - all positive rows plus an equally sized random draw of
        negative rows.
      * ``'train'`` - same as ``'valid'`` at construction; ``on_epoch_end``
        then re-draws with every positive row repeated three times and three
        negatives per positive. Training slices are randomly augmented.
    """

    _SET_TYPES = ('test', 'valid', 'train')

    def __init__(self, all_df, image_path, img_type, set_type, n_channels=1,
                 batch_size=32, dim=386, num_pos=None, n_classes=2, shuffle=True,
                 verbose=False, to_fit=True,
                 ):
        """Initialization
        :param all_df: dataframe with one row per slice (UID columns and ``pe_present_on_image``)
        :param image_path: directory prefix of the PNG files, concatenated as-is with the file name
        :param img_type: index of the image channel to keep after ``cv2.imread``
        :param set_type: 'test', 'valid' or 'train'
        :param n_channels: stored, but batches are always built with one channel
        :param batch_size: batch size at each iteration
        :param dim: side length in pixels of the square output image
        :param num_pos: accepted for compatibility, unused
        :param n_classes: stored, not used by the generator itself
        :param shuffle: True to shuffle the sample order at construction and after every epoch
        :param verbose: True to print/plot every loaded slice
        :param to_fit: True to return X and y, False to return X only
        :raises ValueError: if ``set_type`` is not one of the supported values
        """
        # Fail with a clear error. The old print + `return False` made Python
        # raise an unrelated TypeError, because __init__ must return None.
        if set_type not in self._SET_TYPES:
            raise ValueError('Invalid set type, must be test, valid or train')

        self.image_path = image_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path_dicom = image_path
        self.verbose = verbose
        self.img_type = img_type
        # NOTE(review): this silences warnings for the whole process, not only
        # for this generator. Kept because existing runs rely on it.
        warnings.filterwarnings(action='ignore')
        self.all_df = all_df
        self.set_type = set_type

        if self.set_type == 'test':
            self.labels = self.all_df
        else:
            # 'valid' and 'train' both start from a 1:1 positive/negative draw.
            self.labels = self._balanced_sample(repeats=1)

        self.list_IDs = np.arange(len(self.labels))
        if self.shuffle == True:
            np.random.shuffle(self.list_IDs)

    def _balanced_sample(self, repeats):
        """Return a shuffled frame with every positive row ``repeats`` times and
        ``repeats * n_positive`` randomly drawn negative rows."""
        pos = self.all_df[self.all_df.pe_present_on_image == True]
        neg = self.all_df[self.all_df.pe_present_on_image == False].sample(n=len(pos) * repeats)
        return pd.concat([pos] * repeats + [neg]).sample(frac=1).reset_index(drop=True)

    def get_df(self):
        """Return the dataframe of rows currently served by the generator."""
        return self.labels

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of full batches per epoch (a trailing partial batch is not counted)
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data
        :param index: index of the batch
        :return: X and y when fitting. X only when predicting
        :raises IndexError: if the batch index selects no samples
        """
        indexes = self.list_IDs[index * self.batch_size:((index+1) * self.batch_size)]
        n_items = len(indexes)
        if n_items == 0:
            raise IndexError('batch index out of range')

        # Sized by the samples actually selected, so a trailing partial batch
        # requested directly does not index past `indexes`.
        X = np.zeros([n_items, self.dim, self.dim, 1])
        y = np.zeros([n_items, 1])
        for i in range(n_items):
            X[i], y[i] = self._load_dicom(indexes[i])

        if self.verbose == True:
            # NOTE(review): `plt` is not imported anywhere in the visible
            # notebook; import matplotlib.pyplot as plt before using verbose=True.
            fig, ax = plt.subplots(n_items, 1, figsize=[12, 12*(n_items/2)])
            for i in range(n_items):
                ax[i].imshow(X[i])
                ax[i].axis('off')

        # Scale 8-bit intensities to [0, 1].
        X = X/255

        if self.to_fit:
            return (X, y)
        else:
            return (X)

    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        if self.set_type == 'train':
            # NOTE(review): this makes the training set three times larger
            # than the construction-time draw, so len() changes after the
            # first epoch. Confirm this is intended.
            self.labels = self._balanced_sample(repeats=3)

        self.list_IDs = np.arange(len(self.labels))
        if self.shuffle == True:
            np.random.shuffle(self.list_IDs)

    def _augment(self, frame):
        """Apply a random subset of the five augmentation families to one 2-D frame.

        The order of the ``random`` calls matches the original implementation
        so seeded runs draw the same values.
        """
        trans = random.sample(range(0, 5), random.randint(0, 5))

        if 0 in trans:
            frame = turku_aug_funcs.blur(frame)

        if 1 in trans:
            zooms = (turku_aug_funcs.zoom_1_05,
                     turku_aug_funcs.zoom_1_075,
                     turku_aug_funcs.zoom_1_15)
            frame = zooms[random.randint(0, 2)](frame)

        if 2 in trans:
            rotations = (turku_aug_funcs.rotate_3,
                         turku_aug_funcs.rotate_m3,
                         turku_aug_funcs.rotate_5,
                         turku_aug_funcs.rotate_m5)
            frame = rotations[random.randint(0, 3)](frame)

        if 3 in trans:
            # gaussian_noise is given a leading singleton axis. Use the
            # frame's own shape instead of a hard-coded (1, 256, 256).
            frame = np.reshape(frame, (1,) + tuple(np.shape(frame)))
            frame = turku_aug_funcs.gaussian_noise(frame)
            frame = frame.squeeze()

        if 4 in trans:
            x_shifts = (turku_aug_funcs.tr_x10, turku_aug_funcs.tr_x15,
                        turku_aug_funcs.tr_x20, turku_aug_funcs.tr_xm10,
                        turku_aug_funcs.tr_xm15, turku_aug_funcs.tr_xm20)
            y_shifts = (turku_aug_funcs.tr_y10, turku_aug_funcs.tr_y15,
                        turku_aug_funcs.tr_y20, turku_aug_funcs.tr_ym10,
                        turku_aug_funcs.tr_ym15, turku_aug_funcs.tr_ym20)
            # randint(0, 6) can return 6, which has no entry: that axis is
            # then left unshifted, exactly as before.
            pick = random.randint(0, 6)
            if pick < len(x_shifts):
                frame = x_shifts[pick](frame)
            pick = random.randint(0, 6)
            if pick < len(y_shifts):
                frame = y_shifts[pick](frame)

        return frame

    def _load_dicom(self, index):
        """Load, optionally augment, and resize the slice at row ``index`` of ``self.labels``.

        Despite the name, the image is read from a PNG file.

        :return: ``(frame, y)`` with shapes ``(1, dim, dim, 1)`` and ``(1, 1)``
        :raises FileNotFoundError: if the PNG cannot be read
        """
        row = self.labels.iloc[index]

        png_path = (self.image_path + row.StudyInstanceUID + '_' + row.SeriesInstanceUID
                    + '_' + row.SOPInstanceUID + '.png')
        frame = cv2.imread(png_path)
        if frame is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError('Could not read image file: ' + png_path)
        frame = frame[:, :, self.img_type]

        if self.set_type == 'train':
            frame = self._augment(frame)

        frame = cv2.resize(frame, (self.dim, self.dim))
        frame = np.reshape(frame, (self.dim, self.dim, 1))

        if self.verbose == True:
            # NOTE(review): requires `plt` (matplotlib.pyplot), which the
            # visible notebook never imports.
            print(np.shape(frame))
            plt.imshow(frame)
            plt.show()

        frame = frame[None, ...]
        y = np.array([int(row.pe_present_on_image)])
        y = y[None, ...]

        return frame, y

# Values for DataSliceGenerator's `img_type`, which keeps only that channel of
# the image returned by cv2.imread.
# NOTE(review): the names suggest the three PNG channels hold cropped / masked /
# original versions of a slice — confirm against the code that wrote the PNGs.
CROPPED = 0
MASKED = 1
ORIGINAL = 2

# Channel and image directory read by get_generators. The directory is
# concatenated directly with the file name, so the trailing '/' is required.
IMG_TYPE = MASKED
IMAGE_PATH = '/home/shared/nps/imgs/'


  from scipy.ndimage.filters import gaussian_filter
  from scipy.ndimage.interpolation import map_coordinates


In [7]:
# Reload the slice index (lung slices only, shuffled) and fix the `ycoord` dtype.
all_ids = pd.read_csv('all_ids_updated.csv')
all_ids = (
    all_ids.loc[all_ids['contains_lung'] == True]
    .sample(frac=1)
    .reset_index(drop=True)
)
all_ids['ycoord'] = all_ids['ycoord'].replace('True', '1.0').astype('float')

# Slimmer copy without the exam-level annotation columns.
all_ids_small = all_ids.drop(
    columns=[
        'negative_exam_for_pe',
        'qa_motion',
        'qa_contrast',
        'flow_artifact',
        'rv_lv_ratio_gte_1',
        'rv_lv_ratio_lt_1',
        'leftsided_pe',
        'chronic_pe',
        'true_filling_defect_not_pe',
        'rightsided_pe',
        'acute_and_chronic_pe',
        'central_pe',
        'indeterminate',
        'contains_lung',
    ]
)

  all_ids = pd.read_csv('all_ids_updated.csv')


In [58]:
import datetime

In [4]:
import os

# Full slice index (not filtered to lung slices here) with numeric `ycoord`.
all_ids = pd.read_csv('all_ids_updated.csv')
all_ids['ycoord'] = all_ids['ycoord'].replace('True', '1.0').astype('float')

# Saved train/test/validation splits; 'Unnamed: 0' is dropped from each.
train_ids, test_ids, val_ids = (
    pd.read_csv(csv_name).drop(columns='Unnamed: 0')
    for csv_name in ('train_df_upd.csv', 'test_df_upd.csv', 'val_df_upd.csv')
)

# Count the entries in the directory.
lisdir = os.listdir('/home/shared/nps/turku_nps/')
print(len(lisdir))

  all_ids = pd.read_csv('all_ids_updated.csv')


6805


In [25]:
# Load the ten per-fold classifiers. For each one also build a truncated model
# that outputs the second-to-last layer instead of the final probability.
models, model_partials = [], []

opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, decay=0.01)
for i in range(10):
    models.append(get_model(1))
    net = models[i]
    net.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
    net.load_weights(MODEL_DIR + 'turku-incepres_fold_0' + str(i) + '.h5')
    model_partials.append(
        keras.models.Model(inputs=net.input, outputs=[net.layers[-2].output])
    )
    print(i)

0
1
2



KeyboardInterrupt



In [26]:
import datetime
import gc

# Per-exam inference with the first MAX loaded models. For every study, save
# each model's per-slice features and predictions as a list of records.
unique_ids = all_ids.StudyInstanceUID.unique()

IMG_TYPE = MASKED
IMAGE_PATH = '/home/shared/nps/imgs/'
BATCH_SIZE = 64

MAX = 2  # number of entries of `models` / `model_partials` to run

for i in range(len(unique_ids)):
    exam_id = unique_ids[i]
    # Slices of one study, ordered by `ycoord`.
    exam = all_ids[all_ids.StudyInstanceUID == exam_id].sort_values(by='ycoord')
    exam_generator = DataSliceGenerator(exam,
                                        IMAGE_PATH,
                                        img_type=IMG_TYPE,
                                        verbose=False,
                                        set_type='test',
                                        shuffle=False,
                                        n_channels=1,
                                        batch_size=1,
                                        dim=IMAGE_H)

    n_slices = len(exam)
    x_test = np.zeros([n_slices, IMAGE_H, IMAGE_W, 1])
    for j in range(n_slices):
        x_test[j], _unused_label = exam_generator[j]

    feats = np.zeros([MAX, n_slices, ENCODING_DIM])
    results = np.zeros([MAX, n_slices, 1])
    # Walk the exam in chunks of BATCH_SIZE; the last chunk may be shorter.
    # A single stepped loop replaces the old "full batches + tail" code, whose
    # tail reused a stale `j` and left exams shorter than BATCH_SIZE unpredicted.
    for start in range(0, n_slices, BATCH_SIZE):
        stop = start + BATCH_SIZE
        batch = x_test[start:stop]
        for k in range(MAX):
            feats[k, start:stop] = np.array(model_partials[k](batch))
            results[k, start:stop] = np.array(models[k](batch))

    for k in range(MAX):
        exam['features'] = feats[k].tolist()
        exam['preds'] = results[k].tolist()
        np.save('/home/shared/nps/turku_0'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i % 10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

KeyboardInterrupt: 

In [15]:
# Attach the cross-validation fold assignment; the join uses every column the
# two frames have in common.
fold_df = pd.read_csv('folds.csv')
all_ids = all_ids.merge(fold_df)

In [44]:
# Study IDs of fold 0 whose exam is not marked negative for PE.
is_positive_exam = all_ids['negative_exam_for_pe'] == False
in_fold_zero = all_ids['fold'] == 0
unique_ids = all_ids.loc[is_positive_exam & in_fold_zero, 'StudyInstanceUID'].unique()

In [45]:
# Display the study IDs selected in the previous cell.
unique_ids

array(['3f28b9d32596', 'c8aeafba5334', '7e209fbfc30a', 'ca9fafa5d5e7',
       '216400e63bb3', 'a711e3f632d0', '5a649181c295', '018b5097a129',
       '6fab99baa593', '1c0a60a5bf38', 'c593fa6a480b', 'bcd48aa10710',
       '7ebfc5fe0caf', 'ec36deb204d3', 'af0fa891991a', '757535dda5f4',
       '5120108a1940', '90bca49335fc', 'b0385ef57697', 'ac9350709953',
       '2988af19b941', '25b3b3270108', '6b4009e175a0', 'd40cdd117143',
       '93072a8bf3c4', 'cd0f7d6be318', '8aad4d650a05', '665099cd6228',
       '5a8c9a50239b', '7a234b6e6eb3', '94a5a995efab', '242ef334aa07',
       '4658f71f3692', '8036479efea4', '6ad821b36eda', '671a05e18984',
       '0cd398286a7b', '9c5ac31b9dd6', '37a9543f187f', 'f5029e76e793',
       '90cd80093e37', 'a813eaeaf84e', '8f9c50b17d2f', '759a5963508b',
       '7b181499758c', '5ea1e4378c10', '2008f14c56d7', '7826ae6f1218',
       'f037e7310f8f', 'f52f636ef7e1', '62e8b6cf4f54', '7bcb8de6305c',
       '0e81dd7637d7', '9990b97f784e', 'eb835e5ea3f3', '21b615e2b5fb',
      

In [52]:
import datetime
import gc

# Spot check: run the first loaded model on one hand-picked study, using the
# CROPPED channel. Nothing is written to disk.
# unique_ids = all_ids.StudyInstanceUID.unique()
# unique_ids = all_ids[(all_ids.negative_exam_for_pe == False) & (all_ids.fold == 0)].StudyInstanceUID.unique()
unique_ids = ['b11a626b05e3']
IMG_TYPE = CROPPED
IMAGE_PATH = '/home/shared/nps/imgs/'
BATCH_SIZE = 64

MAX = 1  # number of entries of `models` / `model_partials` to run

for i in range(len(unique_ids)):
    exam_id = unique_ids[i]
    # Slices of one study, ordered by `ycoord`.
    exam = all_ids[all_ids.StudyInstanceUID == exam_id].sort_values(by='ycoord')
    exam_generator = DataSliceGenerator(exam, IMAGE_PATH, img_type=IMG_TYPE,
                                        verbose=False, n_channels=1,
                                        set_type='test', batch_size=1,
                                        shuffle=False, dim=IMAGE_H)

    n_slices = len(exam)
    x_test = np.zeros([n_slices, IMAGE_H, IMAGE_W, 1])
    for j in range(n_slices):
        x_test[j], _unused_label = exam_generator[j]

    feats = np.zeros([MAX, n_slices, ENCODING_DIM])
    results = np.zeros([MAX, n_slices, 1])
    # Walk the exam in chunks of BATCH_SIZE; the last chunk may be shorter.
    # A single stepped loop replaces the old "full batches + tail" code, whose
    # tail reused a stale `j` and left exams shorter than BATCH_SIZE unpredicted.
    for start in range(0, n_slices, BATCH_SIZE):
        stop = start + BATCH_SIZE
        batch = x_test[start:stop]
        for k in range(MAX):
            feats[k, start:stop] = np.array(model_partials[k](batch))
            results[k, start:stop] = np.array(models[k](batch))

    for k in range(MAX):
        exam['features'] = feats[k].tolist()
        exam['preds'] = results[k].tolist()
#         np.save('/home/shared/nps/turku_check_'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i % 10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

0 time is:  2022-12-12 20:01:20.798340


In [51]:
# Predictions for the slices of the current `exam` that are labelled positive.
# `exam` is whatever study the most recently run inference loop left behind.
exam[exam.pe_present_on_image == True].preds

Series([], Name: preds, dtype: object)

In [53]:
# Mean prediction over the positive-labelled slices of the current `exam`.
np.mean(exam[exam.pe_present_on_image == True].preds.to_list())

0.7900773358817857

In [43]:
# Check which study the current `exam` frame holds.
exam.StudyInstanceUID.unique()

array(['c8aeafba5334'], dtype=object)

In [None]:
import datetime
import gc

# Load the models of folds MIN..MAX-1 and, for every study, save each model's
# per-slice features and predictions.
models = []
model_partials = []

MIN = 2
MAX = 5
for i in range(MIN, MAX):
    models.append(get_model(1))
    models[i-MIN].load_weights(MODEL_DIR + 'turku-incepres_fold_0'+ str(i) +'.h5')
    # Truncated model that outputs the second-to-last layer.
    model_partials.append(keras.models.Model(inputs=models[i-MIN].input, outputs=[models[i-MIN].layers[-2].output]))
    print(i)


unique_ids = all_ids.StudyInstanceUID.unique()

IMG_TYPE = MASKED
IMAGE_PATH = '/home/shared/nps/imgs/'
BATCH_SIZE = 64

n_models = MAX - MIN

for i in range(len(unique_ids)):
    exam_id = unique_ids[i]
    # Slices of one study, ordered by `ycoord`.
    exam = all_ids[all_ids.StudyInstanceUID == exam_id].sort_values(by='ycoord')
    exam_generator = DataSliceGenerator(exam,
                                        IMAGE_PATH,
                                        img_type=IMG_TYPE,
                                        verbose=False,
                                        set_type='test',
                                        shuffle=False,
                                        n_channels=1,
                                        batch_size=1,
                                        dim=IMAGE_H)

    n_slices = len(exam)
    x_test = np.zeros([n_slices, IMAGE_H, IMAGE_W, 1])
    for j in range(n_slices):
        x_test[j], _unused_label = exam_generator[j]

    feats = np.zeros([n_models, n_slices, ENCODING_DIM])
    results = np.zeros([n_models, n_slices, 1])
    # Walk the exam in chunks of BATCH_SIZE; the last chunk may be shorter.
    # A single stepped loop replaces the old "full batches + tail" code, whose
    # tail reused a stale `j` and left exams shorter than BATCH_SIZE unpredicted.
    for start in range(0, n_slices, BATCH_SIZE):
        stop = start + BATCH_SIZE
        batch = x_test[start:stop]
        for k in range(n_models):
            feats[k, start:stop] = np.array(model_partials[k](batch))
            results[k, start:stop] = np.array(models[k](batch))

    # `k` is the fold number here; list position is k - MIN.
    for k in range(MIN, MAX):
        exam['features'] = feats[k-MIN].tolist()
        exam['preds'] = results[k-MIN].tolist()
        np.save('/home/shared/nps/turku_0'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i % 10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

2
3
4
0 time is:  2022-12-03 19:24:03.967259
10 time is:  2022-12-03 19:27:50.867344
20 time is:  2022-12-03 19:31:44.469551
30 time is:  2022-12-03 19:35:40.729277
40 time is:  2022-12-03 19:39:43.032734
50 time is:  2022-12-03 19:42:57.965260
60 time is:  2022-12-03 19:47:21.485160
70 time is:  2022-12-03 19:51:38.390659
80 time is:  2022-12-03 19:55:45.362907
90 time is:  2022-12-03 19:59:13.773276
100 time is:  2022-12-03 20:02:33.682846
110 time is:  2022-12-03 20:05:58.601448
120 time is:  2022-12-03 20:10:11.698824
130 time is:  2022-12-03 20:14:04.185687
140 time is:  2022-12-03 20:17:38.397889
150 time is:  2022-12-03 20:20:59.264195
160 time is:  2022-12-03 20:24:37.974125
170 time is:  2022-12-03 20:28:19.174748
180 time is:  2022-12-03 20:32:04.380271
190 time is:  2022-12-03 20:35:44.163386
200 time is:  2022-12-03 20:39:27.069022


In [None]:
import datetime
import gc

# Per-exam inference with the models currently held in `models` /
# `model_partials`. MIN, MAX and both lists must already be defined by a
# model-loading cell: list position p corresponds to fold MIN + p.
unique_ids = all_ids.StudyInstanceUID.unique()

IMG_TYPE = MASKED
IMAGE_PATH = '/home/shared/nps/imgs/'
BATCH_SIZE = 64

n_models = MAX - MIN

for i in range(len(unique_ids)):
    exam_id = unique_ids[i]
    # Slices of one study, ordered by `ycoord`.
    exam = all_ids[all_ids.StudyInstanceUID == exam_id].sort_values(by='ycoord')
    exam_generator = DataSliceGenerator(exam, IMAGE_PATH, img_type=IMG_TYPE,
                                        verbose=False, n_channels=1,
                                        set_type='test', batch_size=1,
                                        shuffle=False, dim=IMAGE_H)

    n_slices = len(exam)
    x_test = np.zeros([n_slices, IMAGE_H, IMAGE_W, 1])
    for j in range(n_slices):
        x_test[j], _unused_label = exam_generator[j]

    feats = np.zeros([n_models, n_slices, ENCODING_DIM])
    results = np.zeros([n_models, n_slices, 1])
    # Walk the exam in chunks of BATCH_SIZE; the last chunk may be shorter.
    # A single stepped loop replaces the old "full batches + tail" code, whose
    # tail reused a stale `j` and left exams shorter than BATCH_SIZE unpredicted.
    for start in range(0, n_slices, BATCH_SIZE):
        stop = start + BATCH_SIZE
        batch = x_test[start:stop]
        for k in range(n_models):
            feats[k, start:stop] = np.array(model_partials[k](batch))
            results[k, start:stop] = np.array(models[k](batch))

    # `k` is the fold number here; list position is k - MIN.
    for k in range(MIN, MAX):
        exam['features'] = feats[k-MIN].tolist()
        exam['preds'] = results[k-MIN].tolist()
        np.save('/home/shared/nps/turku_0'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i % 10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

8
9
0 time is:  2022-12-09 12:24:47.145020
10 time is:  2022-12-09 12:27:24.724880
20 time is:  2022-12-09 12:30:35.246488
30 time is:  2022-12-09 12:33:18.597317
40 time is:  2022-12-09 12:36:05.888092
50 time is:  2022-12-09 12:38:48.759846
60 time is:  2022-12-09 12:41:11.002942
70 time is:  2022-12-09 12:43:44.725570
80 time is:  2022-12-09 12:46:37.530960
90 time is:  2022-12-09 12:49:10.356916
100 time is:  2022-12-09 12:52:03.481524
110 time is:  2022-12-09 12:54:33.519392
120 time is:  2022-12-09 12:57:16.823327
130 time is:  2022-12-09 12:59:49.320302
140 time is:  2022-12-09 13:02:13.275386
150 time is:  2022-12-09 13:04:19.735304
160 time is:  2022-12-09 13:06:57.020183
170 time is:  2022-12-09 13:09:13.414922
180 time is:  2022-12-09 13:11:51.563823
190 time is:  2022-12-09 13:14:15.006973
200 time is:  2022-12-09 13:16:29.552263
210 time is:  2022-12-09 13:19:04.956429
220 time is:  2022-12-09 13:21:44.975000
230 time is:  2022-12-09 13:24:44.834540
240 time is:  2022-12-0

In [5]:
# Build the slice index for the external test set: rows of test_ids.csv joined
# with the PNG files actually present in IMAGE_PATH.
IMAGE_PATH = '/home/shared/test/imgs/'

all_ids = pd.read_csv('test_ids.csv')
# Drop rows whose `ycoord` is the string 'ERROR'. The copy makes the
# following column assignment operate on an independent frame.
all_ids = all_ids[all_ids.ycoord != 'ERROR'].copy()
all_ids['ycoord'] = all_ids['ycoord'].astype('float')

lisdir = os.listdir(IMAGE_PATH)

files = pd.DataFrame({'file_name': lisdir})
print(len(files))

# File names have the form <StudyInstanceUID>_<SeriesInstanceUID>_<SOPInstanceUID>.png
files['cols'] = files.file_name.str.split('_')
files[['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']] = files.cols.tolist()
# regex=False: treat '.png' literally. Under the regex default, '.' matches
# any character, and pandas warns about it.
files.SOPInstanceUID = files.SOPInstanceUID.str.replace('.png', '', regex=False)
files = files.drop(columns='cols')

print(len(all_ids), len(files))
# Inner join on every column the two frames share.
all_ids = pd.merge(all_ids, files)
print(len(all_ids))

  all_ids = pd.read_csv('test_ids.csv')


106463


  files.SOPInstanceUID = files.SOPInstanceUID.str.replace('.png','')


146840 106463
106463


In [6]:
# Placeholder label column: DataSliceGenerator reads `pe_present_on_image`
# for every slice it loads, so the column has to exist for this frame.
all_ids['pe_present_on_image'] = 0

In [9]:
# Load the ten per-fold classifiers. For each one also build a truncated model
# that outputs the second-to-last layer instead of the final probability.
models, model_partials = [], []

opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, decay=0.01)
for i in range(10):
    models.append(get_model(1))
    net = models[i]
    net.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
    net.load_weights(MODEL_DIR + 'turku-incepres_fold_0' + str(i) + '.h5')
    model_partials.append(
        keras.models.Model(inputs=net.input, outputs=[net.layers[-2].output])
    )
    print(i)

0
1
2
3
4
5
6
7
8
9


In [10]:
import datetime
import gc

# Per-exam inference on the external test set with all ten fold models. For
# every study, save each model's per-slice features and predictions.
unique_ids = all_ids.StudyInstanceUID.unique()

IMG_TYPE = MASKED
IMAGE_PATH = '/home/shared/test/imgs/'
BATCH_SIZE = 64

MIN = 0
MAX = 10
n_models = MAX - MIN

for i in range(len(unique_ids)):
    exam_id = unique_ids[i]
    # Slices of one study, ordered by `ycoord`.
    exam = all_ids[all_ids.StudyInstanceUID == exam_id].sort_values(by='ycoord')
    exam_generator = DataSliceGenerator(exam, IMAGE_PATH, img_type=IMG_TYPE,
                                        verbose=False, n_channels=1,
                                        set_type='test', batch_size=1,
                                        shuffle=False, dim=IMAGE_H)

    n_slices = len(exam)
    x_test = np.zeros([n_slices, IMAGE_H, IMAGE_W, 1])
    for j in range(n_slices):
        x_test[j], _unused_label = exam_generator[j]

    feats = np.zeros([n_models, n_slices, ENCODING_DIM])
    results = np.zeros([n_models, n_slices, 1])
    # Walk the exam in chunks of BATCH_SIZE; the last chunk may be shorter.
    # A single stepped loop replaces the old "full batches + tail" code, whose
    # tail reused a stale `j` and left exams shorter than BATCH_SIZE unpredicted.
    for start in range(0, n_slices, BATCH_SIZE):
        stop = start + BATCH_SIZE
        batch = x_test[start:stop]
        for k in range(n_models):
            feats[k, start:stop] = np.array(model_partials[k](batch))
            results[k, start:stop] = np.array(models[k](batch))

    # `k` is the fold number here; list position is k - MIN.
    for k in range(MIN, MAX):
        exam['features'] = feats[k-MIN].tolist()
        exam['preds'] = results[k-MIN].tolist()
        np.save('/home/shared/test/turku_0'+ str(k)+ '/' + exam_id, exam.to_dict(orient='records'))

    if i % 10 == 0:
        print(i, 'time is: ', datetime.datetime.now())
        gc.collect()

0 time is:  2023-01-17 12:02:10.116863
10 time is:  2023-01-17 12:07:52.328087
20 time is:  2023-01-17 12:13:42.117829
30 time is:  2023-01-17 12:19:48.309585
40 time is:  2023-01-17 12:25:00.987625
50 time is:  2023-01-17 12:31:58.807176


KeyboardInterrupt: 