In [None]:
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D,Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout, Activation, Flatten, Concatenate, Dense, Reshape, Add, PReLU, LeakyReLU, BatchNormalization
from tensorflow.keras.optimizers import RMSprop, Adam, Adagrad, SGD, Adadelta
from tensorflow.keras import backend as K
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.applications import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.metrics import *
import os
from keras.models import load_model
import glob, os.path
from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import TensorBoard
import os
from skimage import io
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
from tensorflow.keras.callbacks import CSVLogger
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.utils import class_weight
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)
from sklearn.calibration import calibration_curve
from mlxtend.plotting import plot_confusion_matrix
import pandas as pd
import cv2
import numpy as np
import nibabel as nib
import pydicom

from pydicom.pixel_data_handlers.util import apply_voi_lut

# Functions

In [None]:
def nifty2numpy(nifti_path):
    """Load a NIfTI file and return its data as a NumPy array.

    Parameters
    ----------
    nifti_path : path handed to ``nib.load``.

    Returns
    -------
    numpy.ndarray
        Array built from the loaded image's ``dataobj``.
    """
    volume = nib.load(nifti_path)
    voxels = np.array(volume.dataobj)
    return voxels


def dicom2numpy(dicom_path):
    """Read a DICOM file and return its pixels and photometric interpretation.

    Parameters
    ----------
    dicom_path : path of the DICOM file to read.

    Returns
    -------
    tuple
        ``(pixels, photometric)`` where ``pixels`` is ``np.array`` of the
        dataset's ``pixel_array`` and ``photometric`` is the dataset's
        ``PhotometricInterpretation`` attribute.
    """
    # dcmread is the supported reader; read_file was only a deprecated alias
    # of it (removed in pydicom 3.0).
    dicom_file = pydicom.dcmread(dicom_path)
    return np.array(dicom_file.pixel_array), dicom_file.PhotometricInterpretation

def dicom2numpy_2(dicom_path):
    """Read a DICOM file and return pixels, photometric interpretation and dataset.

    Same as ``dicom2numpy`` but additionally returns the parsed dataset so the
    caller can inspect other header attributes.

    Parameters
    ----------
    dicom_path : path of the DICOM file to read.

    Returns
    -------
    tuple
        ``(pixels, photometric, dataset)``.
    """
    # dcmread is the supported reader; read_file was only a deprecated alias
    # of it (removed in pydicom 3.0).
    dicom_file = pydicom.dcmread(dicom_path)
    return np.array(dicom_file.pixel_array), dicom_file.PhotometricInterpretation, dicom_file


def json_to_dict(p):
    """Collect path-derived fields and sidecar-JSON tag values for one file.

    The global ``PATH`` prefix is stripped from ``p`` and the remainder is split
    on ``"/"``; components 0-3 become ``Subject``, ``Session``, ``mod`` and
    ``File``. ``Type`` is the last ``"_"``-separated token of the file name
    (with ``.png`` removed) and ``Path`` is ``p`` itself.

    The sidecar JSON is ``p`` with ``.png`` / ``.nii.gz`` replaced by ``.json``.
    If it does not exist, the first ``*.json`` found in
    ``PATH/<subject>/<session>/<mod>/`` is used instead. Every JSON entry that
    has a ``'Value'`` field is copied into the result, keyed by the pydicom
    dictionary description of the tag, or by the raw tag string when that
    lookup raises.

    Parameters
    ----------
    p : str
        Path of the image file.

    Returns
    -------
    dict
        The collected fields. When no JSON file is found, ``p`` is printed and
        only the path-derived fields are returned.
    """
    # The notebook's import cell never imports json, so bring it in here.
    import json

    dti = dict()
    path = p.replace(PATH, "").split("/")
    modality = path[-1].replace(".png", "").split("_")[-1]
    dti['Subject'] = path[0]
    dti['Session'] = path[1]
    dti['mod'] = path[2]
    dti['File'] = path[3]
    dti['Type'] = modality
    dti['Path'] = p
    json_file = p.replace('.png', '.json').replace('.nii.gz', '.json')
    # If there is no json associated with the file, then pick the json on the series.
    # Maybe multiple acquisitions with the same parameters in the same series?
    if not os.path.exists(json_file):
        json_in_series = glob.glob(os.path.join(PATH, path[0], path[1], path[2], '*.json'))
        if len(json_in_series) >= 1:
            json_file = json_in_series[0]
    if os.path.exists(json_file):
        with open(json_file) as f:
            dtij = json.load(f)
        for tag, entry in dtij.items():
            # Only entries shaped like {"Value": ...} carry data to copy.
            if isinstance(entry, dict) and 'Value' in entry:
                try:
                    key = pydicom.datadict.dictionary_description(str(tag))
                except Exception:
                    # Tag not resolvable through pydicom: keep the raw tag string.
                    key = str(tag)
                dti[key] = entry['Value']
    else:
        print(p)
    return dti

def min_max_preprocessing(images):
    """Rescale every image independently to the [0, 1] range.

    Parameters
    ----------
    images : sequence of array-likes
        One entry per image; each is rescaled with its own min and max.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the rescaled images. An image whose pixels are all
        equal (max == min) is returned as all zeros instead of NaN.
    """
    processed_images = []
    for image in images:
        image = np.asarray(image)
        mini = np.min(image)
        span = np.max(image) - mini
        # Constant image: dividing by 1 yields zeros (with the same float dtype
        # the regular path produces) rather than the 0/0 = NaN of the naive formula.
        divisor = span if span != 0 else 1
        processed_images.append((image - mini) / divisor)
    return np.array(processed_images)

def samplewise_preprocessing(images):
    """Standardise each image with its own mean and standard deviation.

    Images whose mean or standard deviation equals zero are left out of the
    result entirely, so the output may hold fewer images than the input.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images indexed along axis 0.

    Returns
    -------
    tuple
        ``(standardised, mean_of_means, mean_of_stds)`` where the two scalars
        are averaged over the images that were kept.
    """
    kept_images, kept_means, kept_stds = [], [], []
    n_samples = images.shape[0]
    for sample in (images[k] for k in range(n_samples)):
        mean = np.mean(sample)
        std = np.std(sample)
        if std == 0 or mean == 0:
            # Degenerate image: dropped from the output and from the statistics.
            continue
        kept_means.append(mean)
        kept_stds.append(std)
        kept_images.append((sample - mean) / std)

    return np.array(kept_images), np.mean(kept_means), np.mean(kept_stds)


def featurewise_preprocessing(images, mean, std):
    """Standardise every image with one shared mean and standard deviation.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images indexed along axis 0.
    mean, std : values subtracted from / divided into every image.

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as ``images``.
    """
    standardised = np.zeros_like(images, dtype=np.float32)
    n_samples = images.shape[0]
    for idx in range(n_samples):
        sample = images[idx]
        standardised[idx] = (sample - mean) / std
    return standardised

def checkDuplicates(trainDF, devDF, testDF,id_column):
    """Print how many identifiers are shared between each pair of splits.

    Parameters
    ----------
    trainDF, devDF, testDF : tables indexable by ``id_column``.
    id_column : name of the column holding the identifier to compare.

    Returns
    -------
    None. Three lines are printed: train/dev, train/test and dev/test overlap counts.
    """
    train_ids, dev_ids, test_ids = (
        set(frame[id_column]) for frame in (trainDF, devDF, testDF)
    )

    print('# de pacientes de train presentes en dev:', len(train_ids & dev_ids))
    print('# de pacientes de train presentes en test:', len(train_ids & test_ids))
    print('# de pacientes de dev presentes en test:', len(dev_ids & test_ids))

def remove_black_borders_1(img):
    """Crop a 2-D image to the bounding box of its non-black rows and columns.

    A row or column counts as black when the sum of its pixels is not greater
    than zero.

    Parameters
    ----------
    img : numpy.ndarray
        2-D image.

    Returns
    -------
    numpy.ndarray
        View of ``img`` from the first to the last non-black row and column,
        both inclusive. An empty ``(0, 0)`` slice is returned when no row or
        no column has a positive sum.

    Raises
    ------
    ValueError
        If ``img`` is not 2-D (callers rely on this to skip such images).
    """
    if img.ndim != 2:
        raise ValueError('remove_black_borders_1 expects a 2-D image, got %d-D' % img.ndim)

    # ndarray.sum accumulates small integer dtypes in a wide integer, so bright
    # rows cannot wrap around to zero the way a scalar-by-scalar sum can.
    signal_rows = np.flatnonzero(img.sum(axis=1) > 0)
    signal_cols = np.flatnonzero(img.sum(axis=0) > 0)
    if signal_rows.size == 0 or signal_cols.size == 0:
        return img[0:0, 0:0]

    # Slice ends are exclusive: +1 keeps the last non-black row/column.
    return img[signal_rows[0]:signal_rows[-1] + 1, signal_cols[0]:signal_cols[-1] + 1]
    

def saveNPY_from_DICOM(DF,destination, name,path,src_column,W=224,H=224,C_Labels=False,rmv_black_borders=False):
    """Read the DICOM files listed in a table, resize them and save them as .npy.

    Parameters
    ----------
    DF : pandas.DataFrame
        Table with one row per image. Must contain ``src_column`` and, when
        ``C_Labels`` is true, a ``group`` column with values 'C', 'N', 'I', 'NI'.
    destination : str
        Output directory.
    name : str
        Suffix of the output files: ``X_<name>.npy`` and ``y_<name>.npy``.
    path : str
        Directory that the entries of ``src_column`` are relative to.
    src_column : str
        Column holding the relative file paths.
    W, H : int
        Target width and height passed to ``cv2.resize``.
    C_Labels : bool
        Also save one-hot labels (4 classes) for the images that were kept.
    rmv_black_borders : bool
        Crop black borders with ``remove_black_borders_1`` before resizing.

    Returns
    -------
    None. Images that cannot be cropped or resized are skipped; the number of
    skipped images is printed when it is not zero.
    """
    src_dir = path
    images = []
    kept_rows = []  # positions of the rows whose image was actually stored
    skipped = 0

    print('reading images...')

    for i in tqdm(range(len(DF))):
        # .iloc is positional, so the loop does not depend on a 0..n-1 index.
        src_file = os.path.join(src_dir, DF[src_column].iloc[i])
        img, ph, _ = dicom2numpy_2(src_file)
        if ph == 'MONOCHROME1':
            # MONOCHROME1 stores inverted intensities; flip them around the maximum.
            img = np.amax(img) - img

        if rmv_black_borders:
            try:
                img = remove_black_borders_1(img)
            except Exception:
                skipped += 1
                continue

        try:
            resized = cv2.resize(img, (W, H))
        except Exception:
            skipped += 1
            continue

        # cv2.resize returns (H, W[, C]); for 4-channel results keep only the first channel.
        if resized.ndim == 3 and resized.shape[2] == 4:
            resized = resized[:, :, 0]
        images.append(resized)
        kept_rows.append(i)

    NPY = np.array(images)
    np.save(os.path.join(destination, 'X_' + name + '.npy'), NPY)

    if C_Labels:
        # Only label the rows whose image was stored, so X and y stay aligned.
        labels_ = DF.group.iloc[kept_rows].replace(['C', 'N', 'I', 'NI'], [0, 1, 2, 3])
        labels = tf.keras.utils.to_categorical(labels_, num_classes=4)
        np.save(os.path.join(destination, 'y_' + name + '.npy'), labels)

    if skipped:
        print('skipped images:', skipped)
    print('done!')
    
    
from keras import backend as K

def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient computed along the last axis.

    Returns ``(2 * sum(|y_true * y_pred|) + 1) / (sum(y_true^2) + sum(y_pred^2) + 1)``
    with every sum taken over ``axis=-1``.
    """
    smooth = 1
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    numerator = 2. * overlap + smooth
    denominator = K.sum(K.square(y_true), -1) + K.sum(K.square(y_pred), -1) + smooth
    return numerator / denominator

def dice_coef_loss(y_true, y_pred):
    """Dice loss: one minus ``dice_coef(y_true, y_pred)``."""
    overlap_score = dice_coef(y_true, y_pred)
    return 1 - overlap_score

def iou(y_true, y_pred):
    """Smoothed intersection-over-union, evaluated in NumPy via ``tf.numpy_function``.

    The value is ``(intersection + 1) / (union + 1)`` where the intersection is
    the sum of ``y_true * y_pred`` over all elements and the union is
    ``sum(y_true) + sum(y_pred) - intersection``. The result is a float32 tensor.
    """
    def _iou_numpy(truth, guess):
        smooth = 1
        overlap = (truth * guess).sum()
        combined = truth.sum() + guess.sum() - overlap
        ratio = (overlap + smooth) / (combined + smooth)
        return ratio.astype(np.float32)

    return tf.numpy_function(_iou_numpy, [y_true, y_pred], tf.float32)

def bce_dice_loss(y_true, y_pred):
    """Sum of binary cross-entropy and the Dice loss.

    Uses ``dice_coef_loss`` defined in this notebook; the previously referenced
    ``dice_loss`` is not defined anywhere in the notebook's visible code.
    """
    return binary_crossentropy(y_true, y_pred) + dice_coef_loss(y_true, y_pred)

def focal_loss(y_true, y_pred):
    """Mean focal loss with alpha = 0.25 and gamma = 2.

    ``y_pred`` is clipped to ``[epsilon, 1 - epsilon]`` and converted to logits
    with ``log(p / (1 - p))``; the per-element loss is then averaged over all
    elements with ``tf.reduce_mean``.
    """
    alpha = 0.25
    gamma = 2

    eps = tf.keras.backend.epsilon()
    probs = tf.clip_by_value(y_pred, eps, 1 - eps)
    logits = tf.math.log(probs / (1 - probs))

    # Weights applied to the positive (target == 1) and negative (target == 0) terms.
    positive_weight = alpha * (1 - probs) ** gamma * y_true
    negative_weight = (1 - alpha) * probs ** gamma * (1 - y_true)

    stable_term = tf.math.log1p(tf.exp(-tf.abs(logits))) + tf.nn.relu(-logits)
    per_element = stable_term * (positive_weight + negative_weight) + logits * negative_weight
    # or reduce_sum and/or axis=-1
    return tf.reduce_mean(per_element)


# Dataframe Creation

In [None]:
# Empty table that the scan cells below fill with one row per DICOM image.
# 'partition' and 'projection' are initialised to 'NR' by those cells.
col_names =  ['subject','session','filepath','partition','projection']
SES_df  = pd.DataFrame(columns = col_names)

In [None]:
#Positives
# Walk PATH/<subject>/<session>/<scan>/DICOM/<image> and register every image.
# Rows are collected in a list and added to SES_df in one step: DataFrame.append
# was removed in pandas 2.0 and copying the frame per row is quadratic.
PATH="/media/ia/DATA/COVID19/Xnat_positivas/"
Subjects=os.listdir(PATH)
new_rows = []
for sub in tqdm(Subjects):
    sessions=os.listdir(os.path.join(PATH, sub))
    for sess in sessions:
        subsess=os.listdir(os.path.join(PATH, sub, sess))
        for subs in subsess:
            folder=os.listdir(os.path.join(PATH, sub, sess, subs))
            if 'DICOM' not in folder:
                continue
            images=os.listdir(os.path.join(PATH, sub, sess, subs, 'DICOM'))
            for img in images:
                # filepath is stored relative to PATH, '/'-separated.
                new_rows.append([sub, sess, '/'.join((sub, sess, subs, 'DICOM', img)), 'NR', 'NR'])

new_rows_df = pd.DataFrame(new_rows, columns=SES_df.columns)
SES_df = new_rows_df if SES_df.empty else pd.concat([SES_df, new_rows_df], ignore_index=True)
                    

In [None]:
# Inspect the last session folder name left over from the scan loop above.
sess

In [None]:
import os

# Root folder of the negative studies; every file below it whose name
# contains ".dcm" is collected.
thisdir = '/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
Files = []   # full paths
names=[]     # matching base names, same order as Files
for root, _subdirs, basenames in os.walk(thisdir):
    dicom_basenames = [basename for basename in basenames if ".dcm" in basename]
    Files.extend(os.path.join(root, basename) for basename in dicom_basenames)
    names.extend(dicom_basenames)
filenames = Files

In [None]:
# Keep the base names of the files acquired on GE Healthcare equipment
# (select just the GE Healthcare manufacturer to test models).
files=[]
for f, name in zip(filenames, names):
    # Only the header is needed here, so skip decoding the pixel data.
    header = pydicom.dcmread(f, stop_before_pixels=True)
    # Files without a Manufacturer tag are simply not selected.
    if getattr(header, 'Manufacturer', None)=='GE Healthcare':
        files.append(name)
        

In [None]:
# Number of file names kept by the manufacturer filter.
len(files)

In [None]:
#Negatives
# Walk PATH/<subject>/<session>/<scan>/DICOM/<image> and register the images
# whose base name was selected into `files`. Rows are collected in a list and
# added to SES_df in one step: DataFrame.append was removed in pandas 2.0 and
# copying the frame per row is quadratic.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
Subjects=os.listdir(PATH)
selected_names = set(files)  # constant-time membership test per image
new_rows = []
for sub in tqdm(Subjects):
    sessions=os.listdir(os.path.join(PATH, sub))
    for sess in sessions:
        subsess=os.listdir(os.path.join(PATH, sub, sess))
        for subs in subsess:
            folder=os.listdir(os.path.join(PATH, sub, sess, subs))
            if 'DICOM' not in folder:
                continue
            images=os.listdir(os.path.join(PATH, sub, sess, subs, 'DICOM'))
            for img in images:
                if img in selected_names:
                    # filepath is stored relative to PATH, '/'-separated.
                    new_rows.append([sub, sess, '/'.join((sub, sess, subs, 'DICOM', img)), 'NR', 'NR'])

new_rows_df = pd.DataFrame(new_rows, columns=SES_df.columns)
SES_df = new_rows_df if SES_df.empty else pd.concat([SES_df, new_rows_df], ignore_index=True)

In [None]:
# Display the assembled table.
SES_df

In [None]:
# Persist the table as CSV. The row index is written as an unnamed first
# column, which shows up as 'Unnamed: 0' when the file is read back.
SES_df.to_csv("/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/SES_Negative_info_Seed1.csv",sep=',')

# Frontal vs Lateral Classification

In [None]:
# Reload the saved table and drop the index column that to_csv wrote.
SES_df=pd.read_csv("/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/SES_Negative_info_Seed1.csv").drop(['Unnamed: 0'],axis=1)

In [None]:
# Display the reloaded table.
SES_df

In [None]:
# Data used to derive the normalisation / standardisation statistics.
X_train_F = np.load('/media/ia/DATA/COVID19/NPY_Data/COVID/X_train_COVID_1CH.npy')
X_L = np.load('/media/ia/DATA/COVID19/NPY_Data/COVID/X_test_lateral.npy')

# Lateral images are cut at positions 375 and 611 into three consecutive chunks.
lateral_chunks = np.array_split(X_L, [375, 611])
X_train_L, X_test_L, X_dev_L = lateral_chunks

# Training set for the statistics = first array + first lateral chunk,
# min-max scaled per image and then standardised per image.
# `mean` and `std` are the averages of the per-image means / stds.
X_train = min_max_preprocessing(np.concatenate((X_train_F, X_train_L)))
X_train, mean, std = samplewise_preprocessing(X_train)



In [None]:
def get_model_VGG19_gray():
    """Build and compile a single-channel (grayscale) VGG19 classifier.

    Steps:
      1. Load ImageNet VGG19 without its top and collapse the RGB kernels of
         ``block1_conv1`` into one channel with the luminance weights
         0.2989 / 0.5870 / 0.1140.
      2. Rebuild the VGG19 convolutional stack for 224x224x1 inputs (layer
         names prefixed with ``224_``; the last pool is 8x8) and copy the
         pretrained weights into it.
      3. Add a head: global average pooling, flatten, dropout 0.2, Dense 1024,
         dropout 0.2, Dense 512, Dense 64 and a 2-unit softmax named ``output_1``.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam (learning rate 1e-4), categorical
        cross-entropy and accuracy. All layers are trainable. The model
        summary is printed as a side effect.
    """
    pretrained = tf.keras.applications.VGG19(weights='imagenet', include_top=False)

    # block1_conv1 kernels have shape (3, 3, 3, 64) = (rows, cols, RGB, filters).
    # Contract the RGB axis with the luminance weights and re-insert a singleton
    # input-channel axis to obtain grayscale kernels of shape (3, 3, 1, 64).
    rgb_kernels, biases = pretrained.get_layer('block1_conv1').get_weights()
    luminance = np.array([0.2989, 0.5870, 0.1140])
    grayscale_weights = np.tensordot(rgb_kernels, luminance, axes=([2], [0]))[:, :, np.newaxis, :]
    new_block1_conv1 = [grayscale_weights, biases]

    # Keep the weights of every conv layer from block1_conv2 onwards, keyed by
    # the name the matching layer has in the rebuilt network.
    vgg19_weights = {}
    for layer in pretrained.layers[2:]:
        if "conv" in layer.name:
            vgg19_weights["224_" + layer.name] = layer.get_weights()

    del pretrained

    # Custom-built VGG19 for one-channel input.
    inputs = Input(shape=(224, 224, 1), name='224_input')
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 1), data_format="channels_last", name='224_block1_conv1')(inputs)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='224_block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='224_block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='224_block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='224_block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='224_block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='224_block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='224_block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='224_block3_conv3')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='224_block3_conv4')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='224_block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block4_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block4_conv4')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='224_block4_pool')(x)

    # Block 5 (note the 8x8 pool, unlike the 2x2 pools of the other blocks)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block5_conv3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='224_block5_conv4')(x)
    x = MaxPooling2D((8, 8), strides=(8, 8), name='224_block5_pool')(x)

    base_model = Model(inputs=inputs, outputs=x)

    # Transfer the pretrained weights: converted kernels for the first conv,
    # unchanged weights for all remaining conv layers.
    base_model.get_layer('224_block1_conv1').set_weights(new_block1_conv1)
    for layer in base_model.layers[2:]:
        if 'conv' in layer.name:
            layer.set_weights(vgg19_weights[layer.name])

    # The whole backbone is fine-tuned.
    for layer in base_model.layers:
        layer.trainable = True

    # Classification head.
    x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
    layers = tf.keras.layers.Flatten()(x)
    layers = tf.keras.layers.Dropout(0.2)(layers)
    layers = tf.keras.layers.Dense(1024, activation="relu")(layers)
    layers = tf.keras.layers.Dropout(0.2)(layers)
    layers = tf.keras.layers.Dense(512, activation="relu")(layers)
    layers = tf.keras.layers.Dense(64, activation="relu")(layers)
    predictions = tf.keras.layers.Dense(2, activation="softmax", name="output_1")(layers)

    # Compile. `learning_rate` is the supported argument name; `lr` is a
    # deprecated alias that newer Keras versions reject.
    model = tf.keras.Model(inputs=base_model.input, outputs=predictions)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy'])
    model.summary()
    return model

In [None]:
# Build and compile the grayscale VGG19 classifier (also prints its summary).
model = get_model_VGG19_gray()

In [None]:
model.load_weights("/media/ia/DATA/COVID19/VGG19_For_Covid_FyL.h5") # Load the trained weights used for the classification below

In [None]:
# Classify every image with the model and store the result in the
# 'projection' column: class 0 -> 'F', class 1 -> 'NA'.
# NOTE(review): the string 'NA' is read back as a missing value by
# pandas.read_csv's defaults — presumably why a later cell filters with
# isna(); confirm that this label is intended.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
for i in tqdm(range(len(SES_df))):
    img,ph=dicom2numpy(PATH+SES_df['filepath'][i])
    if ph=='MONOCHROME1':
        # Flip around the maximum; unary minus wraps around on unsigned pixel
        # data (0 stays 0), which does not invert the image.
        img=np.amax(img)-img
    img=cv2.resize(img, (224, 224))

    # Same preprocessing as the training data: per-image min-max scaling,
    # then standardisation with the global `mean` / `std`.
    maxi=np.max(img)
    mini=np.min(img)
    img_N=(img-mini)/(maxi-mini)
    img_P=(img_N-mean)/std

    # Add explicit batch and channel axes: (224, 224) -> (1, 224, 224, 1).
    predictions=model.predict(img_P[np.newaxis, ..., np.newaxis])
    y_pred_bool = np.argmax(predictions, axis=-1)

    # .loc writes into SES_df itself; chained indexing (df[col][i] = ...) may
    # write to a temporary copy and is rejected under pandas copy-on-write.
    if y_pred_bool[0]==0:
        SES_df.loc[i,'projection']='F'
    elif y_pred_bool[0]==1:
        SES_df.loc[i,'projection']='NA'

In [None]:
# Rows whose projection value is missing (NaN).
SES_df.loc[SES_df.projection.isna()]

In [None]:
# Show one image from the rows with a missing projection.
# NOTE(review): 2041 is a hard-coded row label; this only works while that
# row is part of the filtered result.
img,ph=dicom2numpy(PATH+SES_df.loc[SES_df.projection.isna()]['filepath'][2041])
plt.imshow(img,cmap='gray')

In [None]:
# Display the table with the predicted projections.
SES_df

# Save Images Using 3 Seeds (2,4,8)

In [None]:
# Keep only the rows labelled 'F'. reset_index() renumbers the rows from 0
# and keeps the previous index as a new 'index' column.
SES_Frontal=SES_df.loc[SES_df.projection=='F'].reset_index()

In [None]:
# Display the rows labelled 'F'.
SES_Frontal

In [None]:
# Preview one image from SES_Frontal (row 500) after inversion and resizing.
img,ph=dicom2numpy(PATH+SES_Frontal['filepath'][500])
if ph=='MONOCHROME1':
    # Flip around the maximum; unary minus wraps around on unsigned pixel data.
    img=np.amax(img)-img
img=cv2.resize(img, (224, 224))
plt.imshow(img,cmap='gray')

In [None]:
# Export all images listed in SES_Frontal as one 224x224 array, with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/"

saveNPY_from_DICOM(
    DF=SES_Frontal,
    destination=destination_f,
    name='SES_COVID_NEGATIVE_224_Normal',
    path=PATH,
    src_column='filepath',
    W=224,
    H=224,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Reload the array written by the export cell above.
X_SES_P=np.load("/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/X_SES_COVID_NEGATIVE_224_Normal.npy")

In [None]:
# Shape of the reloaded array.
X_SES_P.shape

In [None]:
# Visual check of the image at index 5.
plt.imshow(X_SES_P[5],cmap='gray')

# Train / dev / test split

In [None]:
def json_to_dict(p):
    """Collect path-derived fields and sidecar-JSON tag values for one file.

    The global ``PATH`` prefix is stripped from ``p`` and the remainder is split
    on ``"/"``; components 0-3 become ``Subject``, ``Session``, ``mod`` and
    ``File``. ``Type`` is the last ``"_"``-separated token of the file name
    (with ``.png`` removed) and ``Path`` is ``p`` itself.

    The sidecar JSON is ``p`` with ``.png`` / ``.nii.gz`` replaced by ``.json``.
    If it does not exist, the first ``*.json`` found in
    ``PATH/<subject>/<session>/<mod>/`` is used instead. Every JSON entry that
    has a ``'Value'`` field is copied into the result, keyed by the pydicom
    dictionary description of the tag, or by the raw tag string when that
    lookup raises.

    Parameters
    ----------
    p : str
        Path of the image file.

    Returns
    -------
    dict
        The collected fields. When no JSON file is found, ``p`` is printed and
        only the path-derived fields are returned.
    """
    # The notebook's import cell never imports json, so bring it in here.
    import json

    dti = dict()
    path = p.replace(PATH, "").split("/")
    modality = path[-1].replace(".png", "").split("_")[-1]
    dti['Subject'] = path[0]
    dti['Session'] = path[1]
    dti['mod'] = path[2]
    dti['File'] = path[3]
    dti['Type'] = modality
    dti['Path'] = p
    json_file = p.replace('.png', '.json').replace('.nii.gz', '.json')
    # If there is no json associated with the file, then pick the json on the series.
    # Maybe multiple acquisitions with the same parameters in the same series?
    if not os.path.exists(json_file):
        json_in_series = glob.glob(os.path.join(PATH, path[0], path[1], path[2], '*.json'))
        if len(json_in_series) >= 1:
            json_file = json_in_series[0]
    if os.path.exists(json_file):
        with open(json_file) as f:
            dtij = json.load(f)
        for tag, entry in dtij.items():
            # Only entries shaped like {"Value": ...} carry data to copy.
            if isinstance(entry, dict) and 'Value' in entry:
                try:
                    key = pydicom.datadict.dictionary_description(str(tag))
                except Exception:
                    # Tag not resolvable through pydicom: keep the raw tag string.
                    key = str(tag)
                dti[key] = entry['Value']
    else:
        print(p)
    return dti

def min_max_preprocessing(images):
    """Rescale every image independently to the [0, 1] range.

    Parameters
    ----------
    images : sequence of array-likes
        One entry per image; each is rescaled with its own min and max.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the rescaled images. An image whose pixels are all
        equal (max == min) is returned as all zeros instead of NaN.
    """
    processed_images = []
    for image in images:
        image = np.asarray(image)
        mini = np.min(image)
        span = np.max(image) - mini
        # Constant image: dividing by 1 yields zeros (with the same float dtype
        # the regular path produces) rather than the 0/0 = NaN of the naive formula.
        divisor = span if span != 0 else 1
        processed_images.append((image - mini) / divisor)
    return np.array(processed_images)

def samplewise_preprocessing(images):
    """Standardise each image with its own mean and standard deviation.

    Images whose mean or standard deviation equals zero are left out of the
    result entirely, so the output may hold fewer images than the input.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images indexed along axis 0.

    Returns
    -------
    tuple
        ``(standardised, mean_of_means, mean_of_stds)`` where the two scalars
        are averaged over the images that were kept.
    """
    kept_images, kept_means, kept_stds = [], [], []
    n_samples = images.shape[0]
    for sample in (images[k] for k in range(n_samples)):
        mean = np.mean(sample)
        std = np.std(sample)
        if std == 0 or mean == 0:
            # Degenerate image: dropped from the output and from the statistics.
            continue
        kept_means.append(mean)
        kept_stds.append(std)
        kept_images.append((sample - mean) / std)

    return np.array(kept_images), np.mean(kept_means), np.mean(kept_stds)


def featurewise_preprocessing(images, mean, std):
    """Standardise every image with one shared mean and standard deviation.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images indexed along axis 0.
    mean, std : values subtracted from / divided into every image.

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as ``images``.
    """
    standardised = np.zeros_like(images, dtype=np.float32)
    n_samples = images.shape[0]
    for idx in range(n_samples):
        sample = images[idx]
        standardised[idx] = (sample - mean) / std
    return standardised

def checkDuplicates(trainDF,devDF,testDF,id_column):
    """Print how many identifiers are shared between each pair of splits.

    Parameters
    ----------
    trainDF, devDF, testDF : tables indexable by ``id_column``.
    id_column : name of the column holding the identifier to compare.

    Returns
    -------
    None. Three lines are printed: train/dev, train/test and dev/test overlap counts.
    """
    train_ids, dev_ids, test_ids = (
        set(frame[id_column]) for frame in (trainDF, devDF, testDF)
    )

    print('# de pacientes de train presentes en dev:', len(train_ids & dev_ids))
    print('# de pacientes de train presentes en test:', len(train_ids & test_ids))
    print('# de pacientes de dev presentes en test:', len(dev_ids & test_ids))

def remove_black_borders_1(img):
    """Crop a 2-D image to the bounding box of its non-black rows and columns.

    A row or column counts as black when the sum of its pixels is not greater
    than zero.

    Parameters
    ----------
    img : numpy.ndarray
        2-D image.

    Returns
    -------
    numpy.ndarray
        View of ``img`` from the first to the last non-black row and column,
        both inclusive. An empty ``(0, 0)`` slice is returned when no row or
        no column has a positive sum.

    Raises
    ------
    ValueError
        If ``img`` is not 2-D (callers rely on this to skip such images).
    """
    if img.ndim != 2:
        raise ValueError('remove_black_borders_1 expects a 2-D image, got %d-D' % img.ndim)

    # ndarray.sum accumulates small integer dtypes in a wide integer, so bright
    # rows cannot wrap around to zero the way a scalar-by-scalar sum can.
    signal_rows = np.flatnonzero(img.sum(axis=1) > 0)
    signal_cols = np.flatnonzero(img.sum(axis=0) > 0)
    if signal_rows.size == 0 or signal_cols.size == 0:
        return img[0:0, 0:0]

    # Slice ends are exclusive: +1 keeps the last non-black row/column.
    return img[signal_rows[0]:signal_rows[-1] + 1, signal_cols[0]:signal_cols[-1] + 1]
    

def saveNPY(DF,destination, name,path,src_column,W=224,H=224,C_Labels=False,rmv_black_borders=False):
    """Read the image files listed in a table, resize them and save them as .npy.

    Counterpart of ``saveNPY_from_DICOM`` for images readable by
    ``cv2.imread``; the photometric interpretation comes from the sidecar
    JSON through ``json_to_dict``.

    Parameters
    ----------
    DF : pandas.DataFrame
        Table with one row per image. Must contain ``src_column`` and
        ``filepath`` and, when ``C_Labels`` is true, a ``group`` column with
        values 'C', 'N', 'I', 'NI'.
    destination : str
        Output directory.
    name : str
        Suffix of the output files: ``X_<name>.npy`` and ``y_<name>.npy``.
    path : str
        Directory that the entries of ``src_column`` are relative to.
    src_column : str
        Column holding the relative file paths.
    W, H : int
        Target width and height passed to ``cv2.resize``.
    C_Labels : bool
        Also save one-hot labels (4 classes) for the images that were kept.
    rmv_black_borders : bool
        Crop black borders with ``remove_black_borders_1`` before resizing.

    Returns
    -------
    None. Images that cannot be read, cropped or resized are skipped; the
    number of skipped images is printed when it is not zero.
    """
    src_dir = path
    images = []
    kept_rows = []  # positions of the rows whose image was actually stored
    skipped = 0

    print('reading images...')

    for i in tqdm(range(len(DF))):
        # .iloc is positional, so the loop does not depend on a 0..n-1 index.
        src_file = os.path.join(src_dir, DF[src_column].iloc[i])
        img = cv2.imread(src_file,-1)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            skipped += 1
            continue

        # NOTE(review): the metadata lookup always uses the 'filepath' column,
        # not src_column, and expects the raw tag '00280004' as key — confirm
        # both against json_to_dict's output.
        dti=json_to_dict(DF['filepath'].iloc[i])
        if dti['00280004'][0]=='MONOCHROME1':
            # Flip around the maximum; unary minus wraps around on unsigned pixel data.
            img = np.amax(img) - img

        if rmv_black_borders:
            try:
                img=remove_black_borders_1(img)
            except Exception:
                skipped += 1
                continue

        try:
            resized = cv2.resize(img, (W, H))
        except Exception:
            # e.g. an image that became empty after border removal
            skipped += 1
            continue

        # cv2.resize returns (H, W[, C]); for 4-channel results keep only the first channel.
        if resized.ndim == 3 and resized.shape[2] == 4:
            resized = resized[:, :, 0]
        images.append(resized)
        kept_rows.append(i)

    NPY = np.array(images)
    np.save(os.path.join(destination, 'X_' + name + '.npy'), NPY)

    if C_Labels:
        # Only label the rows whose image was stored, so X and y stay aligned.
        labels_ = DF.group.iloc[kept_rows].replace(['C', 'N', 'I', 'NI'], [0, 1, 2, 3])
        labels = tf.keras.utils.to_categorical(labels_, num_classes=4)
        np.save(os.path.join(destination, 'y_' + name + '.npy'), labels)

    if skipped:
        print('skipped images:', skipped)
    print('done!')

In [None]:
# Load the Seed3 table and drop the index column that to_csv wrote.
Partitions=pd.read_csv("/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/SES_Negative_info_Seed3.csv").drop(['Unnamed: 0'],axis=1)

In [None]:
# Display the loaded table.
Partitions

In [None]:
# Subject-level split of the frontal ('F') rows into train / dev / test.
# 3 seeds are used across runs (2, 4, 8); NOTE(review): this run uses seed 8
# while the file names say Seed3 — presumably "third seed", confirm.
np.random.seed(8)
data = Partitions[Partitions['projection'] == 'F']
a = data['subject'].unique()

# np.random.choice samples WITH replacement by default, so drawing 1.18 * n
# subjects yields roughly 1 - exp(-1.18) ~ 69% distinct subjects for train.
train = np.random.choice(a, size=int(len(a) * 1.18))
data1 = data[~data['subject'].isin(train)]
b = data1['subject'].unique()

# Same trick on the remainder: 0.85 * n draws with replacement ~ 57% distinct
# subjects for dev; whatever is left becomes test.
dev = np.random.choice(b, size=int(len(b) * 0.85))
test = data1[~data1['subject'].isin(dev)]['subject'].unique()

#test=data.drop(data.loc[data.subject.isin(train)].index)['subject'].unique()

In [None]:
# Frontal rows of each split, selected by subject membership.
trainDF_SES = data[data['subject'].isin(train)]
devDF_SES = data[data['subject'].isin(dev)]
testDF_SES = data[data['subject'].isin(test)]

In [None]:
# Display the frontal rows used for the split.
data

In [None]:
# Number of distinct subjects among the frontal rows.
len(a)

In [None]:
# Display the train rows.
trainDF_SES

In [None]:
# Display the test rows.
testDF_SES

In [None]:
# Display the dev rows.
devDF_SES

In [None]:
# Print how many subjects are shared between each pair of splits.
checkDuplicates(trainDF_SES,devDF_SES,testDF_SES,'subject')

In [None]:
# Report the number of images overall and per split.
for caption, frame in (
    ('Cantidad imagenes:', data),
    ('Cantidad imagenes train:', trainDF_SES),
    ('Cantidad imagenes dev: ', devDF_SES),
    ('Cantidad imagenes test:', testDF_SES),
):
    print(caption, len(frame))

In [None]:
# Write the split label of every subject into the 'partition' column.
# A single .loc[mask, column] assignment modifies Partitions itself; chained
# indexing (df[col][mask] = ...) may write to a temporary copy and is rejected
# under pandas copy-on-write.
Partitions.loc[Partitions.subject.isin(train), 'partition'] = 'tr'
Partitions.loc[Partitions.subject.isin(dev), 'partition'] = 'dev'
Partitions.loc[Partitions.subject.isin(test), 'partition'] = 'te'

In [None]:
# Display the table with the assigned partitions.
Partitions

In [None]:
# Write the labelled table back to the same CSV it was loaded from
# (the row index is written as an unnamed first column).
Partitions.to_csv("/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/SES_Negative_info_Seed3.csv",sep=',')

In [None]:
# Save one tab-separated table per split.
split_tables = {
    "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Train_Seed3.tsv": trainDF_SES,
    "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Dev_Seed3.tsv": devDF_SES,
    "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Test_Seed3.tsv": testDF_SES,
}
for tsv_path, split_frame in split_tables.items():
    split_frame.to_csv(tsv_path, sep='\t')

In [None]:
# Reload the three split tables; the saved index column is dropped, so the
# rows are renumbered from 0.
trainDF, devDF, testDF = (
    pd.read_csv(tsv_path, sep='\t').drop(columns=['Unnamed: 0'])
    for tsv_path in (
        "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Train_Seed3.tsv",
        "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Dev_Seed3.tsv",
        "/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/Tables/Train, Validation, Test/Seed3/Info_Neg_Test_Seed3.tsv",
    )
)

In [None]:
# Display the reloaded train table.
trainDF

In [None]:
# Export the train split at 600x600 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/600x600/Seed3/"

saveNPY_from_DICOM(
    DF=trainDF,
    destination=destination_f,
    name='Neg_Train_600_Seed3',
    path=PATH,
    src_column='filepath',
    W=600,
    H=600,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Export the dev split at 600x600 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/600x600/Seed3/"

saveNPY_from_DICOM(
    DF=devDF,
    destination=destination_f,
    name='Neg_Dev_600_Seed3',
    path=PATH,
    src_column='filepath',
    W=600,
    H=600,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Export the test split at 600x600 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/600x600/Seed3/"

saveNPY_from_DICOM(
    DF=testDF,
    destination=destination_f,
    name='Neg_Test_600_Seed3',
    path=PATH,
    src_column='filepath',
    W=600,
    H=600,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Export the train split at 224x224 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/224x224/Seed3/"

saveNPY_from_DICOM(
    DF=trainDF,
    destination=destination_f,
    name='Neg_Train_224_Seed3',
    path=PATH,
    src_column='filepath',
    W=224,
    H=224,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Export the dev split at 224x224 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/224x224/Seed3/"

saveNPY_from_DICOM(
    DF=devDF,
    destination=destination_f,
    name='Neg_Dev_224_Seed3',
    path=PATH,
    src_column='filepath',
    W=224,
    H=224,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Export the test split at 224x224 with black borders cropped.
PATH='/media/ia/DATA/COVID19/Negativas XNAT/p0032021/'
destination_f="/media/ia/DATA/COVID19/SES_DATABASE/COVID19 NEGATIVE/NPY Processed/Normal Images/Complete/Train, Validation, Test/224x224/Seed3/"

saveNPY_from_DICOM(
    DF=testDF,
    destination=destination_f,
    name='Neg_Test_224_Seed3',
    path=PATH,
    src_column='filepath',
    W=224,
    H=224,
    C_Labels=False,
    rmv_black_borders=True,
)

In [None]:
# Sanity check: reload the 224x224 dev array, print its shape and show the image at index 1.
dev_array_path = destination_f + "X_Neg_Dev_224_Seed3.npy"
X_SES = np.load(dev_array_path)
print(X_SES.shape)

preview = X_SES[1]
plt.imshow(preview, cmap='gray')
plt.show()