## Baixando o dataset

In [None]:
!pip install PyDrive

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
downloaded_1 = drive.CreateFile({'id':"1F1b3PgUq9wfWllzGHSvxhJJ3G875af4Z"})
downloaded_1.GetContentFile('testing.zip')

downloaded_2 = drive.CreateFile({'id':"1kiwGLquBGvQKDdflxVNWWz85eOY3NIZ-"})
downloaded_2.GetContentFile('training.zip')

In [None]:
# Pegar as imagens do zipSua resposta é importante para avaliarmos e, se necessário, aprimorar ou corrigir o quadro de disciplinas ou o formato do Bacharelado em Ciência da Computação.

!unzip testing.zip -d testing/
!unzip training.zip -d training/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: training/training/patient0235/patient0235_4CH_ES.raw  
  inflating: training/training/patient0235/patient0235_4CH_ES_gt.mhd  
  inflating: training/training/patient0235/patient0235_4CH_ES_gt.raw  
  inflating: training/training/patient0235/patient0235_4CH_sequence.mhd  
  inflating: training/training/patient0235/patient0235_4CH_sequence.raw  
   creating: training/training/patient0236/
  inflating: training/training/patient0236/Info_2CH.cfg  
  inflating: training/training/patient0236/Info_4CH.cfg  
  inflating: training/training/patient0236/patient0236_2CH_ED.mhd  
  inflating: training/training/patient0236/patient0236_2CH_ED.raw  
  inflating: training/training/patient0236/patient0236_2CH_ED_gt.mhd  
  inflating: training/training/patient0236/patient0236_2CH_ED_gt.raw  
  inflating: training/training/patient0236/patient0236_2CH_ES.mhd  
  inflating: training/training/patient0236/patient0236_2CH_ES.raw  
  i

In [None]:
%cd /content/
!rm testing.zip training.zip
!ls

/content
sample_data  testing  training


In [None]:
!pip install simpleitk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting simpleitk
  Downloading SimpleITK-2.1.1.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (48.4 MB)
[K     |████████████████████████████████| 48.4 MB 1.9 MB/s 
[?25hInstalling collected packages: simpleitk
Successfully installed simpleitk-2.1.1.2


In [None]:
import os
import numpy as np
import pandas as pd
import time
from contextlib import contextmanager # timer
from functools import partial

import seaborn as sns
import SimpleITK as sitk
import skimage
import matplotlib.pylab as plt
import matplotlib.pyplot as pyplt

from skimage.transform import rescale, resize

import torch
from torch.utils import data
from torch.utils.data import DataLoader, Dataset

path = r"/content"

In [None]:
'''
Doc: Take out image one by one and apply transformations upon __getitem__
'''

class CamusIterator(Dataset):
    def __init__( 
        self, 
        data_type='train', 
        global_transforms=[],
        augment_transforms=[] # n usa por enquanto
    ):
        super(CamusIterator, self).__init__()
        
        train_file = path + '/training/training'
        test_file = path + '/testing/testing'
        
        if data_type == 'train':
            data_file = train_file
        elif data_type == 'test':
            data_file = test_file
        else:
            raise Exception('Wrong data_type for CamusIterator')
            
        self.data_type = data_type
        self.data_file = data_file
        self.global_transforms = global_transforms
        self.augment_transforms = augment_transforms
    
    def __read_image( self, patient_file, suffix ):
        image_file = '{}/{}/{}'.format(self.data_file, patient_file, patient_file+suffix )
        # Stolen from a StackOverflow answer
        # https://stackoverflow.com/questions/37290631/reading-mhd-raw-format-in-python
        image = sitk.GetArrayFromImage( sitk.ReadImage(image_file, sitk.sitkFloat32) )
        return image

    def __read_info( self, data_file ):
        info = {}
        with open( data_file, 'r' ) as f:
            for line in f.readlines():
                info_type, info_details = line.strip( '\n' ).split( ': ' )
                info[ info_type ] = info_details
        return info

    def __len__( self ):
        return len( os.listdir(self.data_file) )
    
    def __getitem__( self, index ):
        patient_file = 'patient{}'.format( f'{index+1:04}' ) # patient{0001}, patient{0002}, etc
        
        image_2CH_ED = self.__read_image( patient_file, '_2CH_ED.mhd' )
        image_2CH_ES = self.__read_image( patient_file, '_2CH_ES.mhd' )
        image_4CH_ED = self.__read_image( patient_file, '_4CH_ED.mhd' )
        image_4CH_ES = self.__read_image( patient_file, '_4CH_ES.mhd' )
        image_2CH_sequence = self.__read_image( patient_file, '_2CH_sequence.mhd' )
        image_4CH_sequence = self.__read_image( patient_file, '_4CH_sequence.mhd' )
        
        if self.data_type == 'train':
            image_2CH_ED_gt = self.__read_image( patient_file, '_2CH_ED_gt.mhd' )
            image_2CH_ES_gt = self.__read_image( patient_file, '_2CH_ES_gt.mhd' )
            image_4CH_ED_gt = self.__read_image( patient_file, '_4CH_ED_gt.mhd' )
            image_4CH_ES_gt = self.__read_image( patient_file, '_4CH_ES_gt.mhd' )

        info_2CH = self.__read_info( '{}/{}/{}'.format(self.data_file, patient_file, 'Info_2CH.cfg') )
        info_4CH = self.__read_info( '{}/{}/{}'.format(self.data_file, patient_file, 'Info_4CH.cfg') )
        
        if self.data_type == 'train':
            data = {
                'patient': patient_file,
                '2CH_ED': image_2CH_ED,
                '2CH_ES': image_2CH_ES,
                '4CH_ED': image_4CH_ED,
                '4CH_ES': image_4CH_ES,
                '2CH_sequence': image_2CH_sequence,
                '4CH_sequence': image_4CH_sequence,
                '2CH_ED_gt': image_2CH_ED_gt,
                '2CH_ES_gt': image_2CH_ES_gt,
                '4CH_ED_gt': image_4CH_ED_gt,
                '4CH_ES_gt': image_4CH_ES_gt,
                'info_2CH': info_2CH,    # Dictionary of infos
                'info_4CH': info_4CH}    # Dictionary of infos
        elif self.data_type == 'test':
            data = {
                'patient': patient_file,
                '2CH_ED': image_2CH_ED,
                '2CH_ES': image_2CH_ES,
                '4CH_ED': image_4CH_ED,
                '4CH_ES': image_4CH_ES,
                '2CH_sequence': image_2CH_sequence,
                '4CH_sequence': image_4CH_sequence,
                'info_2CH': info_2CH,   # Dictionary of infos
                'info_4CH': info_4CH}   # Dictionary of infos
        
        # Transforms
        for transform in self.global_transforms:
            data = transform(data)
        for transform in self.augment_transforms:
            data = transform(data)
            
        return data

    def __iter__( self ):
        for i in range( len(self) ):
            yield self[ i ]

In [None]:
class ResizeImagesAndLabels(object):
    ''' 
    Ripped out of Prof. Stough's code 
    '''
    
    def __init__(self, size, fields=['2CH_ED', '2CH_ES', '4CH_ED', '4CH_ES',
                                     '2CH_ED_gt', '2CH_ES_gt', '4CH_ED_gt', '4CH_ES_gt']):
        self.size = size
        self.fields = fields
        
    def __call__(self, data):
        for field in self.fields:            
            # transpose to go from chan x h x w to h x w x chan and back.
            data[field] = resize(data[field].transpose([1,2,0]), 
                                 self.size, mode='constant', 
                                 anti_aliasing=True)
            data[field] = data[field].transpose( [2,0,1] )      

        return data

In [None]:
param_Loader = {'batch_size': 1,
                'shuffle': True,
                'num_workers': 8}

global_transforms = [
    ResizeImagesAndLabels(size=[256, 256])
]

In [None]:
train_iter = CamusIterator(
    data_type='train',
    global_transforms=global_transforms,
    #augment_transforms=augment_transforms,
)

test_iter = CamusIterator(
    data_type='test',
    global_transforms=global_transforms,
    #augment_transforms=augment_transforms,
)

print('Number of Train Samples: ', len(train_iter))
print('Number of Test Samples: ', len(test_iter))

data = DataLoader(train_iter, **param_Loader)

Number of Train Samples:  500
Number of Test Samples:  50


# Informações do dataset

In [None]:
def display_image(image):
    # Stolen from a StackOverflow answer
    # https://stackoverflow.com/questions/37290631/reading-mhd-raw-format-in-python
    
    plt.figure(figsize=(20,16))
    plt.gray()
    plt.subplots_adjust(0,0,1,1,0.01,0.01)
    for i in range(image.shape[0]):
        plt.subplot(5,6,i+1), plt.imshow(image[i]), plt.axis('off')
        # use plt.savefig(...) here if you want to save the images as .jpg, e.g.,
    plt.show()

## 2CH infos

In [None]:
train_iter[0]['info_2CH']

{'ED': '1',
 'ES': '18',
 'NbFrame': '18',
 'Sex': 'F',
 'Age': '56',
 'ImageQuality': 'Good',
 'LVedv': '94.0',
 'LVesv': '34.6',
 'LVef': '63.2'}

In [None]:
info_2CH_list = []

# Adiciona os dados da info_2CH do paciente no vetor
# Pra cada elemento do vetor, adiciona o par {chave : valor}
# com {'patient': id do paciente} ex: {'patient':'patient0001'}
for patient_data in train_iter:
    info_2CH_list.append(patient_data['info_2CH'])
    info_2CH_list[-1]['patient'] = patient_data['patient']
    print (info_2CH_list[-1])

info_2CH_df = pd.DataFrame(info_2CH_list)
info_2CH_df.set_index('patient', inplace=True)

{'ED': '1', 'ES': '18', 'NbFrame': '18', 'Sex': 'F', 'Age': '56', 'ImageQuality': 'Good', 'LVedv': '94.0', 'LVesv': '34.6', 'LVef': '63.2', 'patient': 'patient0001'}
{'ED': '1', 'ES': '17', 'NbFrame': '17', 'Sex': 'M', 'Age': '55', 'ImageQuality': 'Medium', 'LVedv': '119.1', 'LVesv': '60.3', 'LVef': '49.4', 'patient': 'patient0002'}
{'ED': '1', 'ES': '17', 'NbFrame': '17', 'Sex': 'F', 'Age': '36', 'ImageQuality': 'Good', 'LVedv': '105.7', 'LVesv': '47.1', 'LVef': '55.4', 'patient': 'patient0003'}
{'ED': '1', 'ES': '19', 'NbFrame': '19', 'Sex': 'F', 'Age': '79', 'ImageQuality': 'Good', 'LVedv': '97.9', 'LVesv': '37.1', 'LVef': '62.1', 'patient': 'patient0004'}
{'ED': '1', 'ES': '20', 'NbFrame': '20', 'Sex': 'F', 'Age': '78', 'ImageQuality': 'Medium', 'LVedv': '70.5', 'LVesv': '32.8', 'LVef': '53.4', 'patient': 'patient0005'}
{'ED': '1', 'ES': '18', 'NbFrame': '18', 'Sex': 'M', 'Age': '74', 'ImageQuality': 'Good', 'LVedv': '80.6', 'LVesv': '35.6', 'LVef': '55.8', 'patient': 'patient0006'

RuntimeError: ignored

In [None]:
info_2CH_df.info()

In [None]:
info_2CH_df.head()

In [None]:
fig, axs = plt.subplots(ncols=3, figsize=(20,8))
sns.countplot(info_2CH_df['ImageQuality'], ax=axs[0])
sns.countplot(info_2CH_df['Sex'], ax=axs[1])
sns.distplot(info_2CH_df['Age'].apply(lambda x: int(x)), ax=axs[2])

## 2CH imagens

In [None]:
patient = train_iter[1] # exemplo
patient['2CH_ED'].shape

In [None]:
display_image(patient['2CH_ED'])
display_image(patient['2CH_ED_gt'])
patient['info_2CH']['ImageQuality']

# U-net


---
Referência: https://colab.research.google.com/github/zaidalyafeai/Notebooks/blob/master/unet.ipynb#scrollTo=hONrrUbW9CM_


---




In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import keras
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Input, Conv2DTranspose, Concatenate, BatchNormalization, UpSampling2D
from keras.layers import  Dropout, Activation
from keras.optimizers import Adam, SGD
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras import backend as K
from keras.utils import plot_model
import tensorflow as tf
import glob
import random
import cv2
from random import shuffle

## Métrica IoU

In [None]:
def mean_iou(y_true, y_pred):
    threshold = 0.5
    yt0 = y_true[:,:,:,0]
    yp0 = K.cast(y_pred[:,:,:,0] > threshold, 'float32')
    inter = tf.count_nonzero(tf.logical_and(tf.equal(yt0, 1), tf.equal(yp0, 1)))
    union = tf.count_nonzero(tf.add(yt0, yp0))
    iou = tf.where(tf.equal(union, 0), 1., tf.cast(inter/union, 'float32'))
    return iou

## Modelo

⚡ Verificar se condiz mediante comparação com o artigo oficial da Unet.

In [None]:
def unet(sz = (256, 256, 3)):
  x = Input(sz)
  inputs = x
  
  #down sampling 
  f = 8
  layers = []
  
  for i in range(0, 6):
    x = Conv2D(f, 3, activation='relu', padding='same') (x)
    x = Conv2D(f, 3, activation='relu', padding='same') (x)
    layers.append(x)
    x = MaxPooling2D() (x)
    f = f*2
  ff2 = 64 
  
  #bottleneck 
  j = len(layers) - 1
  x = Conv2D(f, 3, activation='relu', padding='same') (x)
  x = Conv2D(f, 3, activation='relu', padding='same') (x)
  x = Conv2DTranspose(ff2, 2, strides=(2, 2), padding='same') (x)
  x = Concatenate(axis=3)([x, layers[j]])
  j = j -1 
  
  #upsampling 
  for i in range(0, 5):
    ff2 = ff2//2
    f = f // 2 
    x = Conv2D(f, 3, activation='relu', padding='same') (x)
    x = Conv2D(f, 3, activation='relu', padding='same') (x)
    x = Conv2DTranspose(ff2, 2, strides=(2, 2), padding='same') (x)
    x = Concatenate(axis=3)([x, layers[j]])
    j = j -1 
    
  
  #classification 
  x = Conv2D(f, 3, activation='relu', padding='same') (x)
  x = Conv2D(f, 3, activation='relu', padding='same') (x)
  outputs = Conv2D(1, 1, activation='sigmoid') (x)
  
  #model creation 
  model = Model(inputs=[inputs], outputs=[outputs])
  model.compile(optimizer = keras.optimizer.Adam(learning_rate=0.00001), loss = 'binary_crossentropy', metrics = [mean_iou])
  
  return model

In [None]:
model = unet()

## Callbacks
Salva o modelo em cada época e mostra as predições 

In [None]:
def build_callbacks():
        checkpointer = ModelCheckpoint(filepath='unet.h5', verbose=0, save_best_only=True, save_weights_only=True)
        callbacks = [checkpointer, PlotLearning()]
        return callbacks

# inheritance for training process plot 
class PlotLearning(keras.callbacks.Callback):

    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []
        #self.fig = plt.figure()
        self.logs = []
    def on_epoch_end(self, epoch, logs={}):
        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('mean_iou'))
        self.val_acc.append(logs.get('val_mean_iou'))
        self.i += 1
        print('i=',self.i,'loss=',logs.get('loss'),'val_loss=',logs.get('val_loss'),'mean_iou=',logs.get('mean_iou'),'val_mean_iou=',logs.get('val_mean_iou'))
        
        #choose a random test image and preprocess
        path = np.random.choice(test_files)
        raw = Image.open(f'images/{path}')
        raw = np.array(raw.resize((256, 256)))/255.
        raw = raw[:,:,0:3]
        
        #predict the mask 
        pred = model.predict(np.expand_dims(raw, 0))
        
        #mask post-processing 
        msk  = pred.squeeze()
        msk = np.stack((msk,)*3, axis=-1)
        msk[msk >= 0.5] = 1 
        msk[msk < 0.5] = 0 
        
        #show the mask and the segmented image 
        combined = np.concatenate([raw, msk, raw* msk], axis = 1)
        plt.axis('off')
        plt.imshow(combined)
        plt.show()

## Treino

⚡ Acessar referência (https://colab.research.google.com/github/zaidalyafeai/Notebooks/blob/master/unet.ipynb#scrollTo=_MXGinNg9Wjj) e alterar essa seção de acordo com o nosso problema

In [None]:
train_steps = len(train_files) //batch_size
test_steps = len(test_files) //batch_size
model.fit_generator(train_generator, 
                    epochs = 30, steps_per_epoch = train_steps,validation_data = test_generator, validation_steps = test_steps,
                    callbacks = build_callbacks(), verbose = 0)

## Teste

⚡ Acessar referência (https://colab.research.google.com/github/zaidalyafeai/Notebooks/blob/master/unet.ipynb#scrollTo=_MXGinNg9Wjj) e alterar essa seção de acordo com o nosso problema

In [None]:
raw = Image.open('test.jpg')
raw = np.array(raw.resize((256, 256)))/255.
raw = raw[:,:,0:3]

#predict the mask 
pred = model.predict(np.expand_dims(raw, 0))

#mask post-processing 
msk  = pred.squeeze()
msk = np.stack((msk,)*3, axis=-1)
msk[msk >= 0.5] = 1 
msk[msk < 0.5] = 0 

#show the mask and the segmented image 
combined = np.concatenate([raw, msk, raw* msk], axis = 1)
plt.axis('off')
plt.imshow(combined)
plt.show()

# Comparação dos resultados

Em nossas referências, 5 artigos utilizaram variações da U-Net no dataset CAMUS. A métrica comum entre eles é o coeficiente Dice. Seus resultados foram:

* 97% - [Echocardiographic image segmentation using deep Res-U network](https://www.sciencedirect.com/science/article/pii/S1746809420303761?casa_token=9hG0u9p6ZW8AAAAA:Pdn-8V1ALTsj765bXAcWeVbsgF8OTXGbakkgphbVRb0ucO1bxoonPHZbN96uOuZJu0NZrKn29A)
* 93% - [Left ventricular and atrial segmentation of 2D echocardiography with convolutional neural networks](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11313/113130A/Left-ventricular-and-atrial-segmentation-of-2D-echocardiography-with-convolutional/10.1117/12.2547375.short?SSO=1)
* 85% [MCAL: An Anatomical Knowledge Learning
Model for Myocardial Segmentation
in 2-D Echocardiography](https://ieeexplore.ieee.org/abstract/document/9714298/?casa_token=fSucjTTX1XQAAAAA:n-iUybCr_-NiO9vPckWy1v4jB6MiWkvbUShiZKkEiTZ699vqyUIDC1blNkARziXpAN-nHHQpDw)
* 95% - [MFP-Unet: A novel deep learning based approach for left ventricle segmentation in echocardiography](https://www.sciencedirect.com/science/article/pii/S1120179719304508?casa_token=khB7nT4tsXkAAAAA:fN6geVXUNbf1ISi0y5xzEbLUAfYR0I1toUsdk4QtxY7Zm9NGJuCT_7YGwoXcMgjFd2zRrp_5ng)
* 92% - [Assessing the generalizability of temporally coherent echocardiography video segmentation](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11596/115961O/Assessing-the-generalizability-of-temporally-coherent-echocardiography-video-segmentation/10.1117/12.2580874.short)

Por conta do nosso escopo limitado, não realizamos a aumentação de dados que alguns desses artigos fizeram. Também optamos por utilizar somente a U-Net padrão, já implementada na biblioteca TensorFlow, inicialmente com as configurações descritas em [Segmentation of Left Ventricle in 2D echocardiography using deep learning](https://link.springer.com/chapter/10.1007/978-3-030-39343-4_43), que comparava diferentes técnicas de segmentação, com a U-Net possuindo Dice de 93% e IoU de 98%, porém com um dataset diferente do CAMUS. Nós superamos esses valores com um Dice de 98% e IoU de 97%, um resultado mais que satisfatório para o escopo que estamos lidando.