### Instalação de Pacotes Auxiliares

In [None]:
!pip install lungs_segmentation
!pip uninstall opencv-python
!pip uninstall opencv-contrib-python
!pip3 install opencv-contrib-python==4.5.5.62

### Importação de Pacotes Auxiliares

In [2]:
from distutils.dir_util import copy_tree
from bbox_delimiters import bbox_utils
import matplotlib.pyplot as plt
import shutil, tarfile, os
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2

In [None]:
# Mount Google Drive at /content/drive; later cells read from and write to
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

### Baixando os Dados do Servidor

In [6]:
# Copy the CheXpert archive from Drive to the local disk before unpacking it.
shutil.copy(src = '/content/drive/MyDrive/datasets-projeto/chexpert-v10-small.zip',
            dst = '/content/')

# Extract into /content explicitly instead of relying on the current working
# directory (an empty extract_dir is resolved relative to it).
shutil.unpack_archive(filename = '/content/chexpert-v10-small.zip',
                      extract_dir = '/content/', format = 'zip')

# The local copy of the zip is removed once its contents are extracted.
os.remove(path = '/content/chexpert-v10-small.zip')

In [47]:
# Copy the VinBigData archive from Drive to the local disk before unpacking it.
shutil.copy(src = '/content/drive/MyDrive/datasets-projeto/vinbigdata-dataset.zip',
            dst = '/content/')

# Extract into /content explicitly instead of relying on the current working
# directory (an empty extract_dir is resolved relative to it).
shutil.unpack_archive(filename = '/content/vinbigdata-dataset.zip',
                      extract_dir = '/content/', format = 'zip')

# The local copy of the zip is removed once its contents are extracted.
os.remove(path = '/content/vinbigdata-dataset.zip')

In [None]:
# Copy the folder holding the NIH tar archives from Drive to the local disk.
copy_tree(src = '/content/drive/MyDrive/datasets-projeto/nih-files',
          dst = '/content/nih-files/')

extract_files = os.listdir('/content/nih-files')

# Extract every archive into /content and delete it as soon as it is unpacked.
for archive_name in extract_files:
  archive_path = f'/content/nih-files/{archive_name}'
  print(f'Extraindo {archive_path}')
  # The context manager closes the archive even when extraction fails midway.
  with tarfile.open(archive_path) as archive:
    archive.extractall('/content/')
  os.remove(archive_path)

# Remove the staging folder once all archives have been handled.
shutil.rmtree(path = '/content/nih-files')

In [3]:
# Load /content/all_datasets.csv using its first column as the index.
# Later cells filter on its 'Dataset' column and read image locations from 'Path'.
dataset = pd.read_csv(filepath_or_buffer = '/content/all_datasets.csv', index_col = 0)

### Aplicando o Algoritmo de Localização Pulmonar nas Imagens

In [4]:
# Copy the zipped localization model from Drive to the local disk.
shutil.copy(src = '/content/drive/MyDrive/lung-localization/vgg/model_vgg5.zip',
            dst = '/content/')

# Extract into /content explicitly instead of relying on the current working
# directory (an empty extract_dir is resolved relative to it).
shutil.unpack_archive(filename = '/content/model_vgg5.zip',
                      extract_dir = '/content/', format = 'zip')

os.remove(path = '/content/model_vgg5.zip')

# The model is loaded from /content/content/model_vgg5, so the archive is
# assumed to carry a top-level "content/" folder -- TODO confirm.
model_loc = tf.keras.models.load_model('/content/content/model_vgg5')

# Helper object used by make_localization to predict, crop and save images.
lung_localization = bbox_utils()

In [5]:
def make_localization(filename):
  '''
  Crop a radiography image to its lung region and save the result.

  The bounding box is obtained from `lung_localization.make_localization_cnn`
  with the `model_loc` model. The image is then read with OpenCV, cropped and
  handed to `lung_localization.save_img` with the same path, so the source
  file is presumably overwritten (confirm in bbox_utils).

  Processing is best-effort: a failure on one image is reported on stdout and
  the function returns normally, so a batch run can continue. Interrupts
  (KeyboardInterrupt) are not intercepted.

  Args:
    filename (str) --> relative location of the image on disk

  Returns:
    None
  '''

  try:
    coordinates = lung_localization.make_localization_cnn(model = model_loc, path_file = filename)
    image_array = cv2.imread(filename = filename)
    # cv2.imread reports an unreadable file by returning None instead of raising.
    if image_array is None:
      raise ValueError('imagem não pôde ser lida por cv2.imread')
    cropped_image = lung_localization.crop_image(image = image_array, list_coordinates = coordinates)
    lung_localization.save_img(image = cropped_image, path_file = filename)
  except Exception as error:
    # Keep the batch going, but make the failure visible instead of hiding it.
    print(f'Erro ao recortar {filename} | {type(error).__name__}: {error}')

  return

In [6]:
def test_image_files(image_files, arr_ref = 'Array 1'):
  '''
  testa a existência das imagens nos diretórios

  Args:
    image_files (list) --> lista com a localização relativa das imagens no 
    computador
    arr_ref (str) --> Nome de referência da lista de strings analisada
  '''

  for image_file in image_files:
    try:
      test = plt.imread(image_file)
    except:
      print(f'Erro ao ler {image_file} | Array de Referência: {arr_ref}')

In [19]:
# Select the CheXpert rows and collect their image paths.
chexpert_mask = dataset['Dataset'] == 'CheXpert'
dataset_chexpert = dataset[chexpert_mask]
image_files = dataset_chexpert['Path'].tolist()

# Split the paths into 7 consecutive chunks; boundary k sits at int(k * total / 7).
total_files = len(image_files)
bounds = [int(k * total_files / 7) for k in range(8)]
(img_files1, img_files2, img_files3, img_files4,
 img_files5, img_files6, img_files7) = [image_files[start:stop]
                                        for start, stop in zip(bounds[:-1], bounds[1:])]

In [None]:
# Verify that the images of each chunk can be read; the label identifies the
# chunk in any error message.
chunks_to_check = [img_files1, img_files2, img_files3, img_files4,
                   img_files5, img_files6, img_files7]
for chunk_number, chunk_files in enumerate(chunks_to_check, start = 1):
  test_image_files(image_files = chunk_files, arr_ref = f'Array {chunk_number}')

In [None]:
# Crop the images chunk by chunk, announcing each chunk once it has been traversed.
# make_localization does not propagate per-image errors, so the message does
# not guarantee that every image was cropped.
chunks_to_crop = (('img_files1', img_files1), ('img_files2', img_files2),
                  ('img_files3', img_files3), ('img_files4', img_files4),
                  ('img_files5', img_files5), ('img_files6', img_files6),
                  ('img_files7', img_files7))
for chunk_name, chunk_files in chunks_to_crop:
  _ = [make_localization(image_path) for image_path in chunk_files]
  print(f'Recorte pulmonar nas imagens "{chunk_name}" feito com sucesso!')

In [None]:
# Make sure the Drive destination is a directory; otherwise shutil.move would
# rename the zip to a file called "datasets-lung-loc".
destination_dir = '/content/drive/MyDrive/datasets-lung-loc'
os.makedirs(destination_dir, exist_ok = True)

# Zip the contents of /content/CheXpert-v1.0-small. The absolute base_name
# places the archive in /content regardless of the current working directory.
archive_path = shutil.make_archive(base_name = '/content/CheXpert-v1.0-small', format = 'zip',
                                   root_dir = '/content/CheXpert-v1.0-small')
shutil.move(archive_path, destination_dir)

In [71]:
# Select the VinBigData rows and collect their image paths.
vinbigdata_mask = dataset['Dataset'] == 'VinBigData'
dataset_vinbigdata = dataset[vinbigdata_mask]
image_files = dataset_vinbigdata['Path'].tolist()

# Split the paths into two consecutive halves; the cut sits at int(total / 2).
total_files = len(image_files)
midpoint = int(1 * total_files / 2)
img_files1 = image_files[:midpoint]
img_files2 = image_files[midpoint:]

In [None]:
# Verify that the images of each half can be read; the label identifies the
# half in any error message.
for chunk_number, chunk_files in enumerate([img_files1, img_files2], start = 1):
  test_image_files(image_files = chunk_files, arr_ref = f'Array {chunk_number}')

In [None]:
# Crop the images of each half, announcing each one once it has been traversed.
# make_localization does not propagate per-image errors, so the message does
# not guarantee that every image was cropped.
for chunk_name, chunk_files in (('img_files1', img_files1), ('img_files2', img_files2)):
  _ = [make_localization(image_path) for image_path in chunk_files]
  print(f'Recorte pulmonar nas imagens "{chunk_name}" feito com sucesso!')

In [None]:
# Make sure the Drive destination is a directory; otherwise shutil.move would
# rename the zip to a file called "datasets-lung-loc".
destination_dir = '/content/drive/MyDrive/datasets-lung-loc'
os.makedirs(destination_dir, exist_ok = True)

# Zip the contents of /content/vinbigdata. The absolute base_name places the
# archive in /content regardless of the current working directory.
archive_path = shutil.make_archive(base_name = '/content/vinbigdata', format = 'zip',
                                   root_dir = '/content/vinbigdata')
shutil.move(archive_path, destination_dir)

In [7]:
# Select the NIH rows and collect their image paths.
nih_mask = dataset['Dataset'] == 'NIH'
dataset_nih = dataset[nih_mask]
image_files = dataset_nih['Path'].tolist()

# Split the paths into 4 consecutive chunks; boundary k sits at int(k * total / 4).
total_files = len(image_files)
bounds = [int(k * total_files / 4) for k in range(5)]
img_files1, img_files2, img_files3, img_files4 = [image_files[start:stop]
                                                  for start, stop in zip(bounds[:-1], bounds[1:])]

In [None]:
# Verify that the images of each chunk can be read; the label identifies the
# chunk in any error message.
chunks_to_check = [img_files1, img_files2, img_files3, img_files4]
for chunk_number, chunk_files in enumerate(chunks_to_check, start = 1):
  test_image_files(image_files = chunk_files, arr_ref = f'Array {chunk_number}')

In [None]:
# Crop the images chunk by chunk, announcing each chunk once it has been traversed.
# make_localization does not propagate per-image errors, so the message does
# not guarantee that every image was cropped.
# NOTE(review): the first path of img_files1 is skipped (img_files1[1:]); the
# reason is not visible in this notebook -- confirm it is intentional.
chunks_to_crop = (('img_files1', img_files1[1:]), ('img_files2', img_files2),
                  ('img_files3', img_files3), ('img_files4', img_files4))
for chunk_name, chunk_files in chunks_to_crop:
  _ = [make_localization(image_path) for image_path in chunk_files]
  print(f'Recorte pulmonar nas imagens "{chunk_name}" feito com sucesso!')

In [None]:
# Make sure the Drive destination is a directory; otherwise shutil.move would
# rename the zip to a file called "datasets-lung-loc".
destination_dir = '/content/drive/MyDrive/datasets-lung-loc'
os.makedirs(destination_dir, exist_ok = True)

# Zip the contents of /content/images. The absolute base_name places the
# archive in /content regardless of the current working directory.
archive_path = shutil.make_archive(base_name = '/content/images', format = 'zip',
                                   root_dir = '/content/images')
shutil.move(archive_path, destination_dir)