In [None]:
import numpy as np
import PIL
import matplotlib.pyplot as plt
import pandas as pd
import random

import datetime
import os
import gc
import re
import threading

import urllib.request
import requests

import tarfile
import zipfile

import sklearn
import sklearn.preprocessing
import sklearn.metrics
import sklearn.model_selection
import tensorflow as tf

from google.colab import drive
# Mount Google Drive at /gdrive (only works inside Google Colab)
drive.mount('/gdrive')

# Set seed for reproducibility
seed=1
# Seeds the Python, NumPy and TensorFlow random generators in one call
tf.keras.utils.set_random_seed(seed)
# Ask TensorFlow to run its ops deterministically
tf.config.experimental.enable_op_determinism()

## Dados

### Cohen dataset

In [None]:
# Download every file of the Cohen dataset "images" folder into cohen_dir,
# then fetch the metadata table that describes those images.
cohen_dir  = "cohen_dataset/"
os.makedirs(cohen_dir, exist_ok=True)
cohen_url = "https://api.github.com/repos/ieee8023/covid-chestxray-dataset/contents/images"

# Fail right away on an HTTP error (e.g. API rate limit) instead of crashing
# later while indexing an error payload.
response = requests.get(cohen_url, timeout=60)
response.raise_for_status()
# Entries without a download URL (the API reports null for sub-directories)
# cannot be fetched with urlretrieve, so they are left out.
images_requisition = [entry for entry in response.json() if entry.get('download_url')]
files = []

# Split the images to download across the 24 threads
n_threads = 24
total_images = len(images_requisition)
images_per_thread = total_images // n_threads

def download_images(images, begining, stop):
  """Download images[begining:stop] into cohen_dir.

  Args:
    images: listing entries, each with a 'download_url' and a 'name' key.
    begining: index of the first entry to download (inclusive).
    stop: index after the last entry to download (exclusive).

  Every downloaded URL is also appended to the module-level `files` list.
  """
  for moment in range(begining, stop):
    url = images[moment]['download_url']
    files.append(url)
    urllib.request.urlretrieve(url, os.path.join(cohen_dir, images[moment]['name']))

threads = []
for i in range(n_threads):
  first = i * images_per_thread
  # The last thread also takes the remainder of the integer division
  last = total_images if i == n_threads - 1 else first + images_per_thread
  threads.append(threading.Thread(target=download_images, args=(images_requisition, first, last)))

for thread in threads:
  thread.start()

# NOTE(review): an exception raised inside a worker does not stop this cell;
# check the printed tracebacks if images are missing afterwards.
for thread in threads:
  thread.join()

urllib.request.urlretrieve("https://raw.githubusercontent.com/ieee8023/covid-chestxray-dataset/master/metadata.csv", "cohen_metadata.csv")

cohen_metadata = pd.read_csv("cohen_metadata.csv")
cohen_metadata

### Kag dataset

Troque "CAMINHO_DAS_CREDENCIAIS" pelo caminho onde estão originalmente suas credenciais do Kaggle (`kaggle.json`). Se elas já estiverem em `/root/.kaggle`, como esperado a partir da 4ª linha abaixo, pode começar da 5ª linha.

In [None]:
!cp CAMINHO_DAS_CREDENCIAIS ./
!rm /root/.kaggle
!mkdir /root/.kaggle
!mv ./kaggle.json /root/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download --unzip paultimothymooney/chest-xray-pneumonia
!rename 's/chest_xray/kag_dataset/' *

## Pré-processamento

In [None]:
# Every image is resized to this square size (pixels) before being stored
img_height = img_width = 224

Classes

In [None]:
# The class mapping changes from dataset to dataset, but one naming pattern is
# kept for each disease and for healthy patients

# kag x Cohen
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal']

# Integer-encode the class names, then reshape to one column so the one-hot
# encoder sees a single categorical feature
le = sklearn.preprocessing.LabelEncoder()
le_categories = le.fit_transform(categories)
le_categories = le_categories.reshape(len(le_categories), 1)

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4;
# try the current name first and fall back for older installations
try:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse_output=False)
except TypeError:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse=False)
ohe.fit(le_categories)

### COHEN dataset

Será distinguida a covid de outros vírus, com as outras classes agrupadas por reino (bactérias e fungos)

In [None]:
# X-ray images
cohen_xray_metada = cohen_metadata[cohen_metadata['modality'] == 'X-ray']
is_covid_19 = cohen_xray_metada['finding'] == 'Pneumonia/Viral/COVID-19'
covid_19_metadata = cohen_xray_metada[is_covid_19]
# Everything that is not COVID-19: searched for the other viruses and reused
# to speed up the search for the remaining classes
other_metadata = cohen_xray_metada[~is_covid_19]

# na=False: a row with a missing finding belongs to none of the classes
# (without it a NaN in the column makes the boolean mask invalid)
other_virus_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Viral', na=False)]
bacterial_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Bacterial', na=False)]
fungal_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Fungal', na=False)]

In [None]:
# Rows per non-COVID class (other viruses, bacterial, fungal) followed by their total
cohen_non_covid_counts = tuple(len(table) for table in (other_virus_metadata, bacterial_metadata, fungal_metadata))
(*cohen_non_covid_counts, sum(cohen_non_covid_counts))

#### Baseline

In [None]:
# Number of X-ray rows per finding
cohen_xray_metada['finding'].value_counts()

In [None]:
cohen_x_dataset = []
cohen_y_dataset = []
cohen_dir = "cohen_dataset"

# Metadata tables paired with their class label, in match-priority order
cohen_class_tables = [
  (covid_19_metadata, 'cohen_covid19'),
  (other_virus_metadata, 'cohen_other_virus'),
  (bacterial_metadata, 'cohen_bacteria'),
  (fungal_metadata, 'cohen_fungal'),
]
total_images = len(covid_19_metadata)+len(other_virus_metadata)+len(bacterial_metadata)+len(fungal_metadata)

# Build the filename -> label lookup and the one-hot column of each label once,
# instead of filtering every metadata table for every file on disk
cohen_label_by_filename = {}
cohen_one_hot_by_label = {}
for class_table, class_label in cohen_class_tables:
  cohen_one_hot_by_label[class_label] = ohe.transform([le.transform([class_label])]).reshape(len(le_categories), 1)
  for filename in class_table['filename']:
    # setdefault keeps the first (highest-priority) label seen for a filename
    cohen_label_by_filename.setdefault(filename, class_label)

cohen_found_by_label = dict.fromkeys(cohen_one_hot_by_label, 0)

# Look up in the metadata the images that are needed
# NOTE(review): os.listdir order is not guaranteed, and the seeded train/test
# split later depends on it — confirm how the trained models were split
# before sorting this list.
training_data_filenames = os.listdir(cohen_dir)
for img_name in training_data_filenames:
  class_label = cohen_label_by_filename.get(img_name)
  if class_label is None:
    # File is not an X-ray of one of the four classes
    continue

  with PIL.Image.open(os.path.join(cohen_dir, img_name)) as img:
    img_resized = img.convert('L').resize((img_width, img_height))
    # PIL sizes are (width, height) while the pixel array is (height, width)
    cohen_x_dataset.append(np.asarray(img_resized).reshape(img_height, img_width, 1))

  # The label is stored only after the image was read, keeping x and y aligned
  cohen_y_dataset.append(cohen_one_hot_by_label[class_label].copy())
  cohen_found_by_label[class_label] += 1

# Per-class counters, kept under their original names
covid_found = cohen_found_by_label['cohen_covid19']
other_virus_found = cohen_found_by_label['cohen_other_virus']
bacterial_found = cohen_found_by_label['cohen_bacteria']
fungal_found = cohen_found_by_label['cohen_fungal']

#### Separação de visualizações (ANTEROPOSTERIOR, POSTEROANTERIOR, AP SUPINO e Lateral)

In [None]:
# Split each class table by the 'view' column: AP, PA, AP Supine and L (lateral)
covid_19_ap_metadata = covid_19_metadata[covid_19_metadata['view'] == 'AP']
covid_19_pa_metadata = covid_19_metadata[covid_19_metadata['view'] == 'PA']
covid_19_ap_supine_metadata = covid_19_metadata[covid_19_metadata['view'] == 'AP Supine']
covid_19_l_metadata = covid_19_metadata[covid_19_metadata['view'] == 'L']

other_virus_ap_metadata = other_virus_metadata[other_virus_metadata['view'] == 'AP']
other_virus_pa_metadata = other_virus_metadata[other_virus_metadata['view'] == 'PA']
other_virus_ap_supine_metadata = other_virus_metadata[other_virus_metadata['view'] == 'AP Supine']
# Fixed: this table was filtered from covid_19_metadata
other_virus_l_metadata = other_virus_metadata[other_virus_metadata['view'] == 'L']

bacterial_ap_metadata = bacterial_metadata[bacterial_metadata['view'] == 'AP']
bacterial_pa_metadata = bacterial_metadata[bacterial_metadata['view'] == 'PA']
bacterial_l_metadata = bacterial_metadata[bacterial_metadata['view'] == 'L']
bacterial_ap_supine_metadata = bacterial_metadata[bacterial_metadata['view'] == 'AP Supine']

# Fixed: the PA, AP Supine and L tables all filtered on 'AP'
# NOTE(review): this changes which images get which label — confirm that the
# saved view-split models were trained with the corrected labels.
fungal_ap_metadata = fungal_metadata[fungal_metadata['view'] == 'AP']
fungal_pa_metadata = fungal_metadata[fungal_metadata['view'] == 'PA']
fungal_ap_supine_metadata = fungal_metadata[fungal_metadata['view'] == 'AP Supine']
fungal_l_metadata = fungal_metadata[fungal_metadata['view'] == 'L']

Classes

In [None]:
# The class mapping changes from dataset to dataset, but one naming pattern is
# kept for each disease and for healthy patients

# kag x Cohen (Cohen classes are further split by view)
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_ap_covid19', 'cohen_ap_other_virus', 'cohen_ap_bacterial', 'cohen_ap_fungal',
              'cohen_pa_covid19', 'cohen_pa_other_virus', 'cohen_pa_bacterial', 'cohen_pa_fungal',
              'cohen_l_covid19', 'cohen_l_other_virus', 'cohen_l_bacterial', 'cohen_l_fungal',
              'cohen_ap_supine_covid19', 'cohen_ap_supine_other_virus', 'cohen_ap_supine_bacterial', 'cohen_ap_supine_fungal']

# Integer-encode the class names, then reshape to one column so the one-hot
# encoder sees a single categorical feature
le = sklearn.preprocessing.LabelEncoder()
le_categories = le.fit_transform(categories)
le_categories = le_categories.reshape(len(le_categories), 1)

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4;
# try the current name first and fall back for older installations
try:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse_output=False)
except TypeError:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse=False)
ohe.fit(le_categories)

Find labels

In [None]:
cohen_x_dataset = []
cohen_y_dataset = []
cohen_dir = "cohen_dataset"

# (metadata table, class label, counter group) in match-priority order:
# a filename present in several tables gets the label of the first one
cohen_view_tables = [
  (covid_19_ap_metadata, 'cohen_ap_covid19', 'covid'),
  (covid_19_pa_metadata, 'cohen_pa_covid19', 'covid'),
  (covid_19_ap_supine_metadata, 'cohen_ap_supine_covid19', 'covid'),
  (covid_19_l_metadata, 'cohen_l_covid19', 'covid'),
  (other_virus_ap_metadata, 'cohen_ap_other_virus', 'other_virus'),
  (other_virus_pa_metadata, 'cohen_pa_other_virus', 'other_virus'),
  (other_virus_ap_supine_metadata, 'cohen_ap_supine_other_virus', 'other_virus'),
  (other_virus_l_metadata, 'cohen_l_other_virus', 'other_virus'),
  (bacterial_ap_metadata, 'cohen_ap_bacterial', 'bacterial'),
  (bacterial_pa_metadata, 'cohen_pa_bacterial', 'bacterial'),
  (bacterial_ap_supine_metadata, 'cohen_ap_supine_bacterial', 'bacterial'),
  (bacterial_l_metadata, 'cohen_l_bacterial', 'bacterial'),
  (fungal_ap_metadata, 'cohen_ap_fungal', 'fungal'),
  (fungal_pa_metadata, 'cohen_pa_fungal', 'fungal'),
  (fungal_ap_supine_metadata, 'cohen_ap_supine_fungal', 'fungal'),
  (fungal_l_metadata, 'cohen_l_fungal', 'fungal'),
]
total_images = len(covid_19_metadata)+len(other_virus_metadata)+len(bacterial_metadata)+len(fungal_metadata)

# Build the filename -> (label, group) lookup and the one-hot column of each
# label once, instead of filtering every table for every file on disk
cohen_label_by_filename = {}
cohen_one_hot_by_label = {}
for view_table, view_label, view_group in cohen_view_tables:
  cohen_one_hot_by_label[view_label] = ohe.transform([le.transform([view_label])]).reshape(len(le_categories), 1)
  for filename in view_table['filename']:
    # setdefault keeps the first (highest-priority) label seen for a filename
    cohen_label_by_filename.setdefault(filename, (view_label, view_group))

cohen_found_by_group = {'covid': 0, 'other_virus': 0, 'bacterial': 0, 'fungal': 0}

# Look up in the metadata the images that are needed
# NOTE(review): os.listdir order is not guaranteed, and the seeded train/test
# split later depends on it — confirm how the trained models were split
# before sorting this list.
training_data_filenames = os.listdir(cohen_dir)
for img_name in training_data_filenames:
  match = cohen_label_by_filename.get(img_name)
  if match is None:
    # File is not listed under any of the four views of the four classes
    continue
  view_label, view_group = match

  with PIL.Image.open(os.path.join(cohen_dir, img_name)) as img:
    img_resized = img.convert('L').resize((img_width, img_height))
    # PIL sizes are (width, height) while the pixel array is (height, width)
    cohen_x_dataset.append(np.asarray(img_resized).reshape(img_height, img_width, 1))

  # The label is stored only after the image was read, keeping x and y aligned
  cohen_y_dataset.append(cohen_one_hot_by_label[view_label].copy())
  cohen_found_by_group[view_group] += 1

# Per-class counters, kept under their original names
covid_found = cohen_found_by_group['covid']
other_virus_found = cohen_found_by_group['other_virus']
bacterial_found = cohen_found_by_group['bacterial']
fungal_found = cohen_found_by_group['fungal']

### Kag dataset

In [None]:
def find_class_label(image_name):
  """Extract the pneumonia kind from a Kaggle chest X-ray file name.

  Args:
    image_name: file name containing `person<N>_bacteria_<M>` or
      `person<N>_virus_<M>` (matched case-insensitively).

  Returns:
    'bacteria' or 'virus', always lower case.

  Raises:
    IndexError: if the name does not follow that pattern. The exception type
      is the one the previous implementation raised; it now carries a message.
  """
  match = re.search(r'person\d*_(bacteria|virus)_\d*', image_name, flags=re.I)
  if match is None:
    raise IndexError(f"no 'bacteria'/'virus' class label found in file name {image_name!r}")

  return match.group(1).lower()

In [None]:
kag_dir = "kag_dataset"
gc.collect()

def _load_kag_split(split_dir):
  """Load every image found under split_dir/<finding>/.

  Args:
    split_dir: directory whose sub-directories are named after the finding
      ('NORMAL' or anything else, which is treated as pneumonia).

  Returns:
    A tuple (images, labels, kinds): grayscale arrays of shape
    (img_height, img_width, 1), one-hot label columns of shape
    (number of classes, 1), and the short class name of each image
    ('normal', 'bacteria' or 'virus').
  """
  images, labels, kinds = [], [], []
  one_hot_by_class = {}
  for finding in os.listdir(split_dir):
    finding_path = os.path.join(split_dir, finding)
    if not os.path.isdir(finding_path):
      # Stray files next to the class folders are not findings
      continue
    for image in os.listdir(finding_path):
      if image.startswith('.'):
        # Hidden files such as .DS_Store are not images
        continue
      with PIL.Image.open(os.path.join(finding_path, image)) as img:
        img_resized = img.convert('L').resize((img_width, img_height))
        # PIL sizes are (width, height) while the pixel array is (height, width)
        images.append(np.asarray(img_resized).reshape(img_height, img_width, 1))

      kind = 'normal' if finding == 'NORMAL' else find_class_label(image)
      class_name = 'kag_normal' if kind == 'normal' else f'kag_pneumonia_{kind}'
      if class_name not in one_hot_by_class:
        # Encode each class once instead of once per image
        one_hot_by_class[class_name] = ohe.transform([le.transform([class_name])]).reshape(len(le_categories), 1)
      labels.append(one_hot_by_class[class_name].copy())
      kinds.append(kind)
  return images, labels, kinds

kag_train_dir = os.path.join(kag_dir, "train")
kag_x_train, kag_y_train, kag_train_kinds = _load_kag_split(kag_train_dir)

# The dataset's "test" folder is used as the validation set in this notebook
kag_val_dir = os.path.join(kag_dir, "test")
kag_x_val, kag_y_val, kag_val_kinds = _load_kag_split(kag_val_dir)

# Image counters per class over both splits
normal = 0
pneumonias = {"bacteria": 0, "virus": 0}
for kind in kag_train_kinds + kag_val_kinds:
  if kind == 'normal':
    normal += 1
  else:
    pneumonias[kind] += 1

In [None]:
# Images counted so far: normal, bacterial pneumonia, viral pneumonia
normal, pneumonias['bacteria'], pneumonias['virus']

## Visualização dos resultados

In [None]:
# Folder that the saved models are loaded from; it is joined with each model name
experiments_path = ""

### Baseline e cross-validation

Base de teste do kag e cohen

In [None]:
# Hold out 10% of the Cohen images for testing ...
cohen_x_train, cohen_x_test, cohen_y_train, cohen_y_test = sklearn.model_selection.train_test_split(
    cohen_x_dataset,
    cohen_y_dataset,
    test_size=0.1,
    random_state=seed,
)
# ... then keep 78% of the remainder for training and the rest for validation
cohen_x_train, cohen_x_val, cohen_y_train, cohen_y_val = sklearn.model_selection.train_test_split(
    cohen_x_train,
    cohen_y_train,
    train_size=0.78,
    random_state=seed,
)

Base de teste de kag

In [None]:
kag_x_test = []
kag_y_test = []

# One-hot column of each class, encoded once instead of once per image
kag_test_one_hot_by_class = {}

# The dataset's "val" folder is used as the test set in this notebook
kag_val_dir = os.path.join(kag_dir, "val")
for finding in os.listdir(kag_val_dir):
  finding_path = os.path.join(kag_val_dir, finding)
  if not os.path.isdir(finding_path):
    # Stray files next to the class folders are not findings
    continue
  for image in os.listdir(finding_path):
    if image.startswith('.'):
      # Hidden files such as .DS_Store are not images
      continue
    with PIL.Image.open(os.path.join(finding_path, image)) as img:
      img_resized = img.convert('L').resize((img_width, img_height))
      # PIL sizes are (width, height) while the pixel array is (height, width)
      kag_x_test.append(np.asarray(img_resized).reshape(img_height, img_width, 1))

    if finding == 'NORMAL':
      class_name = 'kag_normal'
      normal += 1
    else:
      label = find_class_label(image)
      class_name = f'kag_pneumonia_{label}'
      pneumonias[label] += 1

    if class_name not in kag_test_one_hot_by_class:
      kag_test_one_hot_by_class[class_name] = ohe.transform([le.transform([class_name])]).reshape(len(le_categories), 1)
    kag_y_test.append(kag_test_one_hot_by_class[class_name].copy())

In [None]:
# Images counted so far, now including the test folder: normal, bacterial, viral
normal, pneumonias['bacteria'], pneumonias['virus']

In [None]:
# Split each dataset for recognition
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal']

# One-hot row of every Kaggle class, keyed by class name
kag_dataset_labels = {
    kag_class: ohe.transform([le.transform([kag_class])])
    for kag_class in ('kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus')
}

Pré-processamento para cada label e Cálculo das métricas

In [None]:
def split_cohen_kag_label(arr, kag_labels=None):
  """Tell which dataset a one-hot label or a prediction vector belongs to.

  Args:
    arr: one-hot label or class-score vector; only the position of its
      maximum is used.
    kag_labels: optional mapping whose values are the one-hot encodings of
      the Kaggle classes. Defaults to the module-level `kag_dataset_labels`.

  Returns:
    'kag' when the strongest class of `arr` is one of the Kaggle classes,
    'cohen' otherwise.
  """
  if kag_labels is None:
    kag_labels = kag_dataset_labels
  # Class indices that belong to the Kaggle dataset
  kag_indices = {int(np.argmax(one_hot)) for one_hot in kag_labels.values()}
  return 'kag' if int(np.argmax(arr)) in kag_indices else 'cohen'

#### Baseline

Carregar modelo

In [None]:
# Names of the three saved baseline models inside experiments_path
baseline_larger_regions = 'baseline_cohen_kag_larger_regions'
baseline_low_level_high_level = 'baseline_cohen_kag_low_level_high_level'
baseline_transfer_learning = 'baseline_cohen_kag_resnet50'

# Load each model from experiments_path/<name>
baseline_larger_regions_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_larger_regions)
)
baseline_low_level_high_level_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_low_level_high_level)
)
baseline_transfer_learning_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_transfer_learning)
)

In [None]:
# From the predictions and the ground truth, sklearn can generate multiple metrics
y_test = np.concatenate([cohen_y_test, kag_y_test], axis=0)
X_test = np.concatenate([cohen_x_test, kag_x_test], axis=0)

# Split each dataset for recognition
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal']

# NOTE(review): these lookups need `le`/`ohe` fitted on the baseline classes;
# the view-split encoders do not know 'cohen_covid19' — confirm which encoder
# cell was run last.
kag_dataset_labels = {
    kag_class: ohe.transform([le.transform([kag_class])])
    for kag_class in ('kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus')
}

cohen_dataset_labels = {
    cohen_class: ohe.transform([le.transform([cohen_class])])
    for cohen_class in ('cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal')
}

##### Teste

In [None]:
# Test-set predictions of the three loaded models
predictions_larger_regions = baseline_larger_regions_model.predict(X_test)
predictions_low_level_high_level = baseline_low_level_high_level_model.predict(X_test)
# The transfer-learning model receives the input repeated three times along the last axis
predictions_transfer_learning = baseline_transfer_learning_model.predict(
    np.concatenate([X_test] * 3, axis=-1)
)

###### Dataset

In [None]:
# Source dataset ('kag' or 'cohen') of every test label
y_base_test = np.array([split_cohen_kag_label(one_hot) for one_hot in y_test])

Larger regions

In [None]:
# Dataset predicted for each test image by the larger-regions model
y_base_test_predictions_larger_regions = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_larger_regions]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_larger_regions)
print(dataset_report)

Low-level to high-level

In [None]:
# Dataset predicted for each test image by the low-level/high-level model
y_base_test_predictions_low_level_high_level = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_low_level_high_level]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_low_level_high_level)
print(dataset_report)

Transfer Learning

In [None]:
# Dataset predicted for each test image by the transfer-learning model
y_base_test_predictions_transfer_learning = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_transfer_learning]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_transfer_learning)
print(dataset_report)

###### Defined classes

Larger Regions

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_larger_regions.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Low-level to high-level

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_low_level_high_level.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Transfer Learning

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_transfer_learning.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

##### Validação

In [None]:
# Validation set: Cohen validation split followed by the Kaggle one
X_val = np.concatenate([cohen_x_val, kag_x_val], axis=0)
y_val = np.concatenate([cohen_y_val, kag_y_val], axis=0)

# Validation-set predictions of the three loaded models
predictions_larger_regions = baseline_larger_regions_model.predict(X_val)
predictions_low_level_high_level = baseline_low_level_high_level_model.predict(X_val)
# The transfer-learning model receives the input repeated three times along the last axis
predictions_transfer_learning = baseline_transfer_learning_model.predict(
    np.concatenate([X_val] * 3, axis=-1)
)

###### Dataset

In [None]:
# Source dataset ('kag' or 'cohen') of every validation label
y_base_val = np.array([split_cohen_kag_label(one_hot) for one_hot in y_val])

Larger regions

In [None]:
# Dataset predicted for each validation image by the larger-regions model
y_base_val_predictions_larger_regions = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_larger_regions]
)

dataset_report = sklearn.metrics.classification_report(y_base_val, y_base_val_predictions_larger_regions)
print(dataset_report)

Low-level to high-level

In [None]:
# Dataset predicted for each validation image by the low-level/high-level model
y_base_val_predictions_low_level_high_level = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_low_level_high_level]
)

dataset_report = sklearn.metrics.classification_report(y_base_val, y_base_val_predictions_low_level_high_level)
print(dataset_report)

Transfer Learning

In [None]:
# Dataset predicted for each validation image by the transfer-learning model
y_base_val_predictions_transfer_learning = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_transfer_learning]
)

dataset_report = sklearn.metrics.classification_report(y_base_val, y_base_val_predictions_transfer_learning)
print(dataset_report)

###### Defined Classes

Larger Regions

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_val.reshape(y_val.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_larger_regions.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Low-level to high-level

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_val.reshape(y_val.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_low_level_high_level.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Transfer Learning

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_val.reshape(y_val.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_transfer_learning.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

#### Cross-validation

Get back test data

In [None]:
# Put the whole Cohen dataset and every Kaggle split back into one dataset
cohen_kag_x_dataset = np.concatenate([cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test], axis=0)
cohen_kag_y_dataset = np.concatenate([cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test], axis=0)
# Keep only the first two axes of the labels: (samples, classes)
n_samples, n_classes = cohen_kag_y_dataset.shape[:2]
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(n_samples, n_classes)

# Hold out 10% as the cross-validation test set
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(
    cohen_kag_x_dataset,
    cohen_kag_y_dataset,
    test_size=0.1,
    random_state=seed,
)
y_base_test = np.array([split_cohen_kag_label(one_hot) for one_hot in y_test])

In [None]:
# Prefix shared by every saved cross-validation model name
history_cross_cohen_kag = 'cv_cohen_kag_iteration_'

# Suffix that identifies each architecture
high_level_low_level, larger_regions, transfer_learning = (
    'low_level_high_level',
    'larger_regions',
    'resnet50',
)

##### Dataset

Larger regions

In [None]:
# Dataset-level report of the larger-regions model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{larger_regions}")
  model = tf.keras.models.load_model(model_path)
  predictions_larger_regions = model.predict(X_test)
  print(f"Iteration {i}")
  y_base_test_predictions_larger_regions = np.array(
      [split_cohen_kag_label(scores) for scores in predictions_larger_regions]
  )

  print(sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_larger_regions))

Low-level to high-level

In [None]:
# Dataset-level report of the low-level/high-level model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{high_level_low_level}")
  model = tf.keras.models.load_model(model_path)
  predictions_low_level_high_level = model.predict(X_test)
  print(f"Iteration {i}")
  y_base_test_predictions_low_level_high_level = np.array(
      [split_cohen_kag_label(scores) for scores in predictions_low_level_high_level]
  )

  print(sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_low_level_high_level))

Transfer Learning

In [None]:
# Dataset-level report of the transfer-learning model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{transfer_learning}")
  model = tf.keras.models.load_model(model_path)
  # This model receives the input repeated three times along the last axis
  predictions_transfer_learning = model.predict(np.concatenate([X_test] * 3, axis=-1))
  print(f"Iteration {i}")
  y_base_test_predictions_transfer_learning = np.array(
      [split_cohen_kag_label(scores) for scores in predictions_transfer_learning]
  )

  print(sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_transfer_learning))

##### Defined classes

Larger regions

In [None]:
# Per-class report of the larger-regions model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{larger_regions}")
  model = tf.keras.models.load_model(model_path)
  predictions_larger_regions = model.predict(X_test)
  print(f"Iteration {i}")

  # Strongest class of each row, mapped back to its class name
  true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
  predicted_classes = le.inverse_transform(predictions_larger_regions.argmax(axis=1))
  print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Low-level to high-level

In [None]:
# Per-class report of the low-level/high-level model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{high_level_low_level}")
  model = tf.keras.models.load_model(model_path)
  predictions_low_level_high_level = model.predict(X_test)
  print(f"Iteration {i}")

  # Strongest class of each row, mapped back to its class name
  true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
  predicted_classes = le.inverse_transform(predictions_low_level_high_level.argmax(axis=1))
  print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Transfer Learning

In [None]:
# Per-class report of the transfer-learning model of each of the five iterations
for i in range(5):
  model_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{transfer_learning}")
  model = tf.keras.models.load_model(model_path)
  model.summary()
  # This model receives the input repeated three times along the last axis
  predictions_transfer_learning = model.predict(np.concatenate([X_test] * 3, axis=-1))
  print(f"Iteration {i}")

  # Strongest class of each row, mapped back to its class name
  true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
  predicted_classes = le.inverse_transform(predictions_transfer_learning.argmax(axis=1))
  print(sklearn.metrics.classification_report(true_classes, predicted_classes))

#### AP, PA e AP Supine

Carregar modelo

In [None]:
# Names of the three saved view-split models inside experiments_path.
# The baseline_* variable names are reused, so the baseline models loaded
# earlier are replaced from this cell on.
baseline_larger_regions = 'view_split_cohen_kag_larger_regions'
baseline_low_level_high_level = 'view_split_cohen_kag_low_level_high_level'
baseline_transfer_learning = 'view_split_cohen_kag_resnet50'

# Load each model from experiments_path/<name>
baseline_larger_regions_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_larger_regions)
)
baseline_low_level_high_level_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_low_level_high_level)
)
baseline_transfer_learning_model = tf.keras.models.load_model(
    os.path.join(experiments_path, baseline_transfer_learning)
)

In [None]:
# From the predictions and the ground truth, sklearn can generate multiple metrics
y_test = np.concatenate((cohen_y_test, kag_y_test))
X_test = np.concatenate((cohen_x_test, kag_x_test))

# Split each dataset for recognition
# NOTE(review): this list still names the baseline classes while the labels
# below use the view-split classes — confirm whether it should be updated.
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal']

# Recompute the Kaggle one-hot rows with the encoders currently in scope:
# split_cohen_kag_label compares class indices against them, and those
# indices differ between the baseline and the view-split encoders.
kag_dataset_labels = {
    kag_class: ohe.transform([le.transform([kag_class])])
    for kag_class in ('kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus')
}

# One-hot row of every Cohen class: one per (view, finding) pair
cohen_dataset_labels = {
    f'cohen_{view}_{finding}': ohe.transform([le.transform([f'cohen_{view}_{finding}'])])
    for view in ('ap', 'pa', 'l', 'ap_supine')
    for finding in ('covid19', 'other_virus', 'bacterial', 'fungal')
}

##### Teste

In [None]:
# Test-set predictions of the three loaded models
predictions_larger_regions = baseline_larger_regions_model.predict(X_test)
predictions_low_level_high_level = baseline_low_level_high_level_model.predict(X_test)
# The transfer-learning model receives the input repeated three times along the last axis
predictions_transfer_learning = baseline_transfer_learning_model.predict(
    np.concatenate([X_test] * 3, axis=-1)
)

###### Dataset

In [None]:
# Source dataset ('kag' or 'cohen') of every test label
y_base_test = np.array([split_cohen_kag_label(one_hot) for one_hot in y_test])

Larger regions

In [None]:
# Dataset predicted for each test image by the larger-regions model
y_base_test_predictions_larger_regions = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_larger_regions]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_larger_regions)
print(dataset_report)

Low-level to high-level

In [None]:
# Dataset predicted for each test image by the low-level/high-level model
y_base_test_predictions_low_level_high_level = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_low_level_high_level]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_low_level_high_level)
print(dataset_report)

Transfer Learning

In [None]:
# Dataset predicted for each test image by the transfer-learning model
y_base_test_predictions_transfer_learning = np.array(
    [split_cohen_kag_label(scores) for scores in predictions_transfer_learning]
)

dataset_report = sklearn.metrics.classification_report(y_base_test, y_base_test_predictions_transfer_learning)
print(dataset_report)

###### Defined classes

Larger Regions

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_larger_regions.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Low-level to high-level

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_low_level_high_level.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

Transfer Learning

In [None]:
# Per-class report: reshape the labels to (samples, classes), take the
# strongest class of each row and map it back to its class name
true_classes = le.inverse_transform(y_test.reshape(y_test.shape[:2]).argmax(axis=1))
predicted_classes = le.inverse_transform(predictions_transfer_learning.argmax(axis=1))
print(sklearn.metrics.classification_report(true_classes, predicted_classes))

##### Validação

In [None]:
# Build the validation set by appending the Kag validation split to the Cohen one.
X_val = np.concatenate([cohen_x_val, kag_x_val], axis=0)
y_val = np.concatenate([cohen_y_val, kag_y_val], axis=0)

# Score X_val with the three baseline models; note this overwrites the predictions_* variables.
predictions_larger_regions, predictions_low_level_high_level = (
    baseline_model.predict(X_val)
    for baseline_model in (baseline_larger_regions_model, baseline_low_level_high_level_model)
)
# The transfer-learning model is fed X_val stacked three times along the last axis.
predictions_transfer_learning = baseline_transfer_learning_model.predict(
    np.concatenate([X_val] * 3, axis=-1)
)

In [None]:
# Pass every validation label through split_cohen_kag_label.
# NOTE(review): presumably this reduces a label to its source dataset (Cohen vs. Kag) — confirm.
y_base_val = np.array([split_cohen_kag_label(label) for label in y_val])

###### Dataset

Larger regions

In [None]:
# Convert the larger-regions predictions with split_cohen_kag_label, then report against y_base_val.
y_base_val_predictions_larger_regions = np.array(
    [split_cohen_kag_label(prediction) for prediction in predictions_larger_regions]
)

print(sklearn.metrics.classification_report(
    y_true=y_base_val,
    y_pred=y_base_val_predictions_larger_regions,
))

Low-level to high-level

In [None]:
# Convert the low-level/high-level predictions with split_cohen_kag_label, then report against y_base_val.
y_base_val_predictions_low_level_high_level = np.array(
    [split_cohen_kag_label(prediction) for prediction in predictions_low_level_high_level]
)

print(sklearn.metrics.classification_report(
    y_true=y_base_val,
    y_pred=y_base_val_predictions_low_level_high_level,
))

Transfer Learning

In [None]:
# Convert the transfer-learning predictions with split_cohen_kag_label, then report against y_base_val.
y_base_val_predictions_transfer_learning = np.array(
    [split_cohen_kag_label(prediction) for prediction in predictions_transfer_learning]
)

print(sklearn.metrics.classification_report(
    y_true=y_base_val,
    y_pred=y_base_val_predictions_transfer_learning,
))

###### Defined classes

Larger Regions

In [None]:
# Per-class report for the larger-regions model on the validation set.
# Both sides are reduced to a class index (argmax over axis 1) and decoded with `le`.
print(sklearn.metrics.classification_report(
    y_true=le.inverse_transform(
        y_val.reshape((y_val.shape[0], y_val.shape[1])).argmax(axis=1)
    ),
    y_pred=le.inverse_transform(predictions_larger_regions.argmax(axis=1)),
))

Low-level to high-level

In [None]:
# Per-class report for the low-level/high-level model on the validation set.
# Both sides are reduced to a class index (argmax over axis 1) and decoded with `le`.
print(sklearn.metrics.classification_report(
    y_true=le.inverse_transform(
        y_val.reshape((y_val.shape[0], y_val.shape[1])).argmax(axis=1)
    ),
    y_pred=le.inverse_transform(predictions_low_level_high_level.argmax(axis=1)),
))

Transfer Learning

In [None]:
# Per-class report for the transfer-learning model on the validation set.
# Both sides are reduced to a class index (argmax over axis 1) and decoded with `le`.
print(sklearn.metrics.classification_report(
    y_true=le.inverse_transform(
        y_val.reshape((y_val.shape[0], y_val.shape[1])).argmax(axis=1)
    ),
    y_pred=le.inverse_transform(predictions_transfer_learning.argmax(axis=1)),
))

#### Cross-validation AP, PA, AP Supine e Lateral

In [None]:
# From the predictions and the ground truth, sklearn can generate multiple metrics.
X_test = np.concatenate([cohen_x_test, kag_x_test], axis=0)
y_test = np.concatenate([cohen_y_test, kag_y_test], axis=0)

# Encoding of every "cohen_<view>_<finding>" label: label-encode the name with `le`,
# then pass the result through `ohe`. Keys are generated view by view
# (ap, pa, l, ap_supine), each with the four findings in the order listed below.
cohen_dataset_labels = {
    label_name: ohe.transform([le.transform([label_name])])
    for label_name in (
        f"cohen_{view}_{finding}"
        for view in ("ap", "pa", "l", "ap_supine")
        for finding in ("covid19", "other_virus", "bacterial", "fungal")
    )
}

Recreate the held-out test split: pool all Cohen and Kag samples and hold out 10% using the fixed seed.

In [None]:
# Pool the full Cohen dataset with every Kag split (train, validation and test).
cohen_kag_x_dataset = np.concatenate([cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test], axis=0)
cohen_kag_y_dataset = np.concatenate([cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test], axis=0)
# Reshape the labels to 2-D using their first two dimensions.
cohen_kag_y_dataset = np.reshape(
    cohen_kag_y_dataset,
    (cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1]),
)

# Hold out 10% of the pooled data as the test split; the fixed seed makes the split repeatable.
(cohen_kag_x_dataset, X_test,
 cohen_kag_y_dataset, y_test) = sklearn.model_selection.train_test_split(
    cohen_kag_x_dataset,
    cohen_kag_y_dataset,
    test_size=0.1,
    random_state=seed,
)
# NOTE(review): split_cohen_kag_label presumably reduces a label to its source dataset — confirm.
y_base_test = np.array([split_cohen_kag_label(label) for label in y_test])

In [None]:
# Name pieces of the saved cross-validation models: the cells below load
# f"{history_cross_cohen_kag}{i}_{<architecture suffix>}" from experiments_path.
history_cross_cohen_kag = "view_split_cv_cohen_kag_iteration_"

# Architecture suffixes (note: the variable high_level_low_level holds 'low_level_high_level').
larger_regions = "larger_regions"
high_level_low_level = "low_level_high_level"
transfer_learning = "resnet50"

##### Dataset

Larger regions

In [None]:
# For each of the 5 saved larger-regions models: load it, predict on X_test,
# and report the split_cohen_kag_label-converted predictions against y_base_test.
for i in range(5):
  model = tf.keras.models.load_model(
      os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{larger_regions}")
  )
  predictions_larger_regions = model.predict(X_test)
  print(f"Iteration {i}")
  y_base_test_predictions_larger_regions = np.array(
      [split_cohen_kag_label(prediction) for prediction in predictions_larger_regions]
  )

  print(sklearn.metrics.classification_report(
      y_true=y_base_test,
      y_pred=y_base_test_predictions_larger_regions,
  ))

Low-level to high-level

In [None]:
# For each of the 5 saved low-level/high-level models: load it, predict on X_test,
# and report the split_cohen_kag_label-converted predictions against y_base_test.
for i in range(5):
  model = tf.keras.models.load_model(
      os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{high_level_low_level}")
  )
  predictions_low_level_high_level = model.predict(X_test)
  print(f"Iteration {i}")
  y_base_test_predictions_low_level_high_level = np.array(
      [split_cohen_kag_label(prediction) for prediction in predictions_low_level_high_level]
  )

  print(sklearn.metrics.classification_report(
      y_true=y_base_test,
      y_pred=y_base_test_predictions_low_level_high_level,
  ))

Transfer Learning

In [None]:
# For each of the 5 saved transfer-learning models: load it, predict on X_test
# stacked three times along the last axis, and report the
# split_cohen_kag_label-converted predictions against y_base_test.
for i in range(5):
  model = tf.keras.models.load_model(
      os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{transfer_learning}")
  )
  predictions_transfer_learning = model.predict(np.concatenate([X_test] * 3, axis=-1))
  print(f"Iteration {i}")
  y_base_test_predictions_transfer_learning = np.array(
      [split_cohen_kag_label(prediction) for prediction in predictions_transfer_learning]
  )

  print(sklearn.metrics.classification_report(
      y_true=y_base_test,
      y_pred=y_base_test_predictions_transfer_learning,
  ))

##### Defined classes

Larger regions

In [None]:
# For each of the 5 saved larger-regions models: load it, predict on X_test,
# and print a per-class report. Both sides are reduced to a class index
# (argmax over axis 1) and decoded with `le`.
for i in range(5):
  model = tf.keras.models.load_model(
      os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{larger_regions}")
  )
  predictions_larger_regions = model.predict(X_test)
  print(f"Iteration {i}")

  print(sklearn.metrics.classification_report(
      y_true=le.inverse_transform(
          y_test.reshape((y_test.shape[0], y_test.shape[1])).argmax(axis=1)
      ),
      y_pred=le.inverse_transform(predictions_larger_regions.argmax(axis=1)),
  ))

Low-level to high-level

In [None]:
# For each of the 5 saved low-level/high-level models: load it, predict on X_test,
# and print a per-class report. Both sides are reduced to a class index
# (argmax over axis 1) and decoded with `le`.
for i in range(5):
  model = tf.keras.models.load_model(
      os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{high_level_low_level}")
  )
  predictions_low_level_high_level = model.predict(X_test)
  print(f"Iteration {i}")

  print(sklearn.metrics.classification_report(
      y_true=le.inverse_transform(
          y_test.reshape((y_test.shape[0], y_test.shape[1])).argmax(axis=1)
      ),
      y_pred=le.inverse_transform(predictions_low_level_high_level.argmax(axis=1)),
  ))

Transfer Learning

In [None]:
# For each of the 5 saved transfer-learning models: load it, predict on X_test,
# and print a per-class report. Only the metrics are printed, matching the other
# cross-validation cells (no architecture summary on every iteration).
for i in range(5):
  model = tf.keras.models.load_model(os.path.join(experiments_path, f"{history_cross_cohen_kag}{i}_{transfer_learning}"))
  # The model is fed X_test stacked three times along the last axis.
  predictions_transfer_learning = model.predict(np.concatenate((X_test, X_test, X_test), axis=-1))
  print(f"Iteration {i}")

  # Reduce both sides to a class index (argmax over axis 1) and decode with `le`.
  print(sklearn.metrics.classification_report(
    le.inverse_transform(np.argmax(y_test.reshape((y_test.shape[0], y_test.shape[1])), axis=1)),
    le.inverse_transform(np.argmax(predictions_transfer_learning, axis=1)))
  )