In [None]:
import datetime
import gc
import os
import random
import re
import tarfile
import threading
import urllib.request
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import PIL.Image
import PIL.ImageFile
import requests
import sklearn
import sklearn.model_selection
import sklearn.preprocessing
import tensorflow as tf

from google.colab import drive
# Mount the user's Google Drive under /gdrive (Colab only).
drive.mount('/gdrive')

# Set seed for reproducibility: one seed for the Keras helper, plus deterministic
# TensorFlow ops so repeated runs can be compared.
seed=1
tf.keras.utils.set_random_seed(seed)
tf.config.experimental.enable_op_determinism()

## Definição da Forma de Avaliação

Dadas as 2 bases iniciais e os 2 modelos iniciais, será selecionado um modelo para cada iteração de folds.

Para cada base, será necessário realizar cross-validation com 5 folds. Haverá a junção das 3 bases para realizar o treinamento, os quais serão validados depois

Após os 5 folds para um modelo, o mesmo procedimento será realizado para o outro modelo. Por último, será realizado o teste com a ResNet.

O que foi descrito acima será realizado com o uso de metadados para a especificação de subclasses com as classes já existentes dos dados.

## Dados

### Cohen dataset

In [None]:
# Local folder that receives every file listed in the repository's images/ directory.
cohen_dir  = "cohen_dataset/"
os.makedirs(cohen_dir, exist_ok=True)

# The GitHub contents API answers with a JSON array holding one entry per file.
cohen_url = "https://api.github.com/repos/ieee8023/covid-chestxray-dataset/contents/images"
cohen_response = requests.get(cohen_url, timeout=60)
# Stop here on an HTTP error: otherwise list() would turn the JSON error object
# into a list of its keys and the download threads would fail much later.
cohen_response.raise_for_status()
images_requisition = list(cohen_response.json())
# Download URLs, appended by the worker threads as they go.
files = []
# Share the listing evenly between the 24 download threads.
total_images = len(images_requisition)
images_per_thread = total_images//24

def download_images(images, begining, stop):
  """Download the entries ``images[begining:stop]`` of a contents listing into ``cohen_dir``.

  Args:
    images: sequence of dicts that carry at least ``'name'`` and ``'download_url'``.
    begining: index of the first entry to download (inclusive).
    stop: index one past the last entry to download (exclusive).

  Each downloaded URL is also appended to the module-level ``files`` list.
  Entries without a download URL (e.g. sub-directories in a listing) are skipped,
  so one such entry cannot abort the rest of the slice handled by this thread.
  """
  for moment in range(begining, stop):
    entry = images[moment]
    url = entry.get('download_url')
    if not url:
      continue
    files.append(url)
    urllib.request.urlretrieve(url, os.path.join(cohen_dir, entry['name']))

# One worker per contiguous slice of the listing; the 24th worker also takes the
# `total_images % 24` entries left over by the integer division.
threads = []
for worker in range(24):
  first = worker*images_per_thread
  last = first+images_per_thread
  if worker == 23:
    last += total_images%24
  threads.append(threading.Thread(target=download_images, args=(images_requisition, first, last)))

# Launch every worker before waiting on any of them.
for worker_thread in threads:
  worker_thread.start()

for worker_thread in threads:
  worker_thread.join()

# The metadata table lives next to the images in the same repository.
metadata_url = "https://raw.githubusercontent.com/ieee8023/covid-chestxray-dataset/master/metadata.csv"
urllib.request.urlretrieve(metadata_url, "cohen_metadata.csv")

cohen_metadata = pd.read_csv("cohen_metadata.csv")
cohen_metadata

### Kag dataset

Substitua onde está CREDENCIAIS_DE_ACESSO pela pasta que contém suas credenciais do Kaggle. Fique livre para manter onde está, ignorando as 4 primeiras linhas e trocando a pasta /root/.kaggle/kaggle.json pela localização de suas credenciais.

In [None]:
!cp CREDENCIAIS_DE_ACESSO ./
!rm /root/.kaggle
!mkdir /root/.kaggle
!mv ./kaggle.json /root/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download --unzip paultimothymooney/chest-xray-pneumonia
!rename 's/chest_xray/kag_dataset/' *

## Pré-processamento

In [None]:
# Every image is resized to this square resolution before entering a model.
img_height = img_width = 224

Classes

In [None]:
# The class mapping changes from dataset to dataset, but one naming pattern is
# kept for each disease and for healthy patients.

# kag x Cohen
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_covid19', 'cohen_other_virus', 'cohen_bacteria', 'cohen_fungal']

# Class name -> integer id, stored as a column so the one-hot encoder can be fitted on it.
le = sklearn.preprocessing.LabelEncoder()
le_categories = le.fit_transform(categories)
le_categories = le_categories.reshape(len(le_categories), 1)

# scikit-learn renamed `sparse` to `sparse_output` and later dropped the old
# keyword; try the new spelling first and fall back for older installations.
try:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse_output=False)
except TypeError:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse=False)
ohe.fit(le_categories)

### COHEN dataset

Será distinguida a covid de outros vírus, com as outras classes se agrupando por reino biológico (bactérias e fungos).

In [None]:
# X-ray images
cohen_xray_metada = cohen_metadata[cohen_metadata['modality'] == 'X-ray']
covid_19_metadata = cohen_xray_metada[cohen_xray_metada['finding'] == 'Pneumonia/Viral/COVID-19']
# Everything that is not COVID-19: used to find the other viruses and to
# narrow the search for the remaining classes.
other_metadata = cohen_xray_metada[cohen_xray_metada['finding'] != 'Pneumonia/Viral/COVID-19']

# na=False: a row with a missing `finding` counts as "no match" instead of
# putting NaN into the boolean mask, which pandas refuses to index with.
other_virus_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Viral', na=False)]
bacterial_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Bacterial', na=False)]
fungal_metadata = other_metadata[other_metadata['finding'].str.startswith('Pneumonia/Fungal', na=False)]

#### Baseline

In [None]:
# How many X-ray rows exist for each `finding` value.
cohen_xray_metada.loc[:, 'finding'].value_counts()

In [None]:
cohen_x_dataset = []
cohen_y_dataset = []
cohen_dir = "cohen_dataset"

# Let Pillow decode files whose download was cut short instead of raising.
PIL.ImageFile.LOAD_TRUNCATED_IMAGES = True

# Metadata tables in priority order: a file is labelled by the first table that lists it.
cohen_classes = [('cohen_covid19', covid_19_metadata),
                 ('cohen_other_virus', other_virus_metadata),
                 ('cohen_bacteria', bacterial_metadata),
                 ('cohen_fungal', fungal_metadata)]
total_images = sum(len(metadata) for category, metadata in cohen_classes)

# file name -> class; one dictionary lookup per file replaces up to four
# DataFrame scans per file.
cohen_label_by_filename = {}
for category, metadata in cohen_classes:
  for filename in metadata['filename']:
    cohen_label_by_filename.setdefault(filename, category)

# One-hot column vector of each class, computed once instead of once per image.
cohen_one_hot = {category: ohe.transform([le.transform([category])]).reshape(len(le_categories), 1)
                 for category, metadata in cohen_classes}
found_per_class = dict.fromkeys(cohen_one_hot, 0)

# Keep only the downloaded files that the metadata assigns to one of the classes.
training_data_filenames = os.listdir(cohen_dir)
for img_name in training_data_filenames:
  category = cohen_label_by_filename.get(img_name)
  if category is None:
    continue

  with PIL.Image.open(os.path.join(cohen_dir, img_name)) as img:
    # Single-channel grayscale at the common resolution; arrays are (rows, columns, 1).
    img_resized = img.convert('L').resize((img_width, img_height))
    pixels = np.reshape(np.asarray(img_resized), (img_height, img_width, 1))

  # Image and label are appended together, only after the image decoded, so the
  # two lists cannot get out of step.
  cohen_x_dataset.append(pixels)
  cohen_y_dataset.append(cohen_one_hot[category])
  found_per_class[category] += 1

# Per-class totals under the names this cell has always exposed.
covid_found = found_per_class['cohen_covid19']
other_virus_found = found_per_class['cohen_other_virus']
bacterial_found = found_per_class['cohen_bacteria']
fungal_found = found_per_class['cohen_fungal']

#### Separação de visualizações (PA, AP, AP Supine e L)

Anteroposterior (AP), posteroanterior (PA), AP supino e lateral (L)

In [None]:
# One table per (class, projection) pair. The `view` column uses the labels
# 'AP', 'PA', 'AP Supine' and 'L' (lateral).
covid_19_ap_metadata = covid_19_metadata[covid_19_metadata['view'] == 'AP']
covid_19_pa_metadata = covid_19_metadata[covid_19_metadata['view'] == 'PA']
covid_19_ap_supine_metadata = covid_19_metadata[covid_19_metadata['view'] == 'AP Supine']
covid_19_l_metadata = covid_19_metadata[covid_19_metadata['view'] == 'L']

other_virus_ap_metadata = other_virus_metadata[other_virus_metadata['view'] == 'AP']
other_virus_pa_metadata = other_virus_metadata[other_virus_metadata['view'] == 'PA']
other_virus_ap_supine_metadata = other_virus_metadata[other_virus_metadata['view'] == 'AP Supine']
other_virus_l_metadata = other_virus_metadata[other_virus_metadata['view'] == 'L']

bacterial_ap_metadata = bacterial_metadata[bacterial_metadata['view'] == 'AP']
bacterial_pa_metadata = bacterial_metadata[bacterial_metadata['view'] == 'PA']
bacterial_l_metadata = bacterial_metadata[bacterial_metadata['view'] == 'L']
bacterial_ap_supine_metadata = bacterial_metadata[bacterial_metadata['view'] == 'AP Supine']

# Each fungal table filters on its own projection. Previously all four filtered
# on 'AP', so fungal images in any other projection never received a label.
fungal_ap_metadata = fungal_metadata[fungal_metadata['view'] == 'AP']
fungal_pa_metadata = fungal_metadata[fungal_metadata['view'] == 'PA']
fungal_ap_supine_metadata = fungal_metadata[fungal_metadata['view'] == 'AP Supine']
fungal_l_metadata = fungal_metadata[fungal_metadata['view'] == 'L']

Classes

In [None]:
# The class mapping changes from dataset to dataset, but one naming pattern is
# kept for each disease and for healthy patients.
#
# NOTE(review): this cell rebinds `categories`, `le` and `ohe`. Every cell that
# runs after it, including the sections titled "Baseline", therefore sees the
# 19-class encoders when the notebook is executed top to bottom.

# kag x Cohen
categories = ['kag_normal', 'kag_pneumonia_bacteria', 'kag_pneumonia_virus',
              'cohen_ap_covid19', 'cohen_ap_other_virus', 'cohen_ap_bacterial', 'cohen_ap_fungal',
              'cohen_pa_covid19', 'cohen_pa_other_virus', 'cohen_pa_bacterial', 'cohen_pa_fungal',
              'cohen_l_covid19', 'cohen_l_other_virus', 'cohen_l_bacterial', 'cohen_l_fungal',
              'cohen_ap_supine_covid19', 'cohen_ap_supine_other_virus', 'cohen_ap_supine_bacterial', 'cohen_ap_supine_fungal']

# Class name -> integer id, stored as a column so the one-hot encoder can be fitted on it.
le = sklearn.preprocessing.LabelEncoder()
le_categories = le.fit_transform(categories)
le_categories = le_categories.reshape(len(le_categories), 1)

# scikit-learn renamed `sparse` to `sparse_output` and later dropped the old
# keyword; try the new spelling first and fall back for older installations.
try:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse_output=False)
except TypeError:
  ohe = sklearn.preprocessing.OneHotEncoder(sparse=False)
ohe.fit(le_categories)

Find labels

In [None]:
cohen_x_dataset = []
cohen_y_dataset = []
cohen_dir = "cohen_dataset"

# Classes and projections in priority order: a file is labelled by the first
# (class, projection) pair whose metadata rows list it.
cohen_classes = [('covid19', covid_19_metadata),
                 ('other_virus', other_virus_metadata),
                 ('bacterial', bacterial_metadata),
                 ('fungal', fungal_metadata)]
cohen_views = [('AP', 'ap'), ('PA', 'pa'), ('AP Supine', 'ap_supine'), ('L', 'l')]
total_images = sum(len(metadata) for class_tag, metadata in cohen_classes)

# file name -> (class tag, full category name). The table is built straight from
# the `view` column, so every class, fungal included, is split over all four
# projections; one lookup per file replaces the 16-branch chain of DataFrame scans.
cohen_label_by_filename = {}
for class_tag, metadata in cohen_classes:
  for view, view_tag in cohen_views:
    for filename in metadata.loc[metadata['view'] == view, 'filename']:
      cohen_label_by_filename.setdefault(filename, (class_tag, f'cohen_{view_tag}_{class_tag}'))

found_per_class = {class_tag: 0 for class_tag, metadata in cohen_classes}
# One-hot column vectors, computed the first time a category is met.
cohen_one_hot = {}

# Keep only the downloaded files that the metadata assigns to one of the categories.
training_data_filenames = os.listdir(cohen_dir)
for img_name in training_data_filenames:
  match = cohen_label_by_filename.get(img_name)
  if match is None:
    continue
  class_tag, category = match

  if category not in cohen_one_hot:
    cohen_one_hot[category] = ohe.transform([le.transform([category])]).reshape(len(le_categories), 1)

  with PIL.Image.open(os.path.join(cohen_dir, img_name)) as img:
    # Single-channel grayscale at the common resolution; arrays are (rows, columns, 1).
    img_resized = img.convert('L').resize((img_width, img_height))
    pixels = np.reshape(np.asarray(img_resized), (img_height, img_width, 1))

  # Image and label are appended together, only after the image decoded.
  cohen_x_dataset.append(pixels)
  cohen_y_dataset.append(cohen_one_hot[category])
  found_per_class[class_tag] += 1

# Per-class totals under the names this cell has always exposed.
covid_found = found_per_class['covid19']
other_virus_found = found_per_class['other_virus']
bacterial_found = found_per_class['bacterial']
fungal_found = found_per_class['fungal']

### Kag dataset

In [None]:
def find_class_label(image_name):
  """Return the pneumonia subtype written in a Kag image file name.

  The name is searched, case-insensitively, for ``person<digits>_<subtype>_<digits>``
  where ``<subtype>`` is ``bacteria`` or ``virus``.

  Args:
    image_name: file name such as ``person1_bacteria_1.jpeg``.

  Returns:
    ``'bacteria'`` or ``'virus'``, always lower-case.

  Raises:
    IndexError: when the name carries no subtype tag. The exception type is the
      one the previous implementation raised; the message now names the file.
  """
  match = re.search(r'person\d*_(bacteria|virus)_\d*', image_name, flags=re.I)
  if match is None:
    raise IndexError(f"no 'bacteria' or 'virus' tag found in image name {image_name!r}")

  return match.group(1).lower()

In [None]:
kag_dir = "kag_dataset"
gc.collect()

def _load_kag_split(split_dir):
  """Load one Kag split folder into parallel lists of images and one-hot labels.

  ``split_dir`` is expected to hold one sub-folder per finding. Images inside a
  folder named ``NORMAL`` are labelled ``kag_normal``; images inside any other
  folder are labelled ``kag_pneumonia_<subtype>``, with the subtype read from
  the file name by ``find_class_label``.

  Returns:
    ``(x, y)``: a list of ``(img_height, img_width, 1)`` grayscale arrays and
    the list of matching one-hot column vectors.
  """
  x_split, y_split = [], []
  for finding in os.listdir(split_dir):
    finding_path = os.path.join(split_dir, finding)
    # Only class folders are walked; stray files next to them are ignored.
    if not os.path.isdir(finding_path):
      continue
    for image in os.listdir(finding_path):
      # Hidden entries (e.g. .DS_Store) are not images.
      if image.startswith('.'):
        continue
      with PIL.Image.open(os.path.join(finding_path, image)) as img:
        img_resized = img.convert('L').resize((img_width, img_height))
        x_split.append(np.reshape(np.asarray(img_resized), (img_height, img_width, 1)))

      if finding == 'NORMAL':
        category = 'kag_normal'
      else:
        category = f'kag_pneumonia_{find_class_label(image)}'
      y_split.append(ohe.transform([le.transform([category])]).reshape(len(le_categories), 1))
  return x_split, y_split

# The dataset's "train" folder feeds training.
kag_train_dir = os.path.join(kag_dir, "train")
kag_x_train, kag_y_train = _load_kag_split(kag_train_dir)

# The dataset's "test" folder is used as this notebook's validation split.
kag_val_dir = os.path.join(kag_dir, "test")
kag_x_val, kag_y_val = _load_kag_split(kag_val_dir)

## Definição dos modelos

In [None]:
# Width of every softmax output layer: one unit per entry of `categories`.
classes_number = len(categories)

Model to extract different segments for larger regions

In [None]:
# Layer stack of the "larger regions" network, without its classification head:
# three 'same'-padded convolutions, each followed by max pooling, then a
# 128-unit dense layer. These are layer instances, not a recipe: every model
# assembled from this list shares the very same layer objects.
keras_layers = tf.keras.layers
different_segments_model = [
    keras_layers.Rescaling(scale=1./255, input_shape=(img_height, img_width, 1)),
    keras_layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu'),
    keras_layers.MaxPooling2D(padding='same'),
    keras_layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    keras_layers.MaxPooling2D(padding='same'),
    keras_layers.Dropout(rate=0.2, seed=seed),
    keras_layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    keras_layers.MaxPooling2D(),
    keras_layers.Flatten(),
    keras_layers.Dropout(rate=0.2, seed=seed),
    keras_layers.Dense(units=128, activation='relu'),
]

Model to extract low-level to high-level features

In [None]:
# Layer stack of the "low-level to high-level" network, without its
# classification head. It differs from the other stack only in padding: the
# first and last convolutions and the last two poolings use 'valid' padding.
# These are layer instances, not a recipe: every model assembled from this list
# shares the very same layer objects.
keras_layers = tf.keras.layers
high_level_low_level_model = [
    keras_layers.Rescaling(scale=1./255, input_shape=(img_height, img_width, 1)),
    keras_layers.Conv2D(filters=16, kernel_size=3, padding='valid', activation='relu'),
    keras_layers.MaxPooling2D(padding='same'),
    keras_layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    keras_layers.MaxPooling2D(),
    keras_layers.Dropout(rate=0.2, seed=seed),
    keras_layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    keras_layers.MaxPooling2D(padding='valid'),
    keras_layers.Flatten(),
    keras_layers.Dropout(rate=0.2, seed=seed),
    keras_layers.Dense(units=128, activation='relu'),
]

Transfer Learning

In [None]:
# ImageNet-pretrained ResNet50 without its classifier, fed with 3-channel images.
# NOTE(review): no `preprocess_input` call is visible in this notebook; confirm
# that raw 0-255 pixels are what the pretrained weights should receive.
input_tensor = tf.keras.layers.Input(shape=(img_height, img_width, 3))
backbone = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
    input_shape=(img_height, img_width, 3))

# Freeze every pretrained layer: only the new head below is trained.
for layer in backbone.layers:
  layer.trainable = False

# New head: flatten the backbone feature map and classify with a softmax layer.
flattened = tf.keras.layers.Flatten()(backbone.output)
output = tf.keras.layers.Dense(classes_number, activation='softmax')(flattened)

# `resnet50` names the complete network (frozen backbone + head) from here on.
resnet50 = tf.keras.Model(inputs=backbone.input, outputs=output)

## Uso do protocolo

Substitua a variável por onde deseja salvar as métricas e os modelos

In [None]:
# Folder that receives the saved models and metric tables ("" = working directory).
experiments_path = ""
# Hyperparameters shared by every training run below.
batch_size, lr, epochs = 64, 1e-3, 15

### cohen dataset e kag dataset

#### Baseline

In [None]:
# Cohen: keep 10% aside as test, then use 78% of the remainder for training and
# the rest for validation.
cohen_x_train, cohen_x_test, cohen_y_train, cohen_y_test = sklearn.model_selection.train_test_split(
    cohen_x_dataset, cohen_y_dataset, test_size=0.1, random_state=seed)
cohen_x_train, cohen_x_val, cohen_y_train, cohen_y_val = sklearn.model_selection.train_test_split(
    cohen_x_train, cohen_y_train, train_size=0.78, random_state=seed)

# Training set: Cohen training part + Kag training folder. The labels arrive as
# (samples, classes, 1) and are flattened to (samples, classes).
X_train = np.concatenate((cohen_x_train, kag_x_train))
y_train = np.concatenate((cohen_y_train, kag_y_train))
y_train = y_train.reshape(y_train.shape[:2])

# Validation set, assembled the same way from the validation parts.
X_val = np.concatenate((cohen_x_val, kag_x_val))
y_val = np.concatenate((cohen_y_val, kag_y_val))
y_val = y_val.reshape(y_val.shape[:2])

In [None]:
# Put a fresh softmax head on the frozen backbone for this run. `resnet50` ends
# in Flatten -> Dense, so `layers[-2]` is the flattened feature output. Reusing
# `resnet50` itself would continue training the head that other runs trained.
head = tf.keras.layers.Dense(classes_number, activation='softmax')
model = tf.keras.Model(inputs=resnet50.input, outputs=head(resnet50.layers[-2].output), name="resnet50")

# ResNet50 takes 3 channels: repeat the grayscale channel. The copies get their
# own names so `X_train` / `X_val` stay single-channel and re-running this cell
# does not stack channels on top of channels.
X_train_rgb = np.concatenate((X_train, X_train, X_train), axis=-1)
X_val_rgb = np.concatenate((X_val, X_val, X_val), axis=-1)

history_baseline_cohen_kag = 'baseline_cohen_kag'
output_path = os.path.join(experiments_path, f"{history_baseline_cohen_kag}_{model.name}")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                loss='categorical_crossentropy',
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

history = model.fit(X_train_rgb, y_train, batch_size=batch_size, validation_data=(X_val_rgb, y_val), epochs=epochs)

Save model

In [None]:
# Persist the trained network in HDF5 format, then its per-epoch metrics as CSV.
model.save(output_path, save_format='h5')
metrics_csv_path = os.path.join(experiments_path, f"{history_baseline_cohen_kag}_{model.name}.csv")
df = pd.DataFrame(history.history)
df.to_csv(metrics_csv_path)

#### Cross-validation

In [None]:
# The dataset's "val" folder is used as this notebook's Kag test split.
kag_test_dir = os.path.join(kag_dir, "val")
kag_x_test = []
kag_y_test = []

# Walk `kag_test_dir`. The loop used to walk `kag_val_dir`, which made the test
# split an exact copy of the validation split.
for finding in os.listdir(kag_test_dir):
  finding_path = os.path.join(kag_test_dir, finding)
  # Only class folders are walked; stray files next to them are ignored.
  if not os.path.isdir(finding_path):
    continue
  for image in os.listdir(finding_path):
    # Hidden entries (e.g. .DS_Store) are not images.
    if image.startswith('.'):
      continue
    with PIL.Image.open(os.path.join(finding_path, image)) as img:
      img_resized = img.convert('L')
      img_resized = img_resized.resize((img_width, img_height))
      kag_x_test.append(np.reshape(np.asarray(img_resized), (img_height, img_width, 1)))

    # NORMAL folder -> healthy; any other folder -> subtype read from the file name.
    if finding == 'NORMAL':
      kag_y_test.append(ohe.transform([le.transform(['kag_normal'])]).reshape(len(le_categories), 1))
    else:
      label = find_class_label(image)
      kag_y_test.append(ohe.transform([le.transform([f'kag_pneumonia_{label}'])]).reshape(len(le_categories), 1))

##### Larger regions

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # Rebuild every layer from its configuration so the fold starts untrained.
  # Using the template layers directly would carry the weights learned in
  # earlier folds (trained on samples now used for validation) into this one.
  fold_layers = [layer.__class__.from_config(layer.get_config()) for layer in different_segments_model]
  model = tf.keras.models.Sequential(fold_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="larger_regions")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()

##### Low-level to high-level

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # Rebuild every layer from its configuration so the fold starts untrained.
  # Using the template layers directly would carry the weights learned in
  # earlier folds (trained on samples now used for validation) into this one.
  fold_layers = [layer.__class__.from_config(layer.get_config()) for layer in high_level_low_level_model]
  model = tf.keras.models.Sequential(fold_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="low_level_high_level")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()

##### Transfer Learning

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # ResNet50 takes 3 channels: repeat the grayscale channel.
  X_train = np.concatenate((X_train, X_train, X_train), axis=-1)
  X_val = np.concatenate((X_val, X_val, X_val), axis=-1)

  # Fresh softmax head on the frozen backbone for this fold. `resnet50` ends in
  # Flatten -> Dense, so `layers[-2]` is the flattened feature output. Reusing
  # `resnet50` itself would carry the head trained in earlier runs and folds
  # (on samples now used for validation) into this fold.
  head = tf.keras.layers.Dense(classes_number, activation='softmax')
  model = tf.keras.Model(inputs=resnet50.input, outputs=head(resnet50.layers[-2].output), name="resnet50")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()

#### AP, PA e AP supine

In [None]:
# Cohen: keep 10% aside as test, then use 78% of the remainder for training and
# the rest for validation.
cohen_x_train, cohen_x_test, cohen_y_train, cohen_y_test = sklearn.model_selection.train_test_split(
    cohen_x_dataset, cohen_y_dataset, test_size=0.1, random_state=seed)
cohen_x_train, cohen_x_val, cohen_y_train, cohen_y_val = sklearn.model_selection.train_test_split(
    cohen_x_train, cohen_y_train, train_size=0.78, random_state=seed)

# Training set: Cohen training part + Kag training folder. The labels arrive as
# (samples, classes, 1) and are flattened to (samples, classes).
X_train = np.concatenate((cohen_x_train, kag_x_train))
y_train = np.concatenate((cohen_y_train, kag_y_train))
y_train = y_train.reshape(y_train.shape[:2])

# Validation set, assembled the same way from the validation parts.
X_val = np.concatenate((cohen_x_val, kag_x_val))
y_val = np.concatenate((cohen_y_val, kag_y_val))
y_val = y_val.reshape(y_val.shape[:2])

Larger regions

In [None]:
# Rebuild every layer from its configuration so this run starts untrained; the
# template layers are shared with every other model assembled from the same list.
run_layers = [layer.__class__.from_config(layer.get_config()) for layer in different_segments_model]
model = tf.keras.models.Sequential(run_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="larger_regions")

history_view_split_cohen_kag = 'view_split_cohen_kag'
output_path = os.path.join(experiments_path, f"{history_view_split_cohen_kag}_{model.name}")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                loss='categorical_crossentropy',
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)

# Persist this run now: the following cells rebind `model`, `history` and
# `output_path`, and the "Save model" cell only stores the last model trained.
model.save(output_path, save_format='h5')
pd.DataFrame(history.history).to_csv(f"{output_path}.csv")

Low-level to high-level

In [None]:
# Rebuild every layer from its configuration so this run starts untrained; the
# template layers are shared with every other model assembled from the same list.
run_layers = [layer.__class__.from_config(layer.get_config()) for layer in high_level_low_level_model]
model = tf.keras.models.Sequential(run_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="low_level_high_level")

history_view_split_cohen_kag = 'view_split_cohen_kag'
output_path = os.path.join(experiments_path, f"{history_view_split_cohen_kag}_{model.name}")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                loss='categorical_crossentropy',
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)

# Persist this run now: the following cell rebinds `model`, `history` and
# `output_path`, and the "Save model" cell only stores the last model trained.
model.save(output_path, save_format='h5')
pd.DataFrame(history.history).to_csv(f"{output_path}.csv")

Transfer Learning

In [None]:
# Put a fresh softmax head on the frozen backbone for this run. `resnet50` ends
# in Flatten -> Dense, so `layers[-2]` is the flattened feature output. Reusing
# `resnet50` itself would continue training the head that earlier runs trained.
head = tf.keras.layers.Dense(classes_number, activation='softmax')
model = tf.keras.Model(inputs=resnet50.input, outputs=head(resnet50.layers[-2].output), name="resnet50")

# ResNet50 takes 3 channels: repeat the grayscale channel. The copies get their
# own names so `X_train` / `X_val` stay single-channel for the two grayscale
# models above and re-running this cell does not stack channels on channels.
X_train_rgb = np.concatenate((X_train, X_train, X_train), axis=-1)
X_val_rgb = np.concatenate((X_val, X_val, X_val), axis=-1)

history_view_split_cohen_kag = 'view_split_cohen_kag'
output_path = os.path.join(experiments_path, f"{history_view_split_cohen_kag}_{model.name}")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                loss='categorical_crossentropy',
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

history = model.fit(X_train_rgb, y_train, batch_size=batch_size, validation_data=(X_val_rgb, y_val), epochs=epochs)

Save model

In [None]:
# Persist the trained network in HDF5 format, then its per-epoch metrics as CSV.
model.save(output_path, save_format='h5')
metrics_csv_path = os.path.join(experiments_path, f"{history_view_split_cohen_kag}_{model.name}.csv")
df = pd.DataFrame(history.history)
df.to_csv(metrics_csv_path)

#### Cross-validation AP, PA e AP supine

In [None]:
# The dataset's "val" folder is used as this notebook's Kag test split.
kag_test_dir = os.path.join(kag_dir, "val")
kag_x_test = []
kag_y_test = []

# Walk `kag_test_dir`. The loop used to walk `kag_val_dir`, which made the test
# split an exact copy of the validation split.
for finding in os.listdir(kag_test_dir):
  finding_path = os.path.join(kag_test_dir, finding)
  # Only class folders are walked; stray files next to them are ignored.
  if not os.path.isdir(finding_path):
    continue
  for image in os.listdir(finding_path):
    # Hidden entries (e.g. .DS_Store) are not images.
    if image.startswith('.'):
      continue
    with PIL.Image.open(os.path.join(finding_path, image)) as img:
      img_resized = img.convert('L')
      img_resized = img_resized.resize((img_width, img_height))
      kag_x_test.append(np.reshape(np.asarray(img_resized), (img_height, img_width, 1)))

    # NORMAL folder -> healthy; any other folder -> subtype read from the file name.
    if finding == 'NORMAL':
      kag_y_test.append(ohe.transform([le.transform(['kag_normal'])]).reshape(len(le_categories), 1))
    else:
      label = find_class_label(image)
      kag_y_test.append(ohe.transform([le.transform([f'kag_pneumonia_{label}'])]).reshape(len(le_categories), 1))

##### Larger regions

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'view_split_cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # Rebuild every layer from its configuration so the fold starts untrained.
  # Using the template layers directly would carry the weights learned in
  # earlier runs and folds (on samples now used for validation) into this one.
  fold_layers = [layer.__class__.from_config(layer.get_config()) for layer in different_segments_model]
  model = tf.keras.models.Sequential(fold_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="larger_regions")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()

##### Low-level to high-level

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'view_split_cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # Rebuild every layer from its configuration so the fold starts untrained.
  # Using the template layers directly would carry the weights learned in
  # earlier runs and folds (on samples now used for validation) into this one.
  fold_layers = [layer.__class__.from_config(layer.get_config()) for layer in high_level_low_level_model]
  model = tf.keras.models.Sequential(fold_layers+
                                    [tf.keras.layers.Dense(classes_number, activation='softmax')], name="low_level_high_level")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()

##### Transfer Learning

In [None]:
# Pool every Cohen and Kag sample; labels are flattened to (samples, classes).
cohen_kag_x_dataset = np.concatenate((cohen_x_dataset, kag_x_train, kag_x_val, kag_x_test))
cohen_kag_y_dataset = np.concatenate((cohen_y_dataset, kag_y_train, kag_y_val, kag_y_test))
cohen_kag_y_dataset = cohen_kag_y_dataset.reshape(cohen_kag_y_dataset.shape[0], cohen_kag_y_dataset.shape[1])

# Hold out 10% of the pool. NOTE(review): X_test / y_test are not evaluated in this cell.
cohen_kag_x_dataset, X_test, cohen_kag_y_dataset, y_test = sklearn.model_selection.train_test_split(cohen_kag_x_dataset, cohen_kag_y_dataset, test_size=0.1, random_state=seed)

# Five random 78% / 22% train / validation partitions of the remaining samples.
cv = sklearn.model_selection.ShuffleSplit(n_splits=5, train_size=0.78, random_state=seed)
history_cross_cohen_kag = 'view_split_cv_cohen_kag'

for i, (train_index, val_index) in enumerate(cv.split(cohen_kag_x_dataset)):
  X_train, y_train = cohen_kag_x_dataset[train_index], cohen_kag_y_dataset[train_index]
  X_val, y_val = cohen_kag_x_dataset[val_index], cohen_kag_y_dataset[val_index]

  # ResNet50 takes 3 channels: repeat the grayscale channel.
  X_train = np.concatenate((X_train, X_train, X_train), axis=-1)
  X_val = np.concatenate((X_val, X_val, X_val), axis=-1)

  # Fresh softmax head on the frozen backbone for this fold. `resnet50` ends in
  # Flatten -> Dense, so `layers[-2]` is the flattened feature output. Reusing
  # `resnet50` itself would carry the head trained in earlier runs and folds
  # (on samples now used for validation) into this fold.
  head = tf.keras.layers.Dense(classes_number, activation='softmax')
  model = tf.keras.Model(inputs=resnet50.input, outputs=head(resnet50.layers[-2].output), name="resnet50")

  output_path = os.path.join(experiments_path, f"{history_cross_cohen_kag}_iteration_{i}_{model.name}")
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_val, y_val), epochs=epochs)
  # One model file and one metrics table per fold.
  model.save(output_path, save_format='h5')
  df = pd.DataFrame(history.history)
  df.to_csv(f"{output_path}.csv")
  gc.collect()