# **IMPORTS**

In [20]:
import torch
import os
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import VGG16_Weights, VGG19_Weights
from torchvision.models import ResNet50_Weights, ResNet152_Weights
from torchvision.models import DenseNet201_Weights
from torchvision.models import Inception_V3_Weights
from torchvision.models import MobileNet_V2_Weights
import torchvision
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, log_loss
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder
from torchvision import transforms
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [21]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Usando o dispositivo: {device}')

Usando o dispositivo: cuda


# **Transformando Imagens**

In [22]:
# Per-channel normalization shared by both pipelines below.
_imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Pipeline producing 224x224 tensors: resize the short side to 256, then
# take the central 224x224 crop.
preprocess_224 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    _imagenet_normalize,
])

# Pipeline producing 299x299 tensors: resize the short side to 299, then
# take the central 299x299 crop.
preprocess_299 = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    _imagenet_normalize,
])

# **Doenças**

In [23]:
# Label vocabulary. A label's position in this list is the index used for it
# in the target vector built by the dataset class.
class_names = [
    'Atelectasis', 'Consolidation', 'Infiltration', 'Pneumothorax', 'Edema',
    'Emphysema', 'Fibrosis', 'Effusion', 'Pneumonia', 'Pleural_Thickening',
    'Cardiomegaly', 'Nodule', 'Hernia', 'Mass', 'No Finding'
]
# Size of the target vector and of every replaced classifier head.
num_classes = len(class_names)

def one_hot_encode_labels(labels_list, n_classes=None):
    """One-hot encode a sequence of integer class indices.

    Parameters:
    - labels_list: sequence of integer class indices, each in
      ``[0, n_classes)``. May be empty.
    - n_classes: width of the encoding. Defaults to the module-level
      ``num_classes`` when omitted.

    Returns:
    - A ``torch.float32`` tensor of shape ``(len(labels_list), n_classes)``
      with exactly one ``1.0`` per row.

    Raises:
    - ValueError: if any index falls outside ``[0, n_classes)``.
    """
    if n_classes is None:
        n_classes = num_classes

    indices = torch.as_tensor(labels_list, dtype=torch.long).reshape(-1)
    count = indices.numel()

    # Reject out-of-range indices explicitly instead of letting the indexed
    # assignment below wrap negative values around or fail obscurely.
    if count and (int(indices.min()) < 0 or int(indices.max()) >= n_classes):
        raise ValueError(
            f'label indices must be in [0, {n_classes}); got values outside that range'
        )

    # Direct indexed assignment: no encoder has to be fitted per call, and an
    # empty input simply yields a (0, n_classes) tensor.
    encoded = torch.zeros((count, n_classes), dtype=torch.float32)
    encoded[torch.arange(count), indices] = 1.0
    return encoded

# Avaliação

In [24]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, log_loss
import seaborn as sns
import matplotlib.pyplot as plt

def evaluate_model_multilabel(model, val_loader, device, num_classes):
    """
    Evaluate a multi-label classifier on a validation set.

    The model is assumed to return raw logits (one per class), as is the case
    for the models in this notebook, which are trained with
    ``BCEWithLogitsLoss``. Logits are converted to probabilities with a
    sigmoid before thresholding and before computing probability-based
    metrics.

    Parameters:
    - model: the model to evaluate.
    - val_loader: iterable yielding ``(images, labels)`` batches, where
      ``labels`` is a multi-hot tensor with ``num_classes`` columns.
    - device: device the model lives on; image batches are moved to it.
    - num_classes: number of label columns; one confusion matrix is plotted
      per class.

    Returns:
    - A dict with the keys ``precision``, ``recall``, ``f1_score``,
      ``roc_auc`` (all macro-averaged) and ``log_loss`` (mean binary
      cross-entropy over all samples and classes).

    Raises:
    - ValueError: if ``val_loader`` yields no batches. ``roc_auc_score`` may
      also raise ValueError when a class has a single distinct value in the
      ground truth.
    """
    true_batches = []
    prob_batches = []

    # Inference mode: disables dropout / uses running batch-norm statistics.
    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            logits = model(images.to(device))
            # Sigmoid turns per-class logits into independent probabilities.
            prob_batches.append(torch.sigmoid(logits).cpu().numpy())
            true_batches.append(labels.cpu().numpy())

    if not true_batches:
        raise ValueError('val_loader produced no batches; nothing to evaluate.')

    y_true = np.concatenate(true_batches, axis=0).astype(int)
    y_prob = np.concatenate(prob_batches, axis=0).astype(np.float64)
    # A probability above 0.5 is equivalent to a positive logit.
    y_pred = (y_prob > 0.5).astype(int)

    # Macro-averaged metrics. zero_division=0 keeps the value scikit-learn
    # would use anyway for classes that are never predicted, minus the warning.
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    roc_auc = roc_auc_score(y_true, y_prob, average='macro')

    # Mean binary cross-entropy over every (sample, class) entry. Clipping
    # avoids log(0) for saturated probabilities.
    clipped = np.clip(y_prob, 1e-12, 1.0 - 1e-12)
    log_loss_value = float(
        -np.mean(y_true * np.log(clipped) + (1 - y_true) * np.log(1.0 - clipped))
    )

    # One confusion matrix per class; labels=[0, 1] forces a 2x2 matrix even
    # when a class is never present or never predicted.
    for i in range(num_classes):
        conf_matrix = confusion_matrix(y_true[:, i], y_pred[:, i], labels=[0, 1])
        plt.figure(figsize=(6, 4))
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
        plt.ylabel('True Labels')
        plt.xlabel('Predicted Labels')
        plt.title(f'Confusion Matrix - Class {i}')
        plt.show()

    # Print the metrics
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1-Score: {f1}')
    print(f'ROC AUC: {roc_auc}')
    print(f'Log Loss: {log_loss_value}')

    # Return the metrics as a dictionary
    return {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'log_loss': log_loss_value
    }

# Exemplo de como chamar a função para avaliar o modelo multilabel:
# metrics = evaluate_model_multilabel(vgg16, val_loader, device, num_classes=num_classes)


# Treino

In [25]:
from tqdm import tqdm

def train_model(model, train_loader, optimizer, criterion, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs and return the loss history.

    Parameters:
    - model: the network to train; switched to train mode every epoch.
    - train_loader: iterable yielding ``(images, labels)`` batches.
    - optimizer: optimizer already bound to ``model``'s parameters.
    - criterion: loss called as ``criterion(outputs, labels)``.
    - num_epochs: number of passes over ``train_loader``.
    - device: device the batches are moved to before the forward pass.

    Returns:
    - A list with the mean batch loss of each epoch.

    Raises:
    - ValueError: if ``train_loader`` yields no batches in an epoch.
    """
    loss_history = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        progress_bar = tqdm(train_loader, desc=f'Época {epoch+1}/{num_epochs}', unit='batch')

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            # Some models (e.g. Inception-V3 in train mode) return a
            # (logits, aux_logits) tuple; only the primary logits are used.
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            num_batches += 1
            progress_bar.set_postfix(loss=batch_loss)

        # Counting batches (instead of len(train_loader)) avoids a
        # ZeroDivisionError and also works for loaders without __len__.
        if num_batches == 0:
            raise ValueError('train_loader produced no batches; cannot compute the epoch loss.')

        epoch_loss = running_loss / num_batches
        loss_history.append(epoch_loss)
        print(f'Época {epoch+1}, Loss: {epoch_loss}')

    return loss_history

# **Classe**

In [26]:
class ChestXrayDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs for multi-label training.

    Images are read from ``<img_dir>/images/<image name>``. Labels come from
    the ``'Finding Labels'`` column of ``dataframe`` (a ``'|'``-separated
    string) for the row whose ``'Image Index'`` equals the image name, and are
    returned as a multi-hot ``float32`` tensor with ``num_classes`` entries,
    indexed according to the module-level ``class_names``.
    """

    def __init__(self, image_list, dataframe, img_dir, transform=None):
        """
        Parameters:
        - image_list: sequence of image file names served by this dataset.
        - dataframe: table with ``'Image Index'`` and ``'Finding Labels'``
          columns.
        - img_dir: directory that contains the ``images`` sub-directory.
        - transform: optional callable applied to the PIL image.
        """
        self.image_list = image_list
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform
        # Retained only so code reading `dataset.encoder` keeps working;
        # __getitem__ no longer uses it.
        self.encoder = OneHotEncoder(categories=[range(num_classes)], sparse_output=False)

        # Build the name -> label-string lookup once, so __getitem__ does not
        # scan the whole DataFrame per sample. keep='first' mirrors the
        # previous behaviour of taking the first matching row.
        first_rows = dataframe.drop_duplicates(subset='Image Index', keep='first')
        self._labels_by_image = dict(
            zip(first_rows['Image Index'], first_rows['Finding Labels'])
        )
        self._class_to_index = {name: i for i, name in enumerate(class_names)}

    def __len__(self):
        """Return the number of images in ``image_list``."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load image ``idx`` and build its multi-hot label vector.

        Raises:
        - KeyError: if the image name has no row in the metadata table.
        - ValueError: if a label is not present in ``class_names``.
        """
        img_name = self.image_list[idx]
        img_path = os.path.join(self.img_dir, 'images', img_name)
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted into a new RGB image.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # KeyError (not IndexError) on purpose: an IndexError raised from
        # __getitem__ can be mistaken for "end of sequence" by plain iteration.
        try:
            labels_str = self._labels_by_image[img_name]
        except KeyError:
            raise KeyError(f'No metadata row found for image {img_name!r}') from None

        labels = torch.zeros(num_classes)
        for label in labels_str.split('|'):
            try:
                labels[self._class_to_index[label]] = 1.0
            except KeyError:
                raise ValueError(
                    f'Unknown finding label {label!r} for image {img_name!r}'
                ) from None

        return image, labels


## Variáveis Globais

In [27]:
# Dataset root. Set the CHEST_XRAY_BASE_PATH environment variable to run the
# notebook on another machine; the original local path stays as the default.
base_path = os.environ.get('CHEST_XRAY_BASE_PATH', 'D:/Users/Lucas/Downloads/Outra Pasta/')
# Other cells build paths as `base_path + file_name`, so a trailing separator
# is required.
if not base_path.endswith(('/', '\\')):
    base_path += '/'
metadata_df = pd.read_csv(base_path + 'Data_Entry_2017.csv')

def load_image_list(file_path):
    """Read a text file and return its lines, without line terminators."""
    with open(file_path, 'r') as handle:
        contents = handle.read()
    return contents.splitlines()

# Image file names for the train+validation pool and for the test set.
train_val_list = load_image_list(base_path + 'train_val_list.txt')
test_list = load_image_list(base_path + 'test_list.txt')
num_epochs = 5
# Derived from the label vocabulary rather than hard-coded, so the two can
# never drift apart.
num_classes = len(class_names)

# **Modelos**

## **VGG16**

In [28]:
# Instantiate VGG16 with the IMAGENET1K_V1 weights.
vgg16 = models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1)

In [29]:
# Swap the last classifier layer for a num_classes-unit linear head, then
# move the whole network (new head included) to the target device.
vgg16_head_inputs = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(vgg16_head_inputs, num_classes)
vgg16 = vgg16.to(device)


In [30]:
# Hold out 20% of the train/val pool for validation (fixed seed).
train_list, val_list = train_test_split(train_val_list, test_size=0.2, random_state=42)

# One dataset per split, all sharing the 224x224 preprocessing.
train_dataset, val_dataset, test_dataset = (
    ChestXrayDataset(split_names, metadata_df, base_path, transform=preprocess_224)
    for split_names in (train_list, val_list, test_list)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


### **Treino**

In [31]:
# Loss applied to the model outputs and the multi-hot targets.
criterion = nn.BCEWithLogitsLoss()
# Adam over every VGG16 parameter.
optimizer = optim.Adam(vgg16.parameters(), lr=0.001)

# NOTE(review): in the recorded run the epoch loss stays at ~0.198 from
# epoch 2 onward, i.e. training appears to have stalled. Possibly the
# learning rate is too high for fine-tuning the whole network — TODO confirm.
loss_history = train_model(vgg16, train_loader, optimizer, criterion, num_epochs, device)

Época 1/5: 100%|██████████| 2164/2164 [1:20:15<00:00,  2.23s/batch, loss=0.233]


Época 1, Loss: 0.20315680467625202


Época 2/5: 100%|██████████| 2164/2164 [1:15:29<00:00,  2.09s/batch, loss=0.237]


Época 2, Loss: 0.19829703822332587


Época 3/5: 100%|██████████| 2164/2164 [1:15:32<00:00,  2.09s/batch, loss=0.23] 


Época 3, Loss: 0.19820399107945164


Época 4/5: 100%|██████████| 2164/2164 [1:16:02<00:00,  2.11s/batch, loss=0.5]  


Época 4, Loss: 0.1982504340291519


Época 5/5: 100%|██████████| 2164/2164 [1:16:00<00:00,  2.11s/batch, loss=0.158]

Época 5, Loss: 0.19808177654106385





### **Avaliação**

In [32]:
# Switch to evaluation mode (evaluate_model_multilabel also calls
# model.eval() itself).
vgg16.eval()

# Validation metrics for VGG16; also plots one confusion matrix per class.
metrics = evaluate_model_multilabel(vgg16, val_loader, device, num_classes)


## **VGG19**

In [None]:
# Pretrained VGG19 whose last classifier layer is replaced by a
# num_classes-unit linear head; the network is then moved to the device.
vgg19 = models.vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
vgg19_head_inputs = vgg19.classifier[6].in_features
vgg19.classifier[6] = nn.Linear(vgg19_head_inputs, num_classes)
vgg19 = vgg19.to(device)

# Same seeded 80/20 split of the train/val pool as used for VGG16.
train_list, val_list = train_test_split(train_val_list, test_size=0.2, random_state=42)

# One dataset per split, all sharing the 224x224 preprocessing.
train_dataset, val_dataset, test_dataset = (
    ChestXrayDataset(split_names, metadata_df, base_path, transform=preprocess_224)
    for split_names in (train_list, val_list, test_list)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to C:\Users\Windows/.cache\torch\hub\checkpoints\vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:05<00:00, 107MB/s]  


### **Treino**

In [None]:
# Loss applied to the model outputs and the multi-hot targets.
criterion = nn.BCEWithLogitsLoss()
# Adam over every VGG19 parameter.
optimizer = optim.Adam(vgg19.parameters(), lr=0.001)

# NOTE(review): in the recorded run the epoch loss stays at ~0.198 from
# epoch 2 onward, the same plateau seen with VGG16. Possibly the learning
# rate is too high for fine-tuning the whole network — TODO confirm.
loss_history = train_model(vgg19, train_loader, optimizer, criterion, num_epochs, device)

Época 1/5: 100%|██████████| 2164/2164 [1:28:46<00:00,  2.46s/batch, loss=0.128]


Época 1, Loss: 0.24808471013280578


Época 2/5: 100%|██████████| 2164/2164 [1:22:09<00:00,  2.28s/batch, loss=0.0884]


Época 2, Loss: 0.19818850679670816


Época 3/5: 100%|██████████| 2164/2164 [1:21:49<00:00,  2.27s/batch, loss=0.254]


Época 3, Loss: 0.19815622946305989


Época 4/5: 100%|██████████| 2164/2164 [1:23:12<00:00,  2.31s/batch, loss=0.332]


Época 4, Loss: 0.1981703033997865


Época 5/5: 100%|██████████| 2164/2164 [1:26:04<00:00,  2.39s/batch, loss=0.199]

Época 5, Loss: 0.19803778880900685





### **Avaliação**

In [None]:
# Switch to evaluation mode (evaluate_model_multilabel also calls
# model.eval() itself).
vgg19.eval()

# NOTE(review): the output recorded under this cell ("Validação Loss ...,
# Acurácia ...") does not match what evaluate_model_multilabel prints
# (Precision / Recall / F1-Score / ROC AUC / Log Loss), so it presumably comes
# from an earlier version of the function — re-run to refresh it.
metrics = evaluate_model_multilabel(vgg19, val_loader, device, num_classes)

Validação Loss: 0.19994776994816257, Acurácia: 91.9113936241934%


## **Inception-V3**

In [None]:
# Pretrained Inception-V3 whose final `fc` layer is replaced by a
# num_classes-unit linear head; the network is then moved to the device.
inception_v3 = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)
inception_head_inputs = inception_v3.fc.in_features
inception_v3.fc = nn.Linear(inception_head_inputs, num_classes)
inception_v3 = inception_v3.to(device)

In [None]:
# Same seeded 80/20 split of the train/val pool as used for the other models.
train_list, val_list = train_test_split(train_val_list, test_size=0.2, random_state=42)

# This model is fed 299x299 inputs, hence preprocess_299 rather than
# preprocess_224.
train_dataset = ChestXrayDataset(train_list, metadata_df, base_path, transform=preprocess_299)
val_dataset = ChestXrayDataset(val_list, metadata_df, base_path, transform=preprocess_299)
test_dataset = ChestXrayDataset(test_list, metadata_df, base_path, transform=preprocess_299)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# The model is deliberately NOT re-instantiated here. Creating a fresh
# models.inception_v3(...) at this point would discard the num_classes-unit
# `fc` head installed in the model-definition cell and restore the original
# 1000-class head, whose outputs cannot be compared with the multi-hot targets.
inception_v3 = inception_v3.to(device)

In [None]:
# The loss and optimizer must be created for THIS model. Reusing the
# `optimizer` left over from a previous section would step another network's
# parameters and leave Inception-V3 untrained.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(inception_v3.parameters(), lr=0.001)

for epoch in range(num_epochs):
    inception_v3.train()
    running_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f'Época {epoch+1}/{num_epochs}', unit='batch')

    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        # In train mode the model returns (logits, aux_logits); only the
        # primary logits contribute to the loss here.
        outputs, _ = inception_v3(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

    print(f'Época {epoch+1}, Loss: {running_loss/len(train_loader)}')


Época 1/5:   0%|          | 0/2164 [00:46<?, ?batch/s]


KeyboardInterrupt: 

In [None]:
# Switch to evaluation mode (evaluate_model_multilabel also calls
# model.eval() itself), then compute the validation metrics.
inception_v3.eval()
metrics = evaluate_model_multilabel(inception_v3, val_loader, device, num_classes)

## **ResNet50**

In [None]:
# Pretrained ResNet50 whose final `fc` layer is replaced by a
# num_classes-unit linear head; the network is then moved to the device.
resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
resnet50_head_inputs = resnet50.fc.in_features
resnet50.fc = nn.Linear(resnet50_head_inputs, num_classes)
resnet50 = resnet50.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Windows/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 81.7MB/s]


In [None]:
# One dataset per split (reusing the existing train/val split), all sharing
# the 224x224 preprocessing.
train_dataset, val_dataset, test_dataset = (
    ChestXrayDataset(split_names, metadata_df, base_path, transform=preprocess_224)
    for split_names in (train_list, val_list, test_list)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Loss applied to the model outputs and the multi-hot targets.
criterion = nn.BCEWithLogitsLoss()
# Adam over every ResNet50 parameter.
optimizer = optim.Adam(resnet50.parameters(), lr=0.001)

# Train for num_epochs epochs; loss_history holds the mean loss per epoch.
loss_history = train_model(resnet50, train_loader, optimizer, criterion, num_epochs, device)

In [None]:
# Switch to evaluation mode (evaluate_model_multilabel also calls
# model.eval() itself), then compute the validation metrics.
resnet50.eval()
metrics = evaluate_model_multilabel(resnet50, val_loader, device, num_classes)

## **ResNet152**

In [None]:
# Pretrained ResNet152 whose final `fc` layer is replaced by a
# num_classes-unit linear head; the network is then moved to the device.
resnet152 = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
resnet152_head_inputs = resnet152.fc.in_features
resnet152.fc = nn.Linear(resnet152_head_inputs, num_classes)
resnet152 = resnet152.to(device)

In [None]:
# One dataset per split (reusing the existing train/val split), all sharing
# the 224x224 preprocessing.
train_dataset, val_dataset, test_dataset = (
    ChestXrayDataset(split_names, metadata_df, base_path, transform=preprocess_224)
    for split_names in (train_list, val_list, test_list)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Loss and optimizer bound to the ResNet152 parameters.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet152.parameters(), lr=0.001)

In [None]:
# Train for num_epochs epochs; loss_history holds the mean loss per epoch.
loss_history = train_model(resnet152, train_loader, optimizer, criterion, num_epochs, device)

In [None]:
# Switch to evaluation mode (evaluate_model_multilabel also calls
# model.eval() itself), then compute the validation metrics.
resnet152.eval()
metrics = evaluate_model_multilabel(resnet152, val_loader, device, num_classes)