In [1]:
import os
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
!pip install scikit-learn



In [3]:
import shutil
import random

In [4]:
import shutil
import random

In [5]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [6]:
import torch
import torch.nn as nn
from torchvision import models

In [7]:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score
import torch.nn.functional as F

In [8]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [9]:
import numpy as np

In [52]:
train_dir = "D:/Downloads/archive/chest_xray/train"
val_dir = "D:/Downloads/archive/chest_xray/val"
test_dir = "D:/Downloads/archive/chest_xray/test"

In [53]:
chest_xray_dir = os.path.join(train_dir, 'chest_xray')

In [54]:
train_dir = os.path.join(chest_xray_dir, 'train')
val_dir = os.path.join(chest_xray_dir, 'val')
test_dir = os.path.join(chest_xray_dir, 'test')

In [55]:
def plot_class_distribution(data, title):
    classes = list(data.keys())
    counts = list(data.values())
    
    plt.figure(figsize=(6, 4))
    plt.bar(classes, counts, color=['blue', 'orange'])
    plt.title(title)
    plt.xlabel('Classes')
    plt.ylabel('Número de Imagens')
    plt.show()

In [56]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cpu'):
    history = {'train_loss': [], 'val_loss': [],
               'train_accuracy': [], 'val_accuracy': [],
               'train_f1': [], 'val_f1': [],
               'train_precision': [], 'val_precision': [],
               'train_recall': [], 'val_recall': []}
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 20)
        
        model.train()
        running_loss = 0.0
        all_preds = []
        all_labels = []
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
        train_loss = running_loss / len(train_loader)
        train_f1 = f1_score(all_labels, all_preds, average='weighted')
        train_precision = precision_score(all_labels, all_preds, average='weighted')
        train_recall = recall_score(all_labels, all_preds, average='weighted')
        train_accuracy = (torch.tensor(all_preds) == torch.tensor(all_labels)).sum().item() / len(all_labels)
        
        history['train_loss'].append(train_loss)
        history['train_accuracy'].append(train_accuracy)
        history['train_f1'].append(train_f1)
        history['train_precision'].append(train_precision)
        history['train_recall'].append(train_recall)

        model.eval()
        val_loss = 0.0
        all_val_preds = []
        all_val_labels = []
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
                _, preds = torch.max(outputs, 1)
                all_val_preds.extend(preds.cpu().numpy())
                all_val_labels.extend(labels.cpu().numpy())
        
        val_loss = val_loss / len(val_loader)
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted')
        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted')
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted')
        val_accuracy = (torch.tensor(all_val_preds) == torch.tensor(all_val_labels)).sum().item() / len(all_val_labels)
        
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy)
        history['val_f1'].append(val_f1)
        history['val_precision'].append(val_precision)
        history['val_recall'].append(val_recall)

        print(f'Treino: Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, F1-score: {train_f1:.4f}, Precision: {train_precision:.4f}, Recall: {train_recall:.4f}')
        print(f'Validação: Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}, F1-score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}')
        print()
    
    return history

In [57]:
def evaluate_model_on_test(model, test_loader, criterion, device='cpu'):
    model.eval()  
    
    test_loss = 0.0
    all_test_preds = []
    all_test_labels = []
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            all_test_preds.extend(preds.cpu().numpy())
            all_test_labels.extend(labels.cpu().numpy())

    test_loss = test_loss / len(test_loader)
    test_f1 = f1_score(all_test_labels, all_test_preds, average='weighted')
    test_precision = precision_score(all_test_labels, all_test_preds, average='weighted')
    test_recall = recall_score(all_test_labels, all_test_preds, average='weighted')
    test_accuracy = (torch.tensor(all_test_preds) == torch.tensor(all_test_labels)).sum().item() / len(all_test_labels)

    print(f'Teste: Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}, F1-score: {test_f1:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}')
    
    return test_loss, test_accuracy, test_f1, test_precision, test_recall

In [58]:
def plot_metrics(history):
    epochs = range(1, len(history['train_loss']) + 1)
    
    # Loss
    plt.figure(figsize=(12, 6))
    plt.subplot(2, 3, 1)
    plt.plot(epochs, history['train_loss'], label='Treino Loss')
    plt.plot(epochs, history['val_loss'], label='Validação Loss')
    plt.title('Loss')
    plt.legend()

    # Accuracy
    plt.subplot(2, 3, 2)
    plt.plot(epochs, history['train_accuracy'], label='Treino Accuracy')
    plt.plot(epochs, history['val_accuracy'], label='Validação Accuracy')
    plt.title('Acurácia')
    plt.legend()

    # F1-Score
    plt.subplot(2, 3, 3)
    plt.plot(epochs, history['train_f1'], label='Treino F1-Score')
    plt.plot(epochs, history['val_f1'], label='Validação F1-Score')
    plt.title('F1-Score')
    plt.legend()

    # Precision
    plt.subplot(2, 3, 4)
    plt.plot(epochs, history['train_precision'], label='Treino Precision')
    plt.plot(epochs, history['val_precision'], label='Validação Precision')
    plt.title('Precision')
    plt.legend()

    # Recall
    plt.subplot(2, 3, 5)
    plt.plot(epochs, history['train_recall'], label='Treino Recall')
    plt.plot(epochs, history['val_recall'], label='Validação Recall')
    plt.title('Recall')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [59]:
def evaluate_and_plot_confusion_matrix(model, test_loader, device='cpu'):
    model.eval()  
    all_test_preds = []
    all_test_labels = []
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            
            _, preds = torch.max(outputs, 1)
            all_test_preds.extend(preds.cpu().numpy())
            all_test_labels.extend(labels.cpu().numpy())

    cm = confusion_matrix(all_test_labels, all_test_preds)
    
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Normal', 'Pneumonia'], yticklabels=['Normal', 'Pneumonia'])
    plt.title('Confusion Matrix - Test Set')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

In [60]:
def plot_confusion_matrix_validation(model, val_loader, device='cpu'):
    model.eval()  
    all_val_preds = []
    all_val_labels = []
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            
            _, preds = torch.max(outputs, 1)
            all_val_preds.extend(preds.cpu().numpy())
            all_val_labels.extend(labels.cpu().numpy())

    cm = confusion_matrix(all_val_labels, all_val_preds)
    
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Normal', 'Pneumonia'], yticklabels=['Normal', 'Pneumonia'])
    plt.title('Confusion Matrix - Validation Set')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

In [61]:
def show_images(dataset_dir, class_name, num_images=5):
    class_path = os.path.join(dataset_dir, class_name)
    images = os.listdir(class_path)[:num_images]
    
    plt.figure(figsize=(12, 6))
    for i, img_name in enumerate(images):
        img_path = os.path.join(class_path, img_name)
        img = Image.open(img_path)
        plt.subplot(1, num_images, i + 1)
        plt.imshow(img, cmap='gray')
        plt.title(class_name)
        plt.axis('off')
    plt.show()

In [62]:
def plot_histogram_of_intensities(dataset_dir, class_name, num_images=5):
    class_path = os.path.join(dataset_dir, class_name)
    images = os.listdir(class_path)[:num_images]
    
    plt.figure(figsize=(10, 5))
    for i, img_name in enumerate(images):
        img_path = os.path.join(class_path, img_name)
        img = Image.open(img_path).convert('L')  
        img_np = np.array(img).flatten()  
        
        plt.subplot(1, num_images, i + 1)
        plt.hist(img_np, bins=50, color='blue', alpha=0.7)
        plt.title(f'Histograma - {class_name} {i+1}')
        plt.xlabel('Valor de Pixel')
        plt.ylabel('Frequência')
    plt.tight_layout()
    plt.show()

In [63]:
train_classes = os.listdir(train_dir)
val_classes = os.listdir(val_dir)
test_classes = os.listdir(test_dir)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'D:/Downloads/archive/chest_xray/train\\chest_xray\\train'

In [64]:
print(f'Classes in training: {train_classes}')
print(f'Classes em validation: {val_classes}')
print(f'Classes em test: {test_classes}')

NameError: name 'train_classes' is not defined