In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
%pip install mlflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import Data Science Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torchvision import models
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import mlflow
import mlflow.pytorch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, classification_report, confusion_matrix
from sklearn.preprocessing import label_binarize

# System libraries
from pathlib import Path
import os.path
import random

# Visualization Libraries
import matplotlib.cm as cm
import cv2
import seaborn as sns
from PIL import Image

sns.set_style('darkgrid')

In [4]:
# Configuração do MLflow
mlflow.set_experiment("classification_birds_efficientnet")

2024/01/24 15:26:51 INFO mlflow.tracking.fluent: Experiment with name 'classification_birds_efficientnet' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/Users/aldan/Documents/GitHub/aves/mlruns/571519730754704629', creation_time=1706120811919, experiment_id='571519730754704629', last_update_time=1706120811919, lifecycle_stage='active', name='classification_birds_efficientnet', tags={}>

In [5]:
BATCH_SIZE = 32
TARGET_SIZE = (224, 224)

In [6]:
data_path = r"C:\\Users\\aldan\\Documents\\GitHub\\aves\\100-bird-species"

In [7]:
# Carregar e preparar o dataset
def load_dataset(batch_size, data_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    train_dataset = datasets.ImageFolder(root=data_path + '\\train', transform=transform)
    val_dataset = datasets.ImageFolder(root=data_path + '\\valid', transform=transform)
    test_dataset = datasets.ImageFolder(root=data_path + '\\test', transform=transform)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader


No Optuna, foi avaliado que o melhor modelo e os melhores hiperparâmetros foram:
Melhores hiperparâmetros: {'architecture': 'EfficientNetV2_S', 'num_units': 256, 'dropout_rate': 0.26593048478145276, 'learning_rate': 0.0005402792841480648}
Então repeti o mesmo modelo com os mesmo hiperparâmetros especificados no Optuna.

In [8]:
# treinamento do melhor modelo com melhores hiperparâmetros

import torch.nn as nn
from torchvision import models

class CustomModel(nn.Module):
    def __init__(self, num_units, dropout_rate, num_classes):
        super(CustomModel, self).__init__()
        
        # Usando efficientnet_v2_s do torchvision
        self.base_model = models.efficientnet_v2_s(pretrained=True)
        in_features = self.base_model.classifier[1].in_features
        self.base_model.classifier = nn.Identity()

        # Camadas personalizadas com os melhores hiperparâmetros
        self.new_layers = nn.Sequential(
            nn.Linear(in_features, num_units),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_units),
            nn.Dropout(dropout_rate),
            nn.Linear(num_units, num_units // 2),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_units // 2),
            nn.Linear(num_units // 2, num_classes)
        )

    def forward(self, x):
        x = self.base_model(x)
        x = x.view(x.size(0), -1)  # Achatar a saída para a camada linear
        x = self.new_layers(x)
        return x



In [9]:
# Função de Treinamento e Avaliação
def train_model(model, criterion, optimizer, train_loader, val_loader, epochs, device):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}%')

        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)

    return train_loss, val_loss, val_accuracy

In [10]:
def save_model(model, path):
    torch.save(model.state_dict(), path)

In [11]:
def calculate_metrics(outputs, labels, num_classes):
    # Binarize labels for multi-class ROC AUC
    labels_binarized = label_binarize(labels.cpu(), classes=np.arange(num_classes))

    # Compute probabilities
    probabilities = torch.nn.functional.softmax(outputs, dim=1).cpu().detach().numpy()

    # Calculate metrics
    accuracy = accuracy_score(labels.cpu(), np.argmax(probabilities, axis=1))
    precision, recall, f1, _ = precision_recall_fscore_support(labels.cpu(), np.argmax(probabilities, axis=1), average='macro')

    # ROC AUC per class, then average
    try:
        roc_auc = roc_auc_score(labels_binarized, probabilities, multi_class='ovr')
    except ValueError:
        roc_auc = float('nan')  # ROC AUC might not be applicable for a specific batch

    return accuracy, precision, recall, f1, roc_auc


In [12]:
def evaluate_model(model, test_loader, num_classes, device):
    model.to(device)
    model.eval()

    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f1 = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            accuracy, precision, recall, f1, roc_auc = calculate_metrics(outputs, labels, num_classes)
            all_accuracy.append(accuracy)
            all_precision.append(precision)
            all_recall.append(recall)
            all_f1.append(f1)

    avg_accuracy = np.mean(all_accuracy)
    avg_precision = np.mean(all_precision)
    avg_recall = np.mean(all_recall)
    avg_f1 = np.mean(all_f1)

    return avg_accuracy, avg_precision, avg_recall, avg_f1


In [13]:
if torch.cuda.is_available():
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is NOT available")

GPU is available: NVIDIA GeForce RTX 4060 Laptop GPU


In [14]:
batch_size = 32
epochs = 10
learning_rate = 0.0005402792841480648
num_units = 256
dropout_rate = 0.26593048478145276
num_classes = len(os.listdir(data_path + "\\test"))
train_loader, val_loader, test_loader = load_dataset(batch_size, data_path)

model = CustomModel(num_units, dropout_rate, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters(), lr=learning_rate)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda")
model.to(device)

model_path = r"C:\\Users\\aldan\\Documents\\GitHub\\aves\\model_efficientnet.pth"

with mlflow.start_run() as run:
    train_loss, val_loss, val_accuracy = train_model(model=model, train_loader=train_loader, val_loader=val_loader, criterion=criterion, optimizer=optimizer, epochs=epochs, device=device)

    # Salvando o modelo após o treinamento
    save_model(model, model_path)
    print(f"Modelo salvo em {model_path}")
    
    mlflow.pytorch.log_model(model, "model")

    # Avaliação do modelo
    accuracy, precision, recall, f1 = evaluate_model(model, test_loader, num_classes, device)
    
    # Log das métricas de avaliação
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)


Epoch 1, Train Loss: 2.923011757546302, Validation Loss: 0.4082431171725436, Validation Accuracy: 93.58778625954199%
Epoch 2, Train Loss: 0.39966158591859946, Validation Loss: 0.14332761348611334, Validation Accuracy: 96.6412213740458%
Epoch 3, Train Loss: 0.18162768432503623, Validation Loss: 0.14232785460857175, Validation Accuracy: 96.56488549618321%
Epoch 4, Train Loss: 0.10990879824385047, Validation Loss: 0.12753725897452636, Validation Accuracy: 96.87022900763358%
Epoch 5, Train Loss: 0.07495152779914806, Validation Loss: 0.10787668542101661, Validation Accuracy: 97.44274809160305%
Epoch 6, Train Loss: 0.05785674428995911, Validation Loss: 0.11463000730713602, Validation Accuracy: 97.36641221374046%
Epoch 7, Train Loss: 0.04383302144547501, Validation Loss: 0.12038464795602145, Validation Accuracy: 97.44274809160305%
Epoch 8, Train Loss: 0.03762973450405601, Validation Loss: 0.13419054494815788, Validation Accuracy: 97.13740458015268%
Epoch 9, Train Loss: 0.03271710265460459, Va

In [None]:
# Carregando o modelo treinado
model.load_state_dict(torch.load(r"C:\\Users\\aldan\\Documents\\GitHub\\aves\\model_efficientnet.pth"))
model.eval()

CustomModel(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): BatchNorm

In [15]:
file_path = r'C:\\Users\\aldan\\Documents\\GitHub\\aves\\mlruns\\571519730754704629\\f75a9a4805114a5392569df25ecc48ad\\metrics\\accuracy'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Acurácia do teste: {content}")

Acurácia do teste: 1706128107344 0.9885670731707317 0



In [16]:
file_path = r'C:\\Users\\aldan\\Documents\\GitHub\\aves\\mlruns\\571519730754704629\\f75a9a4805114a5392569df25ecc48ad\\metrics\\f1_score'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"F1-Score: {content}")

F1-Score: 1706128107349 0.954516966021031 0



In [17]:
file_path = r'C:\\Users\\aldan\\Documents\\GitHub\\aves\\mlruns\\571519730754704629\\f75a9a4805114a5392569df25ecc48ad\\metrics\\precision'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Precisão: {content}")

Precisão: 1706128107346 0.9596617305458769 0



In [28]:
file_path = r'C:\\Users\\aldan\\Documents\\GitHub\\aves\\mlruns\\571519730754704629\\f75a9a4805114a5392569df25ecc48ad\\metrics\\recall'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Recall: {content}")

Recall: 1706043547536 0.9436636178861788 0



In [18]:
!pip list

Package             Version
------------------- ------------
alembic             1.13.1
asttokens           2.4.1
backcall            0.2.0
blinker             1.7.0
Brotli              1.0.9
certifi             2023.11.17
cffi                1.16.0
chardet             4.0.0
charset-normalizer  3.3.2
click               8.1.7
cloudpickle         3.0.0
colorama            0.4.6
colorlog            6.8.0
comm                0.2.1
contourpy           1.1.1
cryptography        41.0.7
cycler              0.12.1
databricks-cli      0.18.0
debugpy             1.6.7
decorator           5.1.1
docker              6.1.3
entrypoints         0.4
et-xmlfile          1.1.0
executing           2.0.1
filelock            3.13.1
Flask               3.0.1
fonttools           4.47.2
fsspec              2023.12.2
gitdb               4.0.11
GitPython           3.1.41
gmpy2               2.1.2
greenlet            3.0.3
idna                3.4
importlib-metadata  7.0.1
importlib-resources 6.1.1
ipykernel      

Avaliação das métricas:
**Desempenho Global**: O modelo demonstra um desempenho excepcional em todas as métricas. Uma acurácia de mais de 98% é rara em muitos cenários de classificação, especialmente em problemas com um grande número de classes, como na classificação de imagens de pássaros.

**Equilíbrio entre Precisão e Recall**: O F1-Score alto sugere um excelente equilíbrio entre precisão e recall. Isso indica que o modelo não só identifica corretamente a maioria das classes positivas (alta precisão), mas também captura a grande maioria das ocorrências positivas (alto recall).