In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [3]:
%pip install mlflow

Collecting mlflow
  Downloading mlflow-1.23.1-py3-none-any.whl (15.6 MB)
Collecting docker>=4.0.0
  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)
Collecting querystring-parser
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting waitress
  Downloading waitress-2.0.0-py3-none-any.whl (56 kB)
Collecting Flask
  Downloading Flask-2.0.3-py3-none-any.whl (95 kB)
Collecting sqlparse>=0.3.1
  Using cached sqlparse-0.4.4-py3-none-any.whl (41 kB)
Collecting gitpython>=2.1.0
  Downloading GitPython-3.1.18-py3-none-any.whl (170 kB)
Collecting databricks-cli>=0.8.7
  Downloading databricks-cli-0.17.8.tar.gz (85 kB)
Collecting click>=7.0
  Using cached click-8.0.4-py3-none-any.whl (97 kB)
Collecting cloudpickle
  Downloading cloudpickle-2.2.1-py3-none-any.whl (25 kB)
Collecting prometheus-flask-exporter
  Downloading prometheus_flask_exporter-0.23.0-py3-none-any.whl (18 kB)
Collecting protobuf>=3.7.0
  Downloading protobuf-3.19.6-cp36-cp36m-win_amd64.whl (897 kB)

In [65]:
# Import Data Science Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import timm
import mlflow
import mlflow.pytorch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, classification_report, confusion_matrix
from sklearn.preprocessing import label_binarize

# System libraries
from pathlib import Path
import os.path
import random

# Visualization Libraries
import matplotlib.cm as cm
import cv2
import seaborn as sns
from PIL import Image

sns.set_style('darkgrid')

In [None]:
# Configuração do MLflow
mlflow.set_experiment("classification_birds_efficientnet")

In [5]:
BATCH_SIZE = 32
TARGET_SIZE = (224, 224)

In [6]:
data_path = r"C:\\Users\\Aldan\\Documents\\bird_classification\\100-bird-species"

In [7]:
# Carregar e preparar o dataset
def load_dataset(batch_size, data_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    train_dataset = datasets.ImageFolder(root=data_path + '\\train', transform=transform)
    val_dataset = datasets.ImageFolder(root=data_path + '\\valid', transform=transform)
    test_dataset = datasets.ImageFolder(root=data_path + '\\test', transform=transform)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader


No Optuna, foi avaliado que o melhor modelo e os melhores hiperparâmetros foram:
Trial 84 finished with value: 96.06870229007633 and parameters: {'architecture': 'EfficientNetB0', 'num_units': 1024, 'dropout_rate': 0.26805261168394723, 'learning_rate': 0.0005527843132235871}. Best is trial 84 with value: 96.06870229007633.
Então repeti o mesmo modelo com os mesmo hiperparâmetros especificados no Optuna.

In [8]:
# treinamento do melhor modelo com melhores hiperparâmetros

# Definindo o modelo personalizado
class CustomModel(nn.Module):
    def __init__(self, num_units, dropout_rate, num_classes):
        super(CustomModel, self).__init__()
        self.base_model = timm.create_model('efficientnet_b0', pretrained=True)
        in_features = self.base_model.get_classifier().in_features
        self.base_model.reset_classifier(0)

        # Descongelando as últimas camadas
        for name, parameter in self.base_model.named_parameters():
            if 'blocks' in name and int(name.split('.')[1]) >= 5:
                parameter.requires_grad = True

        # Camadas personalizadas com os melhores hiperparâmetros
        self.new_layers = nn.Sequential(
            nn.Linear(in_features, num_units),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_units),
            nn.Dropout(dropout_rate),
            nn.Linear(num_units, num_units // 2),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_units // 2),
            nn.Linear(num_units // 2, num_classes)
        )

    def forward(self, x):
        x = self.base_model(x)
        x = self.new_layers(x)
        return x


In [9]:
# Função de Treinamento e Avaliação
def train_model(model, criterion, optimizer, train_loader, val_loader, epochs, device):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}%')

        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)

    return train_loss, val_loss, val_accuracy

In [10]:
if torch.cuda.is_available():
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is NOT available")

GPU is available: NVIDIA GeForce RTX 4090


In [45]:
batch_size = 32
epochs = 25
learning_rate = 0.0005527843132235871
num_units = 1024
dropout_rate = 0.26805261168394723
num_classes = len(os.listdir(r"C:\\Users\\Aldan\\Documents\\bird_classification\\100-bird-species\\test"))
train_loader, val_loader, test_loader = load_dataset(batch_size, data_path)

model = CustomModel(num_units, dropout_rate, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters(), lr=learning_rate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

with mlflow.start_run() as run:
    train_loss, val_loss, val_accuracy = train_model(model=model, train_loader=train_loader, val_loader=val_loader, criterion=criterion, optimizer=optimizer, epochs=epochs, device=device)

    mlflow.pytorch.log_model(model, "model")


Epoch 1, Train Loss: 1.7048035259161032, Validation Loss: 0.20657315109742852, Validation Accuracy: 95.26717557251908%
Epoch 2, Train Loss: 0.24952891094920535, Validation Loss: 0.11552301169055082, Validation Accuracy: 97.32824427480917%
Epoch 3, Train Loss: 0.11454417725608712, Validation Loss: 0.09825687585455342, Validation Accuracy: 97.09923664122137%
Epoch 4, Train Loss: 0.06230563949528997, Validation Loss: 0.09588073689643885, Validation Accuracy: 97.55725190839695%
Epoch 5, Train Loss: 0.03784968656228474, Validation Loss: 0.1003006163334315, Validation Accuracy: 97.63358778625954%
Epoch 6, Train Loss: 0.024817665897805426, Validation Loss: 0.09362123060933167, Validation Accuracy: 97.55725190839695%
Epoch 7, Train Loss: 0.0177147604837294, Validation Loss: 0.11349890893857596, Validation Accuracy: 97.40458015267176%
Epoch 8, Train Loss: 0.014479046307744945, Validation Loss: 0.10874568661192159, Validation Accuracy: 97.44274809160305%
Epoch 9, Train Loss: 0.012054186241617902

In [46]:
def save_model(model, path):
    torch.save(model.state_dict(), path)

In [47]:
model_path = r"C:\\Users\\Aldan\\Documents\\bird_classification\\model_efficientnet.pth"

In [48]:
# Salvando o modelo
save_model(model, model_path)

print(f"Modelo salvo em {model_path}")

Modelo salvo em C:\\Users\\Aldan\\Documents\\bird_classification\\model_efficientnet.pth


In [49]:
# Carregando o modelo treinado
model.load_state_dict(torch.load(r"C:\\Users\\Aldan\\Documents\\bird_classification\\model_efficientnet.pth"))
model.eval()

CustomModel(
  (base_model): EfficientNet(
    (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNormAct2d(
            32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size

In [50]:
def calculate_metrics(outputs, labels, num_classes):
    # Binarize labels for multi-class ROC AUC
    labels_binarized = label_binarize(labels.cpu(), classes=np.arange(num_classes))

    # Compute probabilities
    probabilities = torch.nn.functional.softmax(outputs, dim=1).cpu().detach().numpy()

    # Calculate metrics
    accuracy = accuracy_score(labels.cpu(), np.argmax(probabilities, axis=1))
    precision, recall, f1, _ = precision_recall_fscore_support(labels.cpu(), np.argmax(probabilities, axis=1), average='macro')

    # ROC AUC per class, then average
    try:
        roc_auc = roc_auc_score(labels_binarized, probabilities, multi_class='ovr')
    except ValueError:
        roc_auc = float('nan')  # ROC AUC might not be applicable for a specific batch

    return accuracy, precision, recall, f1, roc_auc


In [75]:
def evaluate_model(model, test_loader, num_classes, device):
    model.to(device)
    model.eval()

    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f1 = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            accuracy, precision, recall, f1, roc_auc = calculate_metrics(outputs, labels, num_classes)
            all_accuracy.append(accuracy)
            all_precision.append(precision)
            all_recall.append(recall)
            all_f1.append(f1)

    avg_accuracy = np.mean(all_accuracy)
    avg_precision = np.mean(all_precision)
    avg_recall = np.mean(all_recall)
    avg_f1 = np.mean(all_f1)

    return avg_accuracy, avg_precision, avg_recall, avg_f1

# Avaliar o modelo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

accuracy, precision, recall, f1 = evaluate_model(model, test_loader, num_classes, device)

# Log metrics with MLflow
with mlflow.start_run():
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

In [61]:
file_path = r'C:\\Users\\Aldan\\Documents\\bird_classification\\mlruns\\0\\c4551743e4594275be711686eb75b8d1\\metrics\\accuracy'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Acurácia: {content}")

Acurácia: 1703018413993 0.9916158536585366 0



In [54]:
file_path = r'C:\\Users\\Aldan\\Documents\\bird_classification\\mlruns\\0\\c4551743e4594275be711686eb75b8d1\\metrics\\f1_score'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"F1-Score: {content}")

1703018413996 0.9647998935346498 0



In [56]:
file_path = r'C:\\Users\\Aldan\\Documents\\bird_classification\\mlruns\\0\\c4551743e4594275be711686eb75b8d1\\metrics\\precision'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Precisão: {content}")

1703018413994 0.9693428184281844 0



In [57]:
file_path = r'C:\\Users\\Aldan\\Documents\\bird_classification\\mlruns\\0\\c4551743e4594275be711686eb75b8d1\\metrics\\recall'

# Abrir o arquivo para leitura
with open(file_path, 'r') as file:
    content = file.read()

print(f"Recall: {content}")

1703018413995 0.9617039295392953 0



Avaliação das métricas:
**Desempenho Global**: O modelo demonstra um desempenho excepcional em todas as métricas. Uma acurácia de mais de 99% é rara em muitos cenários de classificação, especialmente em problemas com um grande número de classes, como na classificação de imagens de pássaros.

**Equilíbrio entre Precisão e Recall**: O F1-Score alto sugere um excelente equilíbrio entre precisão e recall. Isso indica que o modelo não só identifica corretamente a maioria das classes positivas (alta precisão), mas também captura a grande maioria das ocorrências positivas (alto recall).