In [3]:
# Load Dataset
from pathlib import Path
import pandas as pd

ssw60df_path = Path("..") / "dataset" / "ssw60"

taxa_df_path = ssw60df_path / "taxa.csv"
images_inat_path = ssw60df_path / "images_inat.csv"
images_nabirds_path = ssw60df_path / "images_nabirds.csv"

taxa_df = pd.read_csv(taxa_df_path)
images_inat_df = pd.read_csv(images_inat_path)
images_nabirds_df = pd.read_csv(images_nabirds_path)

In [4]:
taxa_df.head()

Unnamed: 0,label,species_code,inat_taxon_id,inat2021_label,nabirds_labels,common_name,scientific_name,family,order
0,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes
1,1,wooduc,7107,3188,81 314 613,Wood Duck,Aix sponsa,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes
2,2,mallar3,6930,3201,102 317 616,Mallard,Anas platyrhynchos,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes
3,3,hoomer,7109,3250,225 335 634,Hooded Merganser,Lophodytes cucullatus,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes
4,4,moudov,3454,3547,171 529,Mourning Dove,Zenaida macroura,Columbidae (Pigeons and Doves),Columbiformes


In [12]:
images_inat_df.drop(columns=["rights_holder", "channels", "license_id"], inplace=True)
images_inat_df.head()

Unnamed: 0,asset_id,label,split,height,width
0,12097,0,test,500,375
1,15640,0,test,500,375
2,34672,0,test,375,500
3,78882,0,test,500,375
4,87025,0,test,323,500


In [11]:
images_nabirds_df.drop(columns=["photographer", "channels"], inplace=True)
images_nabirds_df.head()

Unnamed: 0,asset_id,label,split,height,width
0,021d62ce5f924d579e05bec9116c586b,0,test,796,1024
1,02b41e191e0d4729b61c8b98213c3281,0,test,1024,683
2,0660044959754a9caa049069ab3126d5,0,test,682,1024
3,1d59a904801240e3b77c30bd2408495a,0,test,585,640
4,258124efeb98443e8ff8429580be6786,0,test,683,1024


In [14]:
images_inat_plus_names_df = taxa_df.merge(images_inat_df, on="label")
images_inat_plus_names_df.head()

Unnamed: 0,label,species_code,inat_taxon_id,inat2021_label,nabirds_labels,common_name,scientific_name,family,order,asset_id,split,height,width
0,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,12097,test,500,375
1,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,15640,test,500,375
2,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,34672,test,375,500
3,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,78882,test,500,375
4,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,87025,test,323,500


In [15]:
images_nabirds_plus_names_df = taxa_df.merge(images_nabirds_df, on="label")
images_nabirds_plus_names_df.head()

Unnamed: 0,label,species_code,inat_taxon_id,inat2021_label,nabirds_labels,common_name,scientific_name,family,order,asset_id,split,height,width
0,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,021d62ce5f924d579e05bec9116c586b,test,796,1024
1,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,02b41e191e0d4729b61c8b98213c3281,test,1024,683
2,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,0660044959754a9caa049069ab3126d5,test,682,1024
3,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,1d59a904801240e3b77c30bd2408495a,test,585,640
4,0,cangoo,7089,3226,57 457,Canada Goose,Branta canadensis,"Anatidae (Ducks, Geese, and Waterfowl)",Anseriformes,258124efeb98443e8ff8429580be6786,test,683,1024


In [28]:
inat_image_path = ssw60df_path /  "images_inat"
images_inat_plus_names_df["filepath"] = images_inat_plus_names_df["asset_id"].apply(lambda x: str(inat_image_path / (str(x) + ".jpg")))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

# 1. Custom Dataset Class
class BirdDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        img_path = row['filepath']
        image = Image.open(img_path).convert('RGB')
        label = row['label']
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

# 2. Data Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 3. Prepare datasets
train_df = images_inat_plus_names_df[images_inat_plus_names_df['split'] == 'train'].reset_index(drop=True)
val_df = images_inat_plus_names_df[images_inat_plus_names_df['split'] == 'test'].reset_index(drop=True)

train_dataset = BirdDataset(train_df, transform=train_transform)
val_dataset = BirdDataset(val_df, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# 4. Model Setup (ResNet50 pretrained)
num_classes = len(taxa_df)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# 5. Training Setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# 6. Training Loop
def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(loader), 100. * correct / total

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(loader), 100. * correct / total

# 7. Train
print("Kiwi != Victor")
num_epochs = 20
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    scheduler.step()
    
    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

# 8. Save Model
torch.save(model.state_dict(), 'bird_classifier.pth')

Exception ignored in: <function tqdm.__del__ at 0x7fd3c57e1300>
Traceback (most recent call last):
  File "/home/giovanni/ufmg/dlbird/experiments/.venv/lib/python3.13/site-packages/tqdm/std.py", line 1148, in __del__
    self.close()
  File "/home/giovanni/ufmg/dlbird/experiments/.venv/lib/python3.13/site-packages/tqdm/notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


cuda
Kiwi != Victor
Epoch 1/20
Epoch 1/20
Train Loss: 3.9534, Train Acc: 4.42%
Val Loss: 3.8115, Val Acc: 5.77%
Epoch 2/20
Epoch 2/20
Train Loss: 3.7414, Train Acc: 6.81%
Val Loss: 3.7011, Val Acc: 8.10%
Epoch 3/20
Epoch 3/20
Train Loss: 3.6216, Train Acc: 9.01%
Val Loss: 3.5541, Val Acc: 10.73%
Epoch 4/20
Epoch 4/20
Train Loss: 3.4858, Train Acc: 11.55%
Val Loss: 3.6649, Val Acc: 11.00%
Epoch 5/20
Epoch 5/20
Train Loss: 3.3503, Train Acc: 13.58%
Val Loss: 3.3317, Val Acc: 14.07%
Epoch 6/20
Epoch 6/20
Train Loss: 3.2242, Train Acc: 16.32%
Val Loss: 3.2122, Val Acc: 16.73%
Epoch 7/20
Epoch 7/20
Train Loss: 3.0869, Train Acc: 18.89%
Val Loss: 3.0637, Val Acc: 19.27%
Epoch 8/20
Epoch 8/20
Train Loss: 2.8196, Train Acc: 24.67%
Val Loss: 2.8411, Val Acc: 24.67%
Epoch 9/20
Epoch 9/20
Train Loss: 2.7333, Train Acc: 26.99%
Val Loss: 2.7897, Val Acc: 25.97%
Epoch 10/20
Epoch 10/20
Train Loss: 2.6562, Train Acc: 28.98%
Val Loss: 2.7524, Val Acc: 27.03%
Epoch 11/20
Epoch 11/20
Train Loss: 2.6141,

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

# 1. Custom Dataset Class
class BirdDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        img_path = row['filepath']
        image = Image.open(img_path).convert('RGB')
        label = row['label']
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

# 2. Data Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 3. Prepare datasets
train_df = images_inat_plus_names_df[images_inat_plus_names_df['split'] == 'train'].reset_index(drop=True)
val_df = images_inat_plus_names_df[images_inat_plus_names_df['split'] == 'test'].reset_index(drop=True)

train_dataset = BirdDataset(train_df, transform=train_transform)
val_dataset = BirdDataset(val_df, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# 4. Model Setup (ResNet50 pretrained)
num_classes = len(taxa_df)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = models.resnet50(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# 5. Training Setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# 6. Training Loop
def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(loader), 100. * correct / total

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(loader), 100. * correct / total

# 7. Train
print("Kiwi != Victor")
num_epochs = 3
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    scheduler.step()
    
    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

# 8. Save Model
torch.save(model.state_dict(), 'bird_classifier_no_pretrained.pth')

cuda




Kiwi != Victor
Epoch 1/3
Epoch 1/3
Train Loss: 4.0648, Train Acc: 3.83%
Val Loss: 3.9352, Val Acc: 4.03%
Epoch 2/3
Epoch 2/3
Train Loss: 3.8515, Train Acc: 5.53%
Val Loss: 3.8234, Val Acc: 5.80%
Epoch 3/3
Epoch 3/3
Train Loss: 3.7725, Train Acc: 6.65%
Val Loss: 3.7190, Val Acc: 7.73%
