In [50]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

import joblib


In [19]:

PATH_TO_RINS = "/home/strazi" # REPLACE WITH YOUR PATH!!!

# The species file lists one class name per line; read it in one go.
species_file = f"{PATH_TO_RINS}/RINS-kappa/workspace/src/task_2s/data/relevant_species.txt"
with open(species_file) as f:
    raw_lines = f.read().splitlines()

# Trim surrounding whitespace and discard lines that end up empty.
relevant_species = [name for name in map(str.strip, raw_lines) if name]
relevant_species

['002.Laysan_Albatross',
 '012.Yellow_headed_Blackbird',
 '014.Indigo_Bunting',
 '025.Pelagic_Cormorant',
 '029.American_Crow',
 '033.Yellow_billed_Cuckoo',
 '035.Purple_Finch',
 '042.Vermilion_Flycatcher',
 '048.European_Goldfinch',
 '050.Eared_Grebe',
 '059.California_Gull',
 '068.Ruby_throated_Hummingbird',
 '073.Blue_Jay',
 '081.Pied_Kingfisher',
 '095.Baltimore_Oriole',
 '101.White_Pelican',
 '106.Horned_Puffin',
 '108.White_necked_Raven',
 '112.Great_Grey_Shrike',
 '118.House_Sparrow',
 '134.Cape_Glossy_Starling',
 '138.Tree_Swallow',
 '144.Common_Tern',
 '191.Red_headed_Woodpecker']

In [23]:
# Locations of the CUB-200-2011 metadata files (all live directly under root_dir).
root_dir = f'{PATH_TO_RINS}/RINS-kappa/workspace/src/task_2s/data/CUB_200_2011/CUB_200_2011'
images_txt = f'{root_dir}/images.txt'
image_class_labels_txt = f'{root_dir}/image_class_labels.txt'
classes_txt = f'{root_dir}/classes.txt'
train_test_split_txt = f'{root_dir}/train_test_split.txt'
bounding_boxes_txt = f'{root_dir}/bounding_boxes.txt'

# Every metadata file is parsed as space-separated text with explicitly
# supplied column names.
images_df = pd.read_csv(
    images_txt, sep=' ', names=['image_id', 'image_name'])
labels_df = pd.read_csv(
    image_class_labels_txt, sep=' ', names=['image_id', 'class_id'])
classes_df = pd.read_csv(
    classes_txt, sep=' ', names=['class_id', 'class_name'])
split_df = pd.read_csv(
    train_test_split_txt, sep=' ', names=['image_id', 'is_training_image'])
bboxes_df = pd.read_csv(
    bounding_boxes_txt, sep=' ',
    names=['image_id', 'bbox_x', 'bbox_y', 'bbox_width', 'bbox_height'])

# Join everything into one row per image: class info is attached through
# class_id, the split flag and bounding box through image_id.
df = (
    images_df
    .merge(labels_df, on='image_id')
    .merge(classes_df, on='class_id')
    .merge(split_df, on='image_id')
    .merge(bboxes_df, on='image_id')
)

# Absolute path of each image file, built from the relative name in images.txt.
images_root = root_dir + '/images/'
df['image_path'] = images_root + df['image_name']

# Keep only the rows whose class is listed in relevant_species.
is_relevant = df['class_name'].isin(relevant_species)
df = df[is_relevant]

# Quick sanity check of the merged, filtered table.
print(df.head())


    image_id                                         image_name  class_id  \
60        61  002.Laysan_Albatross/Laysan_Albatross_0002_102...         2   
61        62  002.Laysan_Albatross/Laysan_Albatross_0003_103...         2   
62        63  002.Laysan_Albatross/Laysan_Albatross_0082_524...         2   
63        64  002.Laysan_Albatross/Laysan_Albatross_0044_784...         2   
64        65  002.Laysan_Albatross/Laysan_Albatross_0070_788...         2   

              class_name  is_training_image  bbox_x  bbox_y  bbox_width  \
60  002.Laysan_Albatross                  0   144.0    40.0       333.0   
61  002.Laysan_Albatross                  1   202.0    28.0       164.0   
62  002.Laysan_Albatross                  0    72.0    68.0       383.0   
63  002.Laysan_Albatross                  1    60.0   128.0       438.0   
64  002.Laysan_Albatross                  0    32.0    35.0       259.0   

    bbox_height                                         image_path  
60        165.0  

In [None]:
# Map each class name to an integer id in [0, num_classes) for CrossEntropyLoss.
le = LabelEncoder()

# `df` is the result of a boolean-mask selection, so writing a column into it
# with `df['label'] = ...` raises SettingWithCopyWarning on pandas versions
# without copy-on-write. `assign` returns a new frame instead, which also makes
# this cell safe to re-run.
df = df.assign(label=le.fit_transform(df['class_name']))

num_classes = len(le.classes_)
print(f'{num_classes} classes')

# Persist the fitted encoder so predicted ids can be mapped back to class names.
joblib.dump(le, f'{PATH_TO_RINS}/RINS-kappa/workspace/src/task_2s/models/label_encoder.pkl')

24 classes


In [37]:
# Split on the is_training_image flag: 1 -> train, 0 -> test.
is_train_row = df['is_training_image'] == 1
is_test_row = df['is_training_image'] == 0
train_df = df[is_train_row]
test_df = df[is_test_row]

In [38]:
def crop_to_bounding_box(image, bbox):
    """Crop ``image`` to the rectangle described by ``bbox``.

    Args:
        image: Any object exposing ``crop(box)`` (presumably a PIL image;
            confirm against callers).
        bbox: Four values ``(x, y, width, height)``.

    Returns:
        Whatever ``image.crop`` returns for the box
        ``(x, y, x + width, y + height)``.
    """
    left, top, width, height = bbox
    right = left + width
    bottom = top + height
    return image.crop((left, top, right, bottom))

# Preprocessing applied to every image: resize to 224x224, then convert to a
# tensor. NOTE(review): no mean/std normalisation step is included here, so any
# inference code should apply this same pipeline - confirm against consumers.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Custom dataset
class BirdDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Args:
        df: Frame with an ``image_path`` column (a path ``Image.open`` can read)
            and a ``label`` column. Its index is reset so rows are addressable
            by position ``0 .. len(df) - 1``.
        transform: Optional callable applied to the RGB image before it is
            returned. When ``None`` the converted image is returned as-is.
    """

    def __init__(self, df, transform=None):
        # A filtered frame keeps its original index; reset it so that the
        # integer idx used in __getitem__ matches the row labels.
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows in the frame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        img_path = self.df.loc[idx, 'image_path']
        label = self.df.loc[idx, 'label']

        # Open inside a context manager so the underlying file is closed
        # deterministically, even if decoding/conversion raises. convert()
        # returns a new image, so it is safe to use after the file is closed.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        # Compare against None rather than relying on the transform's truthiness.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [39]:
# Load datasets
train_dataset = BirdDataset(train_df, transform=transform)
test_dataset = BirdDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [41]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load an ImageNet-pretrained ResNet-18.
# torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights=`
# enum API. Use the enum when this install provides it (IMAGENET1K_V1 is the
# checkpoint `pretrained=True` refers to) and fall back to the legacy flag
# otherwise so older torchvision versions keep working.
_resnet18_weights = getattr(models, 'ResNet18_Weights', None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the final fully-connected layer so the network emits one logit per
# class in our label set instead of the original head's outputs.
model.fc = nn.Linear(model.fc.in_features, num_classes)

model = model.to(device)



In [42]:
# Fine-tune every parameter of the network with Adam and cross-entropy loss.
# (`optim` is imported with the other imports at the top of the notebook.)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct = 0         # correctly classified training samples this epoch
    total = 0           # training samples seen this epoch

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # CrossEntropyLoss (default reduction) yields the batch mean. Scale it
        # back to a per-batch sum so a short final batch is not over-weighted
        # when the epoch average is computed below.
        running_loss += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    # Both metrics are per-sample averages over the whole epoch.
    acc = 100. * correct / total
    print(f'Epoch {epoch+1}, Loss: {running_loss/total:.4f}, Acc: {acc:.2f}%')


Epoch 1, Loss: 2.4766, Acc: 38.89%
Epoch 2, Loss: 0.7731, Acc: 95.97%
Epoch 3, Loss: 0.2773, Acc: 100.00%
Epoch 4, Loss: 0.1298, Acc: 100.00%
Epoch 5, Loss: 0.0783, Acc: 100.00%
Epoch 6, Loss: 0.0502, Acc: 100.00%
Epoch 7, Loss: 0.0365, Acc: 100.00%
Epoch 8, Loss: 0.0306, Acc: 100.00%
Epoch 9, Loss: 0.0241, Acc: 100.00%
Epoch 10, Loss: 0.0211, Acc: 100.00%


In [43]:
# Measure top-1 accuracy on the held-out test split.
model.eval()  # switch layers such as batch-norm to inference behaviour
correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # max over dim 1 returns (values, indices); the indices are the
        # predicted class ids.
        _, predicted = outputs.max(1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100. * correct / total:.2f}%')


Test Accuracy: 89.50%


In [None]:
# Destination of the trained classifier.
path = f'{PATH_TO_RINS}/RINS-kappa/workspace/src/task_2s/models/bird_species_model.pth'

# The model object itself (not just its state_dict) is serialised here.
# NOTE(review): code that loads this file presumably relies on that format -
# confirm before switching to a state_dict checkpoint.
torch.save(obj=model, f=path)