In [1]:
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os
from PIL import Image
# to install pytorch, follow instructions on https://pytorch.org/get-started/locally/
# if CUDA is installed, this should allow GPU training
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
# -> pip install torchsummary
from torchsummary import summary

In [2]:
import os
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np

# Path to expanded dataset.
# The location can be overridden without editing the notebook by setting the
# BIRD_DATASET_DIR environment variable; when it is unset the original
# machine-specific path below is used.
dataset_dir = os.environ.get(
    "BIRD_DATASET_DIR",
    r"C:\Users\antoi\Documents\Nell_Antoine_Project\DATA",
)

# Step 1: Load images and labels
def load_images_from_directory(directory, target_size=(1024, 1024)):
    """Load a folder-per-class image dataset into memory.

    Every sub-directory of ``directory`` is treated as one class. Each file
    inside a class directory is opened with PIL, converted to RGB, resized to
    ``target_size`` and stored as a NumPy array. Files that cannot be read are
    reported on stdout and skipped.

    Args:
        directory: Root folder that contains one sub-directory per class.
        target_size: Size passed to ``PIL.Image.resize`` for every image.

    Returns:
        A tuple ``(images, labels, class_to_index)`` where ``images`` is the
        array of resized images, ``labels`` is an int64 array holding the
        class index of each image, and ``class_to_index`` maps each class
        directory name to its index (alphabetical order).
    """
    # Only sub-directories are classes. Filtering *before* enumerating keeps
    # stray files (desktop.ini, .DS_Store, a README, ...) from taking a label
    # index and shifting the indices of the real classes.
    class_names = sorted(
        name
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    )
    class_to_index = {name: idx for idx, name in enumerate(class_names)}

    images = []
    labels = []
    for class_name in class_names:
        class_path = os.path.join(directory, class_name)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore any seeded split) reproducible.
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            try:
                # The context manager closes the underlying file handle as
                # soon as the pixels have been copied out.
                with Image.open(img_path) as img:
                    pixels = np.array(img.convert("RGB").resize(target_size))
            except Exception as e:
                # Best effort: one unreadable file must not abort the load.
                print(f"Skipping {img_path}: {e}")
                continue
            images.append(pixels)
            labels.append(class_to_index[class_name])

    # Explicit int64 keeps the label dtype identical on every platform
    # (NumPy < 2 defaults to int32 on Windows) and well-defined when no
    # image was loaded.
    return np.array(images), np.array(labels, dtype=np.int64), class_to_index

# Read the whole dataset into memory together with the class-name -> index map.
images, labels, label_map = load_images_from_directory(directory=dataset_dir)

# Step 2: Hold out 20% of the samples for validation. The split is stratified
# on the labels and uses a fixed seed.
train_images, val_images, train_labels, val_labels = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Short report of the resulting split sizes and the label encoding.
print(
    "Dataset split complete:",
    f"Train set: {len(train_images)} images",
    f"Validation set: {len(val_images)} images",
    sep="\n",
)
print("Label map:", label_map)

Dataset split complete:
Train set: 2272 images
Validation set: 569 images
Label map: {'Blackbird': 0, 'Bluetit': 1, 'Carrion_Crow': 2, 'Chaffinch': 3, 'Coal_Tit': 4, 'Collared_Dove': 5, 'Dunnock': 6, 'Feral_Pigeon': 7, 'Goldfinch': 8, 'Great_Tit': 9, 'Greenfinch': 10, 'House_Sparrow': 11, 'Jackdaw': 12, 'Long_Tailed_Tit': 13, 'Magpie': 14, 'Robin': 15, 'Song_Thrush': 16, 'Starling': 17, 'Wood_Pigeon': 18, 'Wren': 19}


In [None]:
# Augmentation pipeline applied to each training image.
# (A standalone RandomRotation(20) step was tried earlier; the rotation is now
# provided by the `degrees` argument of RandomAffine.)
transform = transforms.Compose(
    [
        # Random rotation of up to 20 degrees plus shifts of up to 20% of the
        # image size; uncovered areas are filled with white.
        transforms.RandomAffine(
            degrees=20,
            translate=(0.2, 0.2),
            fill=(255, 255, 255),
        ),
        # Random horizontal mirroring.
        transforms.RandomHorizontalFlip(),
        # Random brightness factor taken from the range 0.8 to 1.2.
        transforms.ColorJitter(brightness=(0.8, 1.2)),
        # Convert the PIL image to a tensor.
        transforms.ToTensor(),
    ]
)



# Apply the transformations to the training dataset
class AugmentedDataset(Dataset):
    """Dataset that serves in-memory images with an optional transform.

    Args:
        images: Indexable collection of image arrays; each item is converted
            to ``uint8`` and wrapped in a PIL image on access.
        labels: Indexable collection of scalar class indices, one per image.
        transform: Optional callable applied to the PIL image. When it is
            ``None`` the PIL image itself is returned.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images, labels, transform=None):
        # Fail early instead of raising an IndexError (or silently dropping
        # labels) in the middle of an epoch.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        image = Image.fromarray(self.images[idx].astype('uint8'))
        # Return a plain Python int: the default DataLoader collation turns
        # Python ints into an int64 tensor, whereas a NumPy int32 scalar
        # (the default integer on Windows with NumPy < 2) would become an
        # IntTensor that nn.CrossEntropyLoss rejects as a target.
        label = int(self.labels[idx])
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Wrap the training split so that every access returns a freshly augmented image.
augmented_dataset = AugmentedDataset(
    images=train_images,
    labels=train_labels,
    transform=transform,
)

In [None]:
class BirdSpeciesCNNModel(nn.Module):
    """Four-stage convolutional classifier for bird-species images.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` with
    32, 64, 128 and 256 output channels. The resulting feature map is
    flattened and passed through a 256-unit hidden layer with dropout before
    the final linear layer, which returns raw logits (no softmax).

    Args:
        num_classes: Number of output logits. Defaults to 20.
        input_size: ``(height, width)`` of the reference input used to size
            the first fully connected layer. Defaults to ``(256, 256)``.
    """

    def __init__(self, num_classes=20, input_size=(256, 256)):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling after conv1

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling after conv2

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling after conv3

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling after conv4

        # Run a dummy reference input through the conv stack to find the size
        # of the flattened feature map. torch.randn is kept (rather than
        # zeros) so the random-number stream, and therefore seeded weight
        # initialisation, is consumed exactly as before.
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, *input_size)  # Batch size 1
            feature_map = self._forward_features(dummy_input)
        self.flatten_size = feature_map.numel()
        # Spatial grid that fc1 was sized for; see forward().
        self._feature_grid = tuple(feature_map.shape[-2:])

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.dropout = nn.Dropout(p=0.5)  # Dropout with 50% probability
        self.fc2 = nn.Linear(256, num_classes)

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for conv layers, Xavier-normal for linear layers; biases are zeroed."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

    def _forward_features(self, x):
        """Apply the four conv -> ReLU -> max-pool stages and return the feature map."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.pool4(F.relu(self.conv4(x)))
        return x

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch."""
        x = self._forward_features(x)
        # fc1 is sized for the reference resolution. Inputs of any other
        # resolution would otherwise fail with a shape mismatch, so their
        # feature map is average-pooled to the expected grid. Inputs at the
        # reference resolution skip this step and are processed as before.
        if tuple(x.shape[-2:]) != self._feature_grid:
            x = F.adaptive_avg_pool2d(x, self._feature_grid)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = self.fc2(x)  # No softmax needed
        return x

# Instantiate the classifier defined above. (The previous name,
# SimplifiedCNNModel, is not defined anywhere in this notebook and raised a
# NameError on a fresh kernel.)
model = BirdSpeciesCNNModel()
print(model)

# Move model to GPU when CUDA is available, otherwise stay on the CPU.
# Assigning the result keeps the cell from displaying the model a second time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
