In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
import torch

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Check the CUDA version used by PyTorch
print("PyTorch CUDA version:", torch.version.cuda)

# Check the installed GPU
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected")


CUDA available: True
PyTorch CUDA version: 12.6
GPU: NVIDIA GeForce RTX 4070


In [4]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image

def load_images_from_directory(directory, target_size=(1024, 1024)):
    images = []
    labels = []
    for label in os.listdir(directory):
        label_dir = os.path.join(directory, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = image.load_img(img_path, target_size=target_size)
                img_array = image.img_to_array(img)
                images.append(img_array)
                labels.append(label)
    return images, labels

# Load all images and labels
all_images, all_labels = load_images_from_directory(r"C:\Users\antoi\Documents\Nell_Antoine_Project\DATA")

# Split into training and validation datasets
train_images, validation_images, train_labels, validation_labels = train_test_split(
    all_images, all_labels, test_size=0.2, random_state=42
)

In [5]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)
validation_images = np.array(validation_images)
validation_labels = np.array(validation_labels)

print(train_images.shape)
print(train_labels.shape)
print(validation_images.shape)
print(validation_labels.shape)

(2272, 1024, 1024, 3)
(2272,)
(569, 1024, 1024, 3)
(569,)


In [19]:
print(train_labels[:5])


['Collared_Dove' 'Wren' 'Starling' 'Collared_Dove' 'Long_Tailed_Tit']


In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3)
        self.conv6 = nn.Conv2d(256, 512, kernel_size=3)
        self.conv7 = nn.Conv2d(512, 1024, kernel_size=3)
        
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(0.5)  # Dropout with 50% probability
        
        # Use a dummy input to calculate the size
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, 1024, 1024)  # Batch size 1
            x = self.pool(F.relu(self.conv1(dummy_input)))
            x = self.pool(F.relu(self.conv2(x)))
            x = self.pool(F.relu(self.conv3(x)))
            x = self.pool(F.relu(self.conv4(x)))
            x = self.pool(F.relu(self.conv5(x)))
            x = self.pool(F.relu(self.conv6(x)))
            x = self.pool(F.relu(self.conv7(x)))
            self.flatten_size = x.numel()

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(self.flatten_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 20)  # Assuming 20 classes
        # Use CrossEntropyLoss for multi-class classification
        
        # Initialize weights
        self._initialize_weights()
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        x = self.pool(F.relu(self.conv6(x)))
        x = self.pool(F.relu(self.conv7(x)))
        x = self.flatten(x)
        x = self.dropout(F.relu(self.fc1(x)))  # Apply dropout after fc1
        x = self.dropout(F.relu(self.fc2(x)))  # Apply dropout after fc2
        x = self.fc3(x)  # No softmax needed
        return x

# Example of model instantiation
model = CNNModel()
print(model)

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


CNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (conv7): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=36864, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=20, bias=True)
)


CNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (conv7): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=36864, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=20, bias=True)
)

In [23]:
from torchsummary import summary
summary(model, input_size=(3, 1024, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1       [-1, 16, 1022, 1022]             448
         MaxPool2d-2         [-1, 16, 511, 511]               0
            Conv2d-3         [-1, 32, 509, 509]           4,640
         MaxPool2d-4         [-1, 32, 254, 254]               0
            Conv2d-5         [-1, 64, 252, 252]          18,496
         MaxPool2d-6         [-1, 64, 126, 126]               0
            Conv2d-7        [-1, 128, 124, 124]          73,856
         MaxPool2d-8          [-1, 128, 62, 62]               0
            Conv2d-9          [-1, 256, 60, 60]         295,168
        MaxPool2d-10          [-1, 256, 30, 30]               0
           Conv2d-11          [-1, 512, 28, 28]       1,180,160
        MaxPool2d-12          [-1, 512, 14, 14]               0
           Conv2d-13         [-1, 1024, 12, 12]       4,719,616
        MaxPool2d-14           [-1, 102

In [24]:
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For classification tasks
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer

In [9]:
from sklearn.preprocessing import LabelEncoder

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
validation_labels_encoded = label_encoder.transform(validation_labels)

In [10]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Normalize only when converting, avoid extra copies
train_images_tensor = torch.from_numpy(train_images).float().div(255)
train_labels_tensor = torch.from_numpy(train_labels_encoded).long()

train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

validation_images_tensor = torch.from_numpy(validation_images).float().div(255)
validation_labels_tensor = torch.from_numpy(validation_labels_encoded).long()

validation_dataset = TensorDataset(validation_images_tensor, validation_labels_tensor)
validation_dataloader = DataLoader(validation_dataset, batch_size=16, shuffle=False)


In [25]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    
    running_loss = 0.0
    
    for inputs, labels in train_dataloader:
        # Move data to GPU
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Permute the dimensions of the input tensor
        inputs = inputs.permute(0, 3, 1, 2)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Compute loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataloader)}")
    #with torch.no_grad(): #doesn't interfere with training
    #    print(model(torch.randn(1, 3, 100, 100).to(device)))



Epoch 1/10, Loss: 402.8092208848873
Epoch 2/10, Loss: 2.9950731153219516
Epoch 3/10, Loss: 2.9930918535716096
Epoch 4/10, Loss: 2.992284482633564
Epoch 5/10, Loss: 2.992149274114152
Epoch 6/10, Loss: 2.9911839777315166
Epoch 7/10, Loss: 2.9914019225348887
Epoch 8/10, Loss: 2.99117413540961
Epoch 9/10, Loss: 2.9913894139545065
Epoch 10/10, Loss: 2.991161153350078


In [26]:
# Evaluation
model.eval()

correct = 0
total = 0

with torch.no_grad():  # Disable gradients for evaluation
    for test_X, test_y in validation_dataloader:
        # Move data to GPU
        test_X, test_y = test_X.to(device), test_y.to(device)
        
        # Fix shape (if images are in (batch, H, W, C) format)
        test_X = test_X.permute(0, 3, 1, 2)  

        # Forward pass
        test_outputs = model(test_X)
        
        # Get predictions
        _, predicted = torch.max(test_outputs, 1)  # Get the class with highest probability
        
        # Update total and correct predictions
        correct += (predicted == test_y).sum().item()
        total += test_y.size(0)

# Compute final accuracy across all batches
accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 4.22%
