In [90]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

In [91]:
import torch

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Check the CUDA version used by PyTorch
print("PyTorch CUDA version:", torch.version.cuda)

# Check the installed GPU
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected")


CUDA available: True
PyTorch CUDA version: 12.6
GPU: NVIDIA GeForce RTX 4070


In [92]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image

def load_images_from_directory(directory, target_size=(256, 256)):
    images = []
    labels = []
    for label in os.listdir(directory):
        label_dir = os.path.join(directory, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = image.load_img(img_path, target_size=target_size)
                img_array = image.img_to_array(img)
                images.append(img_array)
                labels.append(label)
    return images, labels

# Load all images and labels
all_images, all_labels = load_images_from_directory(r"C:\Users\antoi\Documents\Nell_Antoine_Project\DATA")

# Split into training and validation datasets
train_images, validation_images, train_labels, validation_labels = train_test_split(
    all_images, all_labels, test_size=0.2, random_state=42
)

In [93]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)
validation_images = np.array(validation_images)
validation_labels = np.array(validation_labels)

print(train_images.shape)
print(train_labels.shape)
print(validation_images.shape)
print(validation_labels.shape)

(2272, 256, 256, 3)
(2272,)
(569, 256, 256, 3)
(569,)


In [94]:
print(train_images[0])

[[[  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]
  ...
  [  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]]

 [[  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]
  ...
  [  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]]

 [[  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]
  ...
  [  0.   0.   0.]
  [  0.   0.   0.]
  [  0.   0.   0.]]

 ...

 [[ 95.  84.  92.]
  [ 96.  85.  91.]
  [120. 109. 113.]
  ...
  [ 99. 131.  82.]
  [109. 136.  93.]
  [109. 136.  95.]]

 [[149. 135. 134.]
  [150. 136. 136.]
  [150. 136. 135.]
  ...
  [ 96. 129.  76.]
  [104. 131.  86.]
  [105. 132.  89.]]

 [[175. 163. 147.]
  [174. 162. 150.]
  [173. 160. 154.]
  ...
  [ 91. 124.  71.]
  [ 98. 126.  78.]
  [104. 131.  86.]]]


In [95]:
print(train_labels[:5])


['Collared_Dove' 'Wren' 'Starling' 'Collared_Dove' 'Long_Tailed_Tit']


In [58]:
print(set(train_labels))
print(len(set(train_labels)))

{np.str_('Bluetit'), np.str_('Jackdaw'), np.str_('Coal_Tit'), np.str_('Great_Tit'), np.str_('Carrion_Crow'), np.str_('Magpie'), np.str_('Robin'), np.str_('Wren'), np.str_('Goldfinch'), np.str_('Blackbird'), np.str_('House_Sparrow'), np.str_('Collared_Dove'), np.str_('Greenfinch'), np.str_('Dunnock'), np.str_('Song_Thrush'), np.str_('Chaffinch'), np.str_('Feral_Pigeon'), np.str_('Wood_Pigeon'), np.str_('Starling'), np.str_('Long_Tailed_Tit')}
20


In [75]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimplifiedCNNModel(nn.Module):
    def __init__(self):
        super(SimplifiedCNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Use a dummy input to calculate the size
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, 256, 256)  # Batch size 1
            x = self.pool1(F.relu(self.conv1(dummy_input)))
            x = self.pool2(F.relu(self.conv2(x)))
            x = self.pool3(F.relu(self.conv3(x)))
            self.flatten_size = x.numel()

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.fc2 = nn.Linear(256, 20)  # Assuming 20 classes
        
        # Initialize weights
        self._initialize_weights()
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # No softmax needed
        return x

# Example of model instantiation
model = SimplifiedCNNModel()
print(model)

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


SimplifiedCNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=57600, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=20, bias=True)
)


SimplifiedCNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=57600, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=20, bias=True)
)

In [76]:
from torchsummary import summary
summary(model, input_size=(3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 254, 254]             448
         MaxPool2d-2         [-1, 16, 127, 127]               0
            Conv2d-3         [-1, 32, 125, 125]           4,640
         MaxPool2d-4           [-1, 32, 62, 62]               0
            Conv2d-5           [-1, 64, 60, 60]          18,496
         MaxPool2d-6           [-1, 64, 30, 30]               0
           Flatten-7                [-1, 57600]               0
            Linear-8                  [-1, 256]      14,745,856
            Linear-9                   [-1, 20]           5,140
Total params: 14,774,580
Trainable params: 14,774,580
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.75
Forward/backward pass size (MB): 17.24
Params size (MB): 56.36
Estimated Total Size (MB): 74.35
----------------------------------

In [77]:
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For classification tasks
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer

In [78]:
from sklearn.preprocessing import LabelEncoder

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
validation_labels_encoded = label_encoder.transform(validation_labels)

In [79]:
print("Encoded labels:", train_labels_encoded[:5])
print("Encoded labels shape:", train_labels_encoded.shape)
print("Validation labels shape:", validation_labels_encoded.shape)
print("Number of classes:", len(set(train_labels_encoded)))

Encoded labels: [ 5 19 17  5 13]
Encoded labels shape: (2272,)
Validation labels shape: (569,)
Number of classes: 20


In [80]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Permute if in HWC format (i.e., (N, H, W, C))
if train_images.shape[-1] == 3:
    train_images = train_images.transpose(0, 3, 1, 2)  # (N, H, W, C) → (N, C, H, W)
    
# Normalize only when converting, avoid extra copies
train_images_tensor = torch.from_numpy(train_images).float().div(255)
train_labels_tensor = torch.from_numpy(train_labels_encoded).long()

train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

if validation_images.shape[-1] == 3:
    validation_images = validation_images.transpose(0, 3, 1, 2)  # (N, H, W, C) → (N, C, H, W)

validation_images_tensor = torch.from_numpy(validation_images).float().div(255)
validation_labels_tensor = torch.from_numpy(validation_labels_encoded).long()

validation_dataset = TensorDataset(validation_images_tensor, validation_labels_tensor)
validation_dataloader = DataLoader(validation_dataset, batch_size=16, shuffle=False)


In [86]:
inputs, labels = next(iter(train_dataloader))
inputs, labels = inputs.to(device), labels.to(device)

for step in range(200):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    print(f"Step {step}, Loss: {loss.item()}")


Step 0, Loss: 3.0473928451538086
Step 1, Loss: 3.0458953380584717
Step 2, Loss: 3.0434937477111816
Step 3, Loss: 3.0402889251708984
Step 4, Loss: 3.0363733768463135
Step 5, Loss: 3.0318312644958496
Step 6, Loss: 3.026740074157715
Step 7, Loss: 3.0211706161499023
Step 8, Loss: 3.0151875019073486
Step 9, Loss: 3.008847951889038
Step 10, Loss: 3.0022060871124268
Step 11, Loss: 2.995309591293335
Step 12, Loss: 2.9882009029388428
Step 13, Loss: 2.980919122695923
Step 14, Loss: 2.9735004901885986
Step 15, Loss: 2.965975284576416
Step 16, Loss: 2.9583725929260254
Step 17, Loss: 2.9507174491882324
Step 18, Loss: 2.9430322647094727
Step 19, Loss: 2.935337543487549
Step 20, Loss: 2.927651882171631
Step 21, Loss: 2.9199893474578857
Step 22, Loss: 2.9123659133911133
Step 23, Loss: 2.9047932624816895
Step 24, Loss: 2.897282838821411
Step 25, Loss: 2.889843225479126
Step 26, Loss: 2.882484197616577
Step 27, Loss: 2.8752121925354004
Step 28, Loss: 2.8680336475372314
Step 29, Loss: 2.8609540462493896


In [89]:
inputs, labels = next(iter(train_dataloader))
print("Input shape:", inputs.shape)          # should be (16, 3, 256, 256)
print("Input min/max:", inputs.min(), inputs.max())  # should be ~0–1
print("Label shape:", labels.shape)          # should be (16,)
print("Label dtype:", labels.dtype)          # should be torch.long
print("Unique labels:", torch.unique(labels))


Input shape: torch.Size([16, 3, 256, 256])
Input min/max: tensor(0.) tensor(1.)
Label shape: torch.Size([16])
Label dtype: torch.int64
Unique labels: tensor([ 3,  4,  5,  7,  8,  9, 10, 11, 12, 13, 15, 16])


In [87]:
# Training loop
num_epochs = 1000

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    
    running_loss = 0.0
    
    for inputs, labels in train_dataloader:
        # Move data to GPU
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Compute loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataloader)}")
    #with torch.no_grad(): #doesn't interfere with training
    #    print(model(torch.randn(1, 3, 100, 100).to(device)))



Epoch 1/1000, Loss: 3.1519047072235966
Epoch 2/1000, Loss: 3.003735559087404
Epoch 3/1000, Loss: 2.9918064214813875
Epoch 4/1000, Loss: 2.9913947800515404
Epoch 5/1000, Loss: 2.9913294483238544
Epoch 6/1000, Loss: 2.992136364251795
Epoch 7/1000, Loss: 2.9918716323207803
Epoch 8/1000, Loss: 2.9913684499095865
Epoch 9/1000, Loss: 2.991327532580201
Epoch 10/1000, Loss: 2.991953309153167
Epoch 11/1000, Loss: 2.9914673012746893
Epoch 12/1000, Loss: 2.9914225474209855
Epoch 13/1000, Loss: 2.991028026795723


KeyboardInterrupt: 

In [83]:
print("Sample outputs:", outputs[0])


Sample outputs: tensor([ 0.0375,  0.0177, -0.1196, -0.0408,  0.0601,  0.0240,  0.0783, -0.4260,
         0.0589,  0.0623,  0.0538,  0.0540, -0.1837,  0.0886, -0.3779, -0.0043,
         0.0972, -0.0189,  0.0651,  0.1338], device='cuda:0',
       grad_fn=<SelectBackward0>)


In [84]:
# Evaluation
model.eval()

correct = 0
total = 0

with torch.no_grad():  # Disable gradients for evaluation
    for test_X, test_y in validation_dataloader:
        # Move data to GPU
        test_X, test_y = test_X.to(device), test_y.to(device)

        # Forward pass
        test_outputs = model(test_X)
        
        # Get predictions
        _, predicted = torch.max(test_outputs, 1)  # Get the class with highest probability
        
        # Update total and correct predictions
        correct += (predicted == test_y).sum().item()
        total += test_y.size(0)

# Compute final accuracy across all batches
accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 4.57%
