In [11]:
# Citations
# GBIF.org (15 June 2025) GBIF Occurrence Download  https://doi.org/10.15468/dl.mhxdcy
# Data was obtained from GBIF-NZ; it includes records from 6 datasets: iNaturalist Research-grade Observations; Observation.org, Nature data from around the World; Auckland Museum Land Vertebrates Collection; Xeno-canto - Bird sounds from around the world; NABU|naturgucker; MVZ Egg and Nest Collection (Arctos)
# There were 4258 occurrences in the original dataset, but this was reduced to 4202 occurrences as a result of data cleaning.
# Of the original 4258 occurrences, 581 were takahe (Porphyrio hochstetteri) and 3621 were pukeko (Porphyrio melanotus subsp. melanotus).
# In the cleaned dataset 550 were takahe (Porphyrio hochstetteri) and 3557 were pukeko (Porphyrio melanotus subsp. melanotus).
# The data cleaning and downloading of the images were performed in RStudio.
# Made using Python 3.12.9


In [12]:
# loading packages, using python 3.12.9
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset, random_split
from torchvision.datasets import ImageFolder
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets


In [13]:
# Load the test-set indices from "test_idx.txt": one integer per line.
with open("test_idx.txt", "r") as file:
    # Skip blank lines (e.g. an empty line at the end of the file) so that
    # int() is never handed an empty string. int() itself tolerates the
    # surrounding whitespace/newline on non-blank lines.
    test_idx = [int(line) for line in file if line.strip()]

In [14]:
# Print the loaded indices as a quick check that the file was parsed into a list of ints.
print(test_idx)

[624, 4, 3279, 1936, 2719, 3423, 3913, 2111, 2653, 950, 742, 2352, 1518, 996, 4098, 1794, 1990, 3761, 3422, 1882, 3100, 2899, 2979, 3361, 2061, 3299, 2697, 1805, 1950, 1209, 3921, 979, 1760, 803, 1444, 3434, 2330, 3057, 2987, 2959, 3773, 2301, 49, 3648, 1567, 2230, 2366, 511, 1334, 3883, 81, 1787, 3945, 949, 2446, 2128, 1400, 2264, 3292, 846, 3464, 1227, 2621, 2634, 2701, 3383, 2677, 2151, 934, 3145, 3587, 3264, 2211, 983, 2101, 1362, 3906, 3498, 3160, 2489, 1048, 1878, 1035, 2449, 1839, 3845, 2894, 2685, 985, 3949, 3712, 1328, 3594, 1499, 3679, 280, 3843, 569, 3904, 974, 1407, 1756, 1908, 2617, 2811, 1033, 1144, 2378, 496, 2739, 2131, 3641, 2087, 2830, 32, 967, 1366, 1317, 1657, 3213, 3889, 1164, 3297, 464, 2240, 1203, 2939, 2212, 969, 116, 1924, 4045, 3527, 1972, 992, 3531, 1027, 1078, 2976, 1537, 1174, 2213, 481, 3435, 4070, 3308, 3121, 1091, 542, 3777, 578, 3729, 3104, 1714, 970, 2958, 196, 1833, 3153, 1616, 746, 3310, 2748, 1510, 499, 1699, 1255, 1709, 529, 3672, 2568, 2204, 2584,

In [15]:
# Image preprocessing pipelines.
# Both pipelines resize to 600 x 600, convert the image to a tensor and then
# normalise every channel as (x - 0.5) / 0.5. Only Transform_data adds random
# augmentation on top of that.
_image_size = (600, 600)
_channel_mean = (0.5, 0.5, 0.5)
_channel_std = (0.5, 0.5, 0.5)

_augmented_steps = [
    transforms.Resize(_image_size),
    # Mirror an image left-right with probability 0.1
    transforms.RandomHorizontalFlip(p=0.1),
    # Rotate by a random angle drawn from the range 0 to 180 degrees
    transforms.RandomRotation(degrees=(0, 180)),
    # Randomly perturb brightness, contrast, saturation and hue
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    # A RandomResizedCrop(size=(128, 128)) step is deliberately left disabled here.
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
Transform_data = transforms.Compose(_augmented_steps)

# Evaluation pipeline: no random augmentation, so every evaluation image is
# preprocessed the same way each time it is loaded.
_plain_steps = [
    transforms.Resize(_image_size),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
V_Transform_data = transforms.Compose(_plain_steps)

In [16]:
# Build the test set: the images under "Train_data" restricted to the
# positions listed in test_idx, preprocessed with the evaluation transform.

# Untransformed view of the same folder (transform defaults to None).
Images = ImageFolder("Train_data")

# Seeded generator for reproducible results. Neither it nor Images is consumed
# by the statements below; both are kept in case other cells rely on them.
generator = torch.Generator()
generator.manual_seed(0)

_eval_folder = ImageFolder(root="Train_data", transform=V_Transform_data)
test_ds = Subset(_eval_folder, indices=test_idx)

# shuffle=False keeps the batches in the order given by test_idx.
test_loader = DataLoader(dataset=test_ds, batch_size=35, shuffle=False)


In [17]:
# Defining the CNN model
class Net(nn.Module):
    """Small CNN classifier with two output classes.

    Layout: three convolution + batch-norm stages (max-pooling after the
    first and third), global average pooling, then a four-layer fully
    connected head that ends in 2 raw class scores (no softmax is applied).

    Attribute names and creation order are kept stable because saved
    checkpoints are keyed by these names.
    """

    def __init__(self):
        super().__init__()

        # --- Feature extraction -------------------------------------------
        # Stage 1: strided convolution (stride 2) producing 16 feature maps.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1)
        self.batch1 = nn.BatchNorm2d(16)  # default momentum (0.1) and affine (True)

        # Stage 2: stride-1 convolution, 16 -> 32 feature maps.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.batch2 = nn.BatchNorm2d(32)

        # Shared 5x5 max-pool that shrinks the feature map; forward() applies
        # it after stage 1 and after stage 3. ceil_mode keeps partial windows
        # at the border.
        self.pool = nn.MaxPool2d(kernel_size=5, stride=5, ceil_mode=True)

        # Stage 3: strided convolution (stride 2), 32 -> 64 feature maps.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2)
        self.batch3 = nn.BatchNorm2d(64)

        # Global average pooling collapses each feature map to a single value.
        self.pool2 = nn.AdaptiveAvgPool2d((1, 1))

        # --- Classification head: 64 -> 32 -> 16 -> 8 -> 2 -----------------
        self.fc1 = nn.Linear(64, 32)
        self.batchfc1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 16)
        self.batchfc2 = nn.BatchNorm1d(16)
        self.fc3 = nn.Linear(16, 8)
        self.batchfc3 = nn.BatchNorm1d(8)
        # out_features equals the number of classes
        self.fc4 = nn.Linear(8, 2)

    def forward(self, x):
        """Return one row of 2 raw class scores per input image.

        x is a batch of images with 3 channels (conv1 has in_channels=3).
        """
        # Stage 1: conv -> batch norm -> max-pool -> ReLU
        x = self.conv1(x)
        x = self.batch1(x)
        x = self.pool(x)
        x = F.relu(x)

        # Stage 2: conv -> batch norm -> ReLU (no pooling)
        x = self.conv2(x)
        x = self.batch2(x)
        x = F.relu(x)

        # Stage 3: conv -> batch norm -> max-pool -> ReLU
        x = self.conv3(x)
        x = self.batch3(x)
        x = self.pool(x)
        x = F.relu(x)

        # Average each of the 64 feature maps down to 1x1, then flatten
        # everything except the batch dimension.
        x = self.pool2(x)
        x = torch.flatten(x, 1)

        # Hidden fully connected layers: linear -> batch norm -> ReLU
        hidden_layers = (
            (self.fc1, self.batchfc1),
            (self.fc2, self.batchfc2),
            (self.fc3, self.batchfc3),
        )
        for linear, norm in hidden_layers:
            x = F.relu(norm(linear(x)))

        # Final linear layer produces the class scores.
        return self.fc4(x)



# Instantiate the network with freshly initialised weights; the trained
# weights are loaded into this object by the load_state_dict call further down.
model1 = Net()

In [20]:
# Load the trained weights into the network and switch it to inference mode.
model = model1
# map_location="cpu": the evaluation cells never move data to another device,
#   and without it a checkpoint saved from a GPU fails to load on a CPU-only machine.
# weights_only=True: the file is consumed as a state dict, so restrict
#   unpickling to tensors and plain containers instead of arbitrary objects.
state_dict = torch.load("Models/model.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
# eval() makes the batch-norm layers use their stored running statistics.
model.eval()

Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (batch1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (batch2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=5, stride=5, padding=0, dilation=1, ceil_mode=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (batch3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc1): Linear(in_features=64, out_features=32, bias=True)
  (batchfc1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (batchfc2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=16, out_features=8, bias=True)
  (batchfc3): BatchNo

In [21]:
# Display names for the two classes, looked up by the integer label of each sample.
# NOTE(review): assumes label 0 is Pukeko and label 1 is Takahe — confirm against
# the class_to_idx mapping of the ImageFolder built from "Train_data".
classes = ("Pukeko", "Takahe")

In [22]:
# Checking the overall accuracy of the model on the Test set.
correct = 0
total = 0
# no_grad: gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

if total == 0:
    raise ValueError("test_loader produced no samples; accuracy is undefined")

# True division with one decimal place: floor division would silently truncate
# the result (e.g. 80.9 % reported as 80 %) and disagree with the per-class report.
print(f'Accuracy of the network on the Test images: {100 * correct / total:.1f} %')


Accuracy of the network on the Test images: 80 %


In [23]:
# Checking the accuracy for each class on the Test set.
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# no_grad: gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predictions = outputs.argmax(dim=1)
        # Convert to plain Python ints once per batch so the tallying below
        # compares and indexes with ints rather than 0-dim tensors.
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            label_name = classes[label]
            if label == prediction:
                correct_pred[label_name] += 1
            total_pred[label_name] += 1

for classname, correct_count in correct_pred.items():
    seen = total_pred[classname]
    if seen == 0:
        # A class with no test samples has no defined accuracy; report that
        # instead of failing with a ZeroDivisionError.
        print(f'Accuracy for class: {classname:5s} is n/a (no test samples)')
        continue
    accuracy = 100 * float(correct_count) / seen
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')


Accuracy for class: Pukeko is 86.7 %
Accuracy for class: Takahe is 43.5 %
