In [37]:
# Citations
# GBIF.org (15 June 2025) GBIF Occurrence Download  https://doi.org/10.15468/dl.mhxdcy
# Data was obtained from GBIF-NZ, it includes records from 6 datasets: iNaturalist Research-grade Observations Observation.org, Nature data from around the World, Auckland Museum Land Vertebrates Collection, Xeno-canto - Bird sounds from around the world, NABU|naturgucker, MVZ Egg and Nest Collection (Arctos)
# There were 4258 occurances in the original dataset, but this was reduced to 4202 occurances as a result of data cleaning.
# Of the original 4258 occurances, 581 were takahe (Porphyrio hochstetteri) and 3621 were pukeko (Porphyrio melanotus subsp. melanotus).
# In the cleaned dataset 550 were takahe (Porphyrio hochstetteri) and 3557 were pukeko (Porphyrio melanotus subsp. melanotus).
# The data cleaning and downloading of the images was performed in Rstudio.
# Made using Python 3.12.9


In [38]:
# Loading packages
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset, random_split, Dataset, TensorDataset
from torchvision.datasets import ImageFolder
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights


In [None]:
# Transforming the data by resizing the images, converting the images to tensors and normalizing the images
Transform_data = transforms.Compose([
    # Resizes images to 600 by 600 pixels
    transforms.Resize((600, 600)),
    # randomly horizontally flips images
    transforms.RandomHorizontalFlip(p = 0.3),
    # randomly rotates images
    transforms.RandomRotation(degrees = (0, 180)),
    # Randomly alters the visual appearance of the images during training
    transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.1),
    # turns images into tensors
    transforms.ToTensor(),
    # Normalises the data
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Random data augmentation techniques are not used on the validation set to avoid overfitting
V_Transform_data = transforms.Compose([
    # Resizes images to 600 by 600 pixels
    transforms.Resize((600, 600)),
    # turns images into tensors
    transforms.ToTensor(),
    # Normalises the data
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Importing data and splitting the dataset into training, validation and testing sets
Images = ImageFolder(root = "Train_data", transform = None)

# Splitting into train/val/test once, capturing indices
total_size = len(Images)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size  # Ensures total sums correctly

# Setting a seed to ensure reproducible results
generator = torch.Generator().manual_seed(0)

training_subset, validation_subset, Test_subset = random_split(Images, [train_size, val_size, test_size], generator = generator)
train_idx = training_subset.indices
val_idx   = validation_subset.indices
test_idx  = Test_subset.indices

# Creating three ImageFolder datasets with transforms, and sub-indexing them:
train_ds = Subset(ImageFolder("Train_data", transform = Transform_data), train_idx)
val_ds   = Subset(ImageFolder("Train_data", transform = V_Transform_data),  val_idx)
test_ds  = Subset(ImageFolder("Train_data", transform = V_Transform_data),  test_idx)

# Build the data loaders
train_loader = DataLoader(train_ds, batch_size = 35, shuffle = True)
validation_loader = DataLoader(val_ds, batch_size = 35, shuffle = False)
test_loader  = DataLoader(test_ds, batch_size = 35, shuffle = False)


In [None]:
# Defining the CNN model
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Feature extraction - convolution layer generates a feature map
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 3, stride = 2, padding = 1)
        # Batch normalisation
        self.batch1 = nn.BatchNorm2d(16, momentum = 0.1, affine = True)
        # Using pooling for first conv, then stride = 2 on 2nd conv, then pooling for thrid conv

        self.conv2 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 3, stride = 1)
        self.batch2 = nn.BatchNorm2d(32, momentum = 0.1, affine = True)
        self.pool = nn.MaxPool2d(kernel_size = 5, stride = 5, padding = 0, ceil_mode = True)

        self.conv3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 2)
        self.batch3 = nn.BatchNorm2d(64, momentum = 0.1, affine = True)
        
        self.pool2 = nn.AdaptiveAvgPool2d((1, 1))

        # Applies a linear transformation to the data
        self.fc1 = nn.Linear(in_features = 64, out_features = 32)
        self.batchfc1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(in_features = 32, out_features = 16)
        self.batchfc2 = nn.BatchNorm1d(16)
        self.fc3 = nn.Linear(in_features = 16, out_features = 8)
        self.batchfc3 = nn.BatchNorm1d(8)
        # out_features corresponds to the number of classes
        self.fc4 = nn.Linear(in_features = 8, out_features = 2)

    def forward(self, x):
        # blocks: conv, batch, relu, pooling
        x = F.relu(self.pool(self.batch1(self.conv1(x))))
        x = F.relu(self.batch2(self.conv2(x)))
        x = F.relu(self.pool(self.batch3(self.conv3(x))))
        # Flattens input 'x' by reshaping it into a one dimensional tensor.
        x = self.pool2(x)
        x = torch.flatten(x, 1)
        # linear
        x = F.relu(self.batchfc1(self.fc1(x)))
        x = F.relu(self.batchfc2(self.fc2(x)))
        x = F.relu(self.batchfc3(self.fc3(x)))
        x = self.fc4(x)
        return x

modelA = Net()

In [41]:
# Defining model B - only difference between modelA and B are the class weights
modelB = Net()

In [42]:
# Defining model C
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading pretrained EfficientNet-B3
weights = EfficientNet_B3_Weights.IMAGENET1K_V1
modelC = efficientnet_b3(weights=weights)

# Replacing the classifier head for 2 classes: EfficientNet_B3 has `model.classifier = nn.Sequential( Dropout, Linear )`
in_features = modelC.classifier[1].in_features
modelC.classifier[1] = nn.Linear(in_features, 2)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# loading trained models
modelA.load_state_dict(torch.load("Models/modelA.pth"))
modelB.load_state_dict(torch.load("Models/modelB.pth"))
modelC.load_state_dict(torch.load("Models/modelC.pth"))
modelA.to(device)
modelB.to(device)
modelC.to(device)


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [None]:
# Using raw probabilities:
modelA.eval()
modelB.eval()
modelC.eval()
all_preds  = []
all_labels = []

with torch.no_grad():
    for inputs, targets in validation_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Raw logits
        outA = modelA(inputs)
        outB = modelB(inputs)
        outC = modelC(inputs)

        # Raw class-1 probabilities
        pA = torch.softmax(outA, dim = 1)[:, 1]
        pB = torch.softmax(outB, dim = 1)[:, 1]
        pC = torch.softmax(outC, dim = 1)[:, 1]

        # Stacking the floats, shape [batch,2]
        stacked_input = torch.stack([pA, pB, pC], dim=1) # Shape: [batch, num_models]

        all_preds.append(stacked_input.cpu())
        all_labels.append(targets.cpu())

final_preds  = torch.cat(all_preds)   # [N_val, 3] where 3 is the number of models
final_labels = torch.cat(all_labels)  # [N_val]

In [46]:
# Checking that the final_pred and final_labels match size
print("final_preds.shape:", final_preds.shape)
print("final_labels.shape:", final_labels.shape)

final_preds.shape: torch.Size([616, 3])
final_labels.shape: torch.Size([616])


In [47]:
# Checking that there is more than one unique row for final_preds
uniq, counts = torch.unique(final_preds, return_counts=True, dim=0)
for u,c in zip(uniq, counts):
    print(u.tolist(), "→", c.item(), "samples")

[0.03909480571746826, 0.01853853650391102, 0.46096596121788025] → 1 samples
[0.06400875747203827, 0.038229119032621384, 0.017123812809586525] → 1 samples
[0.09014938026666641, 0.05046525225043297, 0.08127690851688385] → 1 samples
[0.09074638783931732, 0.03855164349079132, 0.13635680079460144] → 1 samples
[0.09144870191812515, 0.02581791952252388, 0.04538532346487045] → 1 samples
[0.10982140153646469, 0.0642043724656105, 0.01951802708208561] → 1 samples
[0.1123390719294548, 0.07824064046144485, 0.14137162268161774] → 1 samples
[0.11412861198186874, 0.057338327169418335, 0.07877081632614136] → 1 samples
[0.11696574091911316, 0.05548581853508949, 0.02731083519756794] → 1 samples
[0.1228940486907959, 0.04109540954232216, 0.0666346400976181] → 1 samples
[0.12729845941066742, 0.07667181640863419, 0.030236901715397835] → 1 samples
[0.1294945776462555, 0.04713558405637741, 0.010667994618415833] → 1 samples
[0.1327301561832428, 0.11861240118741989, 0.36970406770706177] → 1 samples
[0.1358194351

In [None]:
# Building a DataLoader for meta‐training
meta_ds = TensorDataset(final_preds, final_labels)
meta_loader= DataLoader(meta_ds, batch_size = 64, shuffle = True)

# Defining the MetaModel (logistic regression)
class MetaModel(nn.Module):
    def __init__(self, in_features = 3, num_classes = 2): # 3 inputs (from 3 base models), 2 outputs for 2 classes
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.linear(x)

meta_model = MetaModel().to(device)

# Optimizer & Loss
optimizer = torch.optim.Adam(meta_model.parameters(), lr = 1e-2)
neg, pos = (final_labels == 0).sum(), (final_labels == 1).sum()
weights = torch.tensor([1.0, neg/pos]).to(device)
criterion = nn.CrossEntropyLoss(weight=weights)


In [49]:
# Training meta model
num_epochs = 20
num_classes = 2
for epoch in range(num_epochs):
    meta_model.train()
    running_loss = 0.0
    correct, total = 0, 0
    class_correct = [0]*num_classes
    class_total   = [0]*num_classes

    for Xb, yb in meta_loader:
        Xb, yb = Xb.to(device), yb.to(device)

        optimizer.zero_grad()
        logits = meta_model(Xb)
        loss   = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * Xb.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)

        # Per-class
        for cls in range(num_classes):
            mask = (yb == cls)
            class_total[cls]   += mask.sum().item()
            class_correct[cls] += (preds[mask] == cls).sum().item()

    epoch_loss = running_loss / total
    epoch_acc  = correct / total * 100
    cls0_acc   = class_correct[0] / class_total[0] * 100
    cls1_acc   = class_correct[1] / class_total[1] * 100

    print(f"Epoch {epoch+1}/{num_epochs}  "
          f"loss: {epoch_loss:.4f}, acc: {epoch_acc:.2f}%  "
          f"cls0_acc: {cls0_acc:.2f}%, cls1_acc: {cls1_acc:.2f}%")


Epoch 1/20  loss: 0.6983, acc: 71.59%  cls0_acc: 78.58%, cls1_acc: 24.05%
Epoch 2/20  loss: 0.6777, acc: 41.72%  cls0_acc: 35.94%, cls1_acc: 81.01%
Epoch 3/20  loss: 0.6525, acc: 44.81%  cls0_acc: 37.43%, cls1_acc: 94.94%
Epoch 4/20  loss: 0.6340, acc: 62.34%  cls0_acc: 57.54%, cls1_acc: 94.94%
Epoch 5/20  loss: 0.6151, acc: 72.08%  cls0_acc: 69.09%, cls1_acc: 92.41%
Epoch 6/20  loss: 0.5960, acc: 77.76%  cls0_acc: 75.98%, cls1_acc: 89.87%
Epoch 7/20  loss: 0.5815, acc: 79.55%  cls0_acc: 78.03%, cls1_acc: 89.87%
Epoch 8/20  loss: 0.5654, acc: 79.55%  cls0_acc: 77.28%, cls1_acc: 94.94%
Epoch 9/20  loss: 0.5477, acc: 80.52%  cls0_acc: 78.40%, cls1_acc: 94.94%
Epoch 10/20  loss: 0.5374, acc: 82.79%  cls0_acc: 81.01%, cls1_acc: 94.94%
Epoch 11/20  loss: 0.5231, acc: 84.58%  cls0_acc: 83.43%, cls1_acc: 92.41%
Epoch 12/20  loss: 0.5118, acc: 85.71%  cls0_acc: 84.73%, cls1_acc: 92.41%
Epoch 13/20  loss: 0.4993, acc: 86.53%  cls0_acc: 85.66%, cls1_acc: 92.41%
Epoch 14/20  loss: 0.4891, acc: 86

In [None]:
# Using test set
modelA.eval()
modelB.eval()
modelC.eval()
meta_model.eval()

final_preds_test = []
with torch.no_grad():
  for X, _ in test_loader:
    X = X.to(device)

    pA = torch.softmax(modelA(X), dim = 1)[:, 1]
    pB = torch.softmax(modelB(X), dim = 1)[:, 1]
    pC = torch.softmax(modelC(X), dim = 1)[:, 1]

    stacked = torch.stack([pA, pB, pC], dim = 1)
    logits  = meta_model(stacked.to(device))
    preds   = logits.argmax(dim = 1).cpu()
    
    final_preds_test.append(preds)

final_preds_test = torch.cat(final_preds_test)  


In [None]:
# Getting the ground‐truths from test_loader
all_labels_test = []
with torch.no_grad():
    for _, y in test_loader:
        all_labels_test.append(y)
final_labels_test = torch.cat(all_labels_test)  # [N_test]

# Computing metrics with scikit‐learn
y_true = final_labels_test.numpy()
y_pred = final_preds_test .numpy()
confusion = confusion_matrix(y_true, y_pred)
class0_acc = confusion[0, 0] / confusion[0, :].sum()
class1_acc = confusion[1, 1] / confusion[1, :].sum()

print("Accuracy:", accuracy_score(y_true, y_pred))
print(f"Class 0 accuracy: {class0_acc*100:.2f}%")
print(f"Class 1 accuracy: {class1_acc*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_true, y_pred, digits = 4))

Accuracy: 0.8816855753646677
Class 0 accuracy: 88.76%
Class 1 accuracy: 84.78%

Classification Report:
               precision    recall  f1-score   support

           0     0.9708    0.8876    0.9274       525
           1     0.5693    0.8478    0.6812        92

    accuracy                         0.8817       617
   macro avg     0.7701    0.8677    0.8043       617
weighted avg     0.9110    0.8817    0.8907       617



In [None]:
# How many samples are in each class for the validation and test sets.
print("Val label counts:", torch.bincount(final_labels))
print("Test label counts:", torch.bincount(final_labels_test))

Val label counts: tensor([537,  79])
Test label counts: tensor([525,  92])


In [None]:
# Saving model
# PATH = 'Models/meta_model.pth'
# torch.save(meta_model.state_dict(), PATH)