In [11]:
import torch
import cv2
import numpy as np
from torchvision import transforms
import torch.nn.functional as F

# Define your model architecture (example)
import torch.nn as nn

class BirdSpeciesCNNModel(nn.Module):
    """CNN image classifier: five conv/ReLU/max-pool/dropout stages and a two-layer head.

    Args:
        num_classes: Number of logits produced by the final linear layer.
            Defaults to 21.
        input_size: Spatial size of the images the model will be fed, either a
            single int for square images or a ``(height, width)`` pair.
            Defaults to 256. The first fully connected layer is sized for
            exactly this resolution, so ``forward`` must receive images of the
            same size.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes=21, input_size=256):
        super(BirdSpeciesCNNModel, self).__init__()

        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        self.num_classes = num_classes
        self.input_size = (height, width)

        # Each stage: 3x3 convolution without padding, 2x2 max-pool, dropout.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(p=0.2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(p=0.2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=0)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout3 = nn.Dropout(p=0.2)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=0)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout4 = nn.Dropout(p=0.2)

        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, padding=0)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout5 = nn.Dropout(p=0.2)

        # Run one dummy image through the conv/pool stack to learn how many
        # features reach the first linear layer. Dropout is skipped here
        # because it does not change the tensor shape.
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, height, width)
            x = self.pool1(F.relu(self.conv1(dummy_input)))
            x = self.pool2(F.relu(self.conv2(x)))
            x = self.pool3(F.relu(self.conv3(x)))
            x = self.pool4(F.relu(self.conv4(x)))
            x = self.pool5(F.relu(self.conv5(x)))
            # Batch size is 1, so numel() is the per-sample feature count.
            self.flatten_size = x.numel()

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(self.flatten_size, 1024)
        self.dropout_fc = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(1024, num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise conv layers (Kaiming normal) and linear layers (Xavier normal).

        Biases, when present, are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                # Same guard as the conv branch, in case a layer has no bias.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(N, 3, H, W)`` where ``(H, W)`` matches the
                ``input_size`` given at construction.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding raw logits; no
            softmax is applied.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.dropout1(x)
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.dropout2(x)
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.dropout3(x)
        x = self.pool4(F.relu(self.conv4(x)))
        x = self.dropout4(x)
        x = self.pool5(F.relu(self.conv5(x)))
        x = self.dropout5(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.dropout_fc(x)
        x = self.fc2(x)
        return x

# Instantiate the model and load trained weights.
# Without a checkpoint the network keeps its random initialisation, so every
# prediction below is meaningless. Set WEIGHTS_PATH to the trained file.
# NOTE(review): assumes the file holds a state_dict saved with
# torch.save(model.state_dict(), ...) — confirm against the training notebook.
WEIGHTS_PATH = None

model = BirdSpeciesCNNModel()
if WEIGHTS_PATH is not None:
    # torch.load unpickles the file: only load checkpoints you trust.
    model.load_state_dict(torch.load(WEIGHTS_PATH, map_location="cpu"))
else:
    print("WARNING: WEIGHTS_PATH is not set; the model is using untrained random weights.")
# Inference mode: disables the dropout layers.
model.eval()

# Preprocessing pipeline shared by the camera loop and the single-image test:
# array image -> PIL image -> 256x256 resize -> tensor.
# NOTE(review): presumably this must mirror the transforms (and channel order)
# used at training time — verify against the training code.
_preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
preprocess = transforms.Compose(_preprocess_steps)

# Human-readable species names, indexed by the model's output class index.
# The order is significant: position i names logit i.
class_labels = [
    'Blackbird',
    'Bluetit',
    'Carrion_Crow',
    'Chaffinch',
    'Coal_Tit',
    'Collared_Dove',
    'Dunnock',
    'Feral_Pigeon',
    'Goldfinch',
    'Great_Tit',
    'Greenfinch',
    'House_Sparrow',
    'Jackdaw',
    'Long_Tailed_Tit',
    'Magpie',
    'NotBird',
    'Robin',
    'Song_Thrush',
    'Starling',
    'Wood_Pigeon',
    'Wren',
]



# Live classification: grab frames from the default camera, classify each one
# and overlay the predicted label until 'q' is pressed or a read fails.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open camera 0")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers frames in BGR order; convert to RGB so the model sees
        # the same channel order as in preprocess_image().
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Preprocess the frame and add the batch dimension
        input_tensor = preprocess(rgb_frame).unsqueeze(0)

        # Run inference without tracking gradients
        with torch.no_grad():
            output = model(input_tensor)
            _, predicted = torch.max(output, 1)

        # Draw the class name (not the raw index) on the original BGR frame,
        # which is what cv2.imshow expects.
        predicted_label = class_labels[predicted.item()]
        cv2.putText(frame, f'Predicted: {predicted_label}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('Camera', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if inference raises
    # or the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [23]:
import cv2

# Load and preprocess the image
def preprocess_image(image_path):
    """Load an image file and turn it into a single-image batch for the model.

    The file is read as raw bytes and decoded in memory rather than with
    ``cv2.imread``, because ``cv2.imread`` cannot open paths containing
    non-ASCII characters on Windows and then reports the file as missing.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The output of the module-level ``preprocess`` pipeline applied to the
        RGB image, with a leading batch dimension added.

    Raises:
        FileNotFoundError: If the file cannot be read or cannot be decoded as
            an image.
    """
    try:
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    except OSError as exc:
        raise FileNotFoundError(f"Image not found or path is incorrect: {image_path}") from exc

    # An empty buffer cannot be decoded; treat it like an unreadable image.
    image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR) if raw_bytes.size else None
    if image is None:
        raise FileNotFoundError(f"Image not found or path is incorrect: {image_path}")

    # OpenCV decodes to BGR; the preprocessing pipeline is fed RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = preprocess(image).unsqueeze(0)  # Add batch dimension
    return image_tensor

# Location of the single image to classify
test_image_path = r"C:\Users\antoi\Documents\Henallux_24_25\Q2\DeepL\Bird_CNN_model\TestPics\téléchargement (3).jpg"

# Load the file and convert it into a one-image batch
test_image_tensor = preprocess_image(test_image_path)

# Switch to evaluation mode so the dropout layers are inactive
model.eval()

# Forward pass without gradient tracking; keep the index of the largest logit
with torch.no_grad():
    output = model(test_image_tensor)
    predicted_class = output.argmax(dim=1).item()

# Translate the class index into its species name and report it
predicted_label = class_labels[predicted_class]
print(f"Predicted class: {predicted_label}")

FileNotFoundError: Image not found or path is incorrect: C:\Users\antoi\Documents\Henallux_24_25\Q2\DeepL\Bird_CNN_model\TestPics\téléchargement (3).jpg