In [14]:
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np
import scipy.io.wavfile as wav
from scipy.signal import spectrogram

import os

import cv2

from tqdm import tqdm

import torch
import torch.nn as nn

## image preprocessing

In [15]:
def saveBinarySpectrogramAsImage(audioFile, outputPath, threshold=0.05):
    sampleRate, audioData = wav.read(audioFile)

    # If the audio has multiple channels, use only the first one
    if len(audioData.shape) > 1:
        audioData = audioData[:, 0]

    # Set FFT Window Size and Overlap
    nps = 2048  # Larger FFT window improves frequency resolution
    frequencies, time, intensity = spectrogram(audioData, 
                                               fs=sampleRate, 
                                               nperseg=nps, 
                                               noverlap=(nps // 2))

    # Normalize the intensity
    intensityNorm = np.clip(intensity / np.max(intensity), 0, 1)

    # Apply the threshold
    binarySpectrogram = intensityNorm > threshold

    # Restrict frequency range (optional)
    mask = (frequencies >= 10000) & (frequencies <= 40000)
    frequencies = frequencies[mask]
    binarySpectrogram = binarySpectrogram[mask, :]

    # Save the binary spectrogram as an image
    plt.figure(figsize=(10, 6))
    plt.pcolormesh(time, frequencies, binarySpectrogram, cmap='gray', shading='gouraud')
    plt.axis('off')  # Remove axes for clean image
    plt.tight_layout()
    plt.savefig(outputPath, bbox_inches='tight', pad_inches=0)
    plt.close()  # Close the plot to free memory

def processImage(inputImagePath, outputFolder):
    # Load the binary spectrogram image
    image = cv2.imread(inputImagePath, 0)
    kernel = np.ones((5, 5), np.uint8)

    # Apply median filtering
    medianFilteredImage = cv2.medianBlur(image, 5)

    # Apply dilation
    dilatedImage = cv2.dilate(medianFilteredImage, kernel, iterations=3)

    # Apply dilation then erosion
    dilatedThenEroded = cv2.erode(dilatedImage, kernel, iterations=2)

    # Apply dilation again
    dilatedAgain = cv2.dilate(dilatedThenEroded, kernel, iterations=3)

    processedImage = dilatedAgain
    # Save processed images
    baseName = os.path.splitext(os.path.basename(inputImagePath))[0]
    cv2.imwrite(os.path.join(outputFolder, f"{baseName}_processed_img.png"), processedImage)

if __name__ == "__main__":
    # for testing -  testClips  # main folder - LabelledAudioChunks
    audioFolder = "E:\\audioChunks\\South Control Grassland 4 - chunks\\chunkFolder-63"  # Folder containing audio files      
    binarySpectrogramFolder = "testBinSpecImgsFolder"  # Folder to save binary spectrogram images     
    processedImagesFolder = "testProcessedSpecImgsFolder"  # Folder to save processed images  

    # Create output folders if they don't exist
    os.makedirs(binarySpectrogramFolder, exist_ok=True)
    os.makedirs(processedImagesFolder, exist_ok=True)

    # Generate binary spectrograms with progress bar
    audioFiles = [file for file in os.listdir(audioFolder) if file.endswith(".wav")]
    for file in tqdm(audioFiles, desc="Generating Binary Spectrograms"):
        filePath = os.path.join(audioFolder, file)
        outputImagePath = os.path.join(binarySpectrogramFolder, f"{os.path.splitext(file)[0]}_binary.png")

        # Generate and save the binary spectrogram as an image
        saveBinarySpectrogramAsImage(filePath, outputImagePath)

    # Process each generated binary spectrogram with progress bar
    spectrogramFiles = [file for file in os.listdir(binarySpectrogramFolder) if file.endswith(".png")]
    for file in tqdm(spectrogramFiles, desc="Processing Spectrogram Images"):
        inputImagePath = os.path.join(binarySpectrogramFolder, file)

        # Apply image processing and save the results
        processImage(inputImagePath, processedImagesFolder)

Generating Binary Spectrograms: 100%|██████████| 600/600 [16:22<00:00,  1.64s/it]
Processing Spectrogram Images: 100%|██████████| 600/600 [00:09<00:00, 60.33it/s]


## using the model

In [2]:
class CNNModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128) 
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten 
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [3]:
# Load the model
model = CNNModel(4)
model.load_state_dict(torch.load('cnn_grasshopper_cricket_classifier_13-01-2025.pth'))
model.eval()

  model.load_state_dict(torch.load('cnn_grasshopper_cricket_classifier_13-01-2025.pth'))


CNNModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=200704, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=4, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [None]:
# Load the dataset from the CSV file
dataset = pd.read_csv("chirp-analysis-results.csv")

speciesCounts = dataset['Species'].value_counts()

# Print the number of occurrences for each species
print(speciesCounts)

Species
Roesel'sBush-Cricket    406
MeadowGrasshopper       180
Rain                    104
NoID1                    94
NoID10                   58
Name: count, dtype: int64


In [None]:
import cv2
import torch


# List of class labels (species)
classLabels = ['MeadowGrasshopper', 'NoID1', 'NoID10', "Roesel'sBush-Cricket"]

# Example image path
imagePath = "processedSpectrogramImages\\MeadowGrasshopper_1_binary_processed_img.png"

# Load the preprocessed image
imageData = cv2.imread(imagePath, cv2.IMREAD_GRAYSCALE)

if imageData is None:
    raise ValueError(f"Image {imagePath} could not be loaded.")

# Resize the image to match the input size expected by the model (224x224)
imageDataResized = cv2.resize(imageData, (224, 224))  # Resize to (224, 224)

# Normalize and prepare the image for the model
imageTensor = torch.tensor(imageDataResized, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # (1, 1, 224, 224)
imageTensor = imageTensor / 255.0  # Normalize pixel values to [0, 1]

# Perform inference
with torch.no_grad():
    output = model(imageTensor)  # Pass the image through the model
    _, predicted = torch.max(output, 1)  # Get the index of the class with the highest probability
    classLabel = classLabels[predicted.item()]  # Map the index to the class label

print(f"Predicted Class: {classLabel}")


Predicted Class: MeadowGrasshopper


In [16]:
# Folder containing test images
imageFolder = "testProcessedSpecImgsFolder"

# Loop through each file in the folder
for imageFilename in os.listdir(imageFolder):
    # Check if the file is an image (you can filter by file extension, e.g., .png)
    if imageFilename.endswith(".png"):
        # Full path to the image
        imagePath = os.path.join(imageFolder, imageFilename)
        
        # Load the preprocessed image
        imageData = cv2.imread(imagePath, cv2.IMREAD_GRAYSCALE)
        
        if imageData is None:
            print(f"Error loading image {imagePath}")
            continue  # Skip this image if it couldn't be loaded
        
        # Resize the image to match the input size expected by the model (224x224)
        imageDataResized = cv2.resize(imageData, (224, 224))  # Resize to (224, 224)
        
        # Normalize and prepare the image for the model
        imageTensor = torch.tensor(imageDataResized, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # (1, 1, 224, 224)
        imageTensor = imageTensor / 255.0  # Normalize pixel values to [0, 1]
        
        # Perform inference
        with torch.no_grad():
            output = model(imageTensor)  # Pass the image through the model
            _, predicted = torch.max(output, 1)  # Get the index of the class with the highest probability
            classLabel = classLabels[predicted.item()]  # Map the index to the class label
        
        # Print the predicted class for this image
        print(f"Image: {imageFilename}, Predicted Class: {classLabel}")

Image: chunk_000_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_001_binary_processed_img.png, Predicted Class: Roesel'sBush-Cricket
Image: chunk_002_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_003_binary_processed_img.png, Predicted Class: NoID1
Image: chunk_004_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_005_binary_processed_img.png, Predicted Class: Roesel'sBush-Cricket
Image: chunk_006_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_007_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_008_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_009_binary_processed_img.png, Predicted Class: Roesel'sBush-Cricket
Image: chunk_010_binary_processed_img.png, Predicted Class: MeadowGrasshopper
Image: chunk_011_binary_processed_img.png, Predicted Class: Roesel'sBush-Cricket
Image: chunk_012_binary_processed_img.png, Predicted Class: Mead