## ChirpNet Model 3 - CNN

By Thejaswin Kumaran<div>
Class: CS 4375.001 - Sriraam Natarajan

In [26]:
# Imports Cell
import os
import sys

import numpy as np
import pandas as pd
import librosa

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn import tree
# from sklearn.tree import DecisionTreeClassifier, export_text, DecisionTreeClassifier
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
# from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
# from decision_tree import load_monk_data



In [27]:

# Attributes Cell

# --- User Malleable parameters ---
# data_dir = 'jeantetDataset/Audio/Training'
data_dir = r'C:\Users\Thejas\Documents\Classes\4375_MachineLearning_Sriraam\Programs\ChirpNET\jeantetDataset\Audio\Training'
annotations_dir = r'C:\Users\Thejas\Documents\Classes\4375_MachineLearning_Sriraam\Programs\ChirpNET\jeantetDataset\Annotation\Training'
sr = 22050
duration = 5.0
n_mels = 128 
test_size = 0.2 
random_state = 42 
samples_per_clip = int(sr * duration)
hop_length = 512
# y_train = 0.8


# Helper: load an audio file, pad/trim, and compute Mel-spectrogram
# def preprocess(file_path):
#     # librosa.load can handle WAV, MP3, etc.
#     y, _ = librosa.load(file_path, sr=sr, duration=duration)
#     if len(y) < samples_per_clip:
#         y = np.pad(y, (0, samples_per_clip - len(y)), mode='constant')
#     else:
#         y = y[:samples_per_clip]
#     mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
#     mel_db = librosa.power_to_db(mel_spec, ref=np.max)
#     return mel_db

def preprocess(file_path):
    y, _ = librosa.load(file_path, sr=sr, duration=duration)
    if len(y) < samples_per_clip:
        y = np.pad(y, (0, samples_per_clip - len(y)), mode='constant')
    else:
        y = y[:samples_per_clip]
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
    mel_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_db

In [None]:
# Load annotations from .svl files and map to audio files
file_paths = []
labels = []
species_set = set()
# Each .svl filename encodes species; e.g., Family_Genus_species_Country_Date_XCid_type.svl
for svl_file in os.listdir(annotations_dir):
    if not svl_file.lower().endswith('.svl'):
        continue
    parts = svl_file.split('_')
    species_key = f"{parts[1]}_{parts[2]}"
    species_set.add(species_key)
    audio_fname = svl_file.replace('.svl', '.mp3')
    audio_path = os.path.join(data_dir, audio_fname)
    if os.path.isfile(audio_path):
        file_paths.append(audio_path)
        labels.append(species_key)

# Create species index mapping
taxon_list = sorted(species_set)
species_to_idx = {sp: i for i, sp in enumerate(taxon_list)}
labels_idx = [species_to_idx[sp] for sp in labels]
print(f"Found {len(taxon_list)} species, {len(file_paths)} files.")


# Test Statements 1
print(f"Found {len(taxon_list)} unique species.")
print(f"Total files: {len(file_paths)}")
# print(f"Number of species: {len(species)}")
# print(f"Species: {species}")
# print(f"Number of files found: {len(file_paths)}")
# print(f"Labels: {labels[:10]}")
# print(f"/n/n")

# Preprocess all files into spectrogram arrays
data = [preprocess(fp) for fp in file_paths]
X = np.array(data)[..., np.newaxis]
y = to_categorical(labels_idx, num_classes=len(taxon_list))
# print(f"X shape: {X.shape}, y shape: {y.shape}")

# Temp Sanity check
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")


Found 22 species, 965 files.
Found 22 unique species.
Total files: 965
X shape: (965, 128, 216, 1), y shape: (965, 22)
X shape: (965, 128, 216, 1)
y shape: (965, 22)


In [29]:

# Split data
X_train, X_val, y_train, y_val, idx_train, idx_val = train_test_split(
    X, y, labels_idx, test_size=test_size, random_state=random_state, stratify=labels_idx
)

# Build CNN model
input_shape = X_train.shape[1:]
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(taxon_list), activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [30]:
# 5) Train model
epochs = 20
batch_size = 32
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size
)

# 6) Evaluate and plot metrics
# Prepare epoch indices
epochs_range = range(1, epochs + 1)

# Plot Accuracy
plt.figure()
plt.plot(epochs_range, history.history['accuracy'], label='Train Accuracy')
plt.plot(epochs_range, history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Plot Loss
plt.figure()
plt.plot(epochs_range, history.history['loss'], label='Train Loss')
plt.plot(epochs_range, history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 141ms/step - accuracy: 0.0680 - loss: 80.8108 - val_accuracy: 0.2487 - val_loss: 2.7785
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 134ms/step - accuracy: 0.2219 - loss: 2.6632 - val_accuracy: 0.3005 - val_loss: 2.3789
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 147ms/step - accuracy: 0.3764 - loss: 2.1773 - val_accuracy: 0.4145 - val_loss: 2.1265
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 136ms/step - accuracy: 0.4570 - loss: 1.8777 - val_accuracy: 0.4508 - val_loss: 2.0185
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 137ms/step - accuracy: 0.6057 - loss: 1.3424 - val_accuracy: 0.4611 - val_loss: 1.8613
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 136ms/step - accuracy: 0.6495 - loss: 1.0988 - val_accuracy: 0.4819 - val_loss: 1.8197
Epoch 7/20
[1m25/25[0m [

  plt.show()
  plt.show()


In [31]:

# Confusion matrix & Predict on validation set
y_pred_probs = model.predict(X_val)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.array(idx_val)
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(10,8))
plt.imshow(cm, interpolation='nearest', aspect='auto')
plt.title('Confusion Matrix')
plt.colorbar()
plt.xticks(np.arange(len(taxon_list)), taxon_list, rotation=90)
plt.yticks(np.arange(len(taxon_list)), taxon_list)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.tight_layout()
plt.show()

# Save model
model.save('jeantet_cnn_model.h5')
print('Training complete, plots displayed, model saved to jeantet_cnn_model.h5')


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step


  plt.show()


Training complete, plots displayed, model saved to jeantet_cnn_model.h5
