In [1]:
# Import libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization, Add
from tqdm import tqdm
# Configure GPU memory growth *before* anything instantiates the GPU device:
# TensorFlow requires memory growth to be set before GPUs are initialized, and
# tf.test.gpu_device_name() creates the device as a side effect of querying it
# (NOTE(review): this is why the device-name query was moved to the end).
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpus))
if gpus:
    try:
        # Grow GPU allocations on demand instead of reserving all memory up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized (e.g. cell re-run on a live kernel).
        print(e)
print("TensorFlow is using:", tf.test.gpu_device_name())

2025-02-09 00:55:51.642127: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739091351.724801     607 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739091351.748392     607 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-09 00:55:51.950393: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available: 1
TensorFlow is using: /device:GPU:0
GPU memory growth enabled.


I0000 00:00:1739091355.578297     607 gpu_device.cc:2022] Created device /device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [2]:
# Data Preparation
def load_data(spectrogram_dir, label_dir, quarter='full'):
    """Load paired spectrogram/label files and stack them along axis 0.

    Regular files in ``spectrogram_dir`` and ``label_dir`` are sorted by name
    and paired by position, so both directories must hold the same number of
    files in matching order. Each label array is transposed after loading and
    then zero-padded or trimmed along axis 0 so that it has as many rows as
    its spectrogram has entries along axis 0.

    Args:
        spectrogram_dir: Directory of spectrogram arrays readable by ``np.load``.
        label_dir: Directory of label arrays readable by ``np.load``. Each
            label must be 2-D (it is transposed and padded as a 2-D array).
        quarter: ``'full'`` (default) loads every file; ``'first'``,
            ``'second'``, ``'third'`` or ``'fourth'`` loads that quarter of the
            sorted file list. The fourth quarter also takes the remainder when
            the file count is not divisible by four.

    Returns:
        Tuple ``(X, Y)`` where ``X`` is ``np.vstack`` of the selected
        spectrograms and ``Y`` is ``np.vstack`` of the aligned labels; both
        have the same length along axis 0.

    Raises:
        ValueError: If ``quarter`` is not one of the accepted names, if the two
            directories contain a different number of files (positional
            pairing would silently mispair or drop data), or if the selection
            contains no files.
    """
    quarter_names = ('first', 'second', 'third', 'fourth')
    if quarter != 'full' and quarter not in quarter_names:
        raise ValueError(
            f"quarter must be 'full' or one of {quarter_names}, got {quarter!r}"
        )

    def _sorted_files(directory):
        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints) that np.load cannot read.
        return sorted(
            name for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        )

    spectrogram_files = _sorted_files(spectrogram_dir)
    label_files = _sorted_files(label_dir)

    if len(spectrogram_files) != len(label_files):
        raise ValueError(
            f"Found {len(spectrogram_files)} spectrogram files in {spectrogram_dir!r} "
            f"but {len(label_files)} label files in {label_dir!r}; "
            "files are paired by sorted position and must match one-to-one"
        )

    total_files = len(spectrogram_files)
    quarter_idx = total_files // 4

    if quarter != 'full':
        position = quarter_names.index(quarter)
        start = position * quarter_idx
        # The last quarter runs to the end so leftover files are not dropped.
        stop = None if position == len(quarter_names) - 1 else start + quarter_idx
        spectrogram_files = spectrogram_files[start:stop]
        label_files = label_files[start:stop]

    if not spectrogram_files:
        raise ValueError(
            f"No files to load for quarter {quarter!r} from {spectrogram_dir!r}"
        )

    spectrograms = []
    labels = []
    for spec_file, label_file in tqdm(zip(spectrogram_files, label_files),
                                      total=len(spectrogram_files),
                                      desc=f"Loading {quarter} data",
                                      unit="file"):
        spectrogram = np.load(os.path.join(spectrogram_dir, spec_file))
        # Labels are stored with the time axis last; transpose so time is axis 0.
        label = np.load(os.path.join(label_dir, label_file)).T

        # Pad (with zeros) or trim the label so it has one row per spectrogram entry.
        T_spec = spectrogram.shape[0]
        T_label = label.shape[0]
        if T_label < T_spec:
            diff = T_spec - T_label
            label = np.pad(label, ((0, diff), (0, 0)), mode='constant')
        elif T_label > T_spec:
            label = label[:T_spec]

        spectrograms.append(spectrogram)
        labels.append(label)

    # Concatenate all files along axis 0.
    X = np.vstack(spectrograms)
    Y = np.vstack(labels)

    return X, Y


In [None]:
# Define CNN model: four Conv -> BatchNorm -> MaxPool stages followed by a
# dense head that emits 129 independent sigmoid outputs.
conv_stages = []
for n_filters in (32, 64, 128, 256):
    conv_stages += [
        Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
    ]

dense_head = []
for n_units in (512, 256):
    dense_head += [Dense(n_units, activation='relu'), Dropout(0.3)]

model = Sequential([
    Input(shape=(128, 87, 1)),
    *conv_stages,
    Flatten(),
    *dense_head,
    Dense(129, activation='sigmoid'),
])

# Binary cross-entropy treats each of the 129 outputs as its own yes/no
# decision; binary accuracy is tracked as the training metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

I0000 00:00:1739091364.226818     607 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [4]:
# Accumulates per-epoch training metrics across all training cells.
history_all = {metric: [] for metric in ('loss', 'binary_accuracy')}

In [None]:
# Fit the model on the first quarter of the training files.
X_train, Y_train = load_data("spectrograms_train", "labels_train", quarter='first')
X_train = X_train[..., np.newaxis]  # append a trailing channel axis for Conv2D
print("Training on first quarter:", X_train.shape, Y_train.shape)
history = model.fit(x=X_train, y=Y_train, batch_size=64, epochs=5, verbose=1)

# Append this run's per-epoch metrics to the running history.
history_all['loss'] += history.history['loss']
history_all['binary_accuracy'] += history.history['binary_accuracy']

Loading first data: 100%|██████████| 322/322 [00:03<00:00, 97.08file/s] 


Training on first quarter: (79341, 128, 87, 1) (79341, 129)


2025-02-09 00:56:20.185451: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3534165504 exceeds 10% of free system memory.
2025-02-09 00:56:22.019931: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3534165504 exceeds 10% of free system memory.


Epoch 1/10


I0000 00:00:1739091384.008892     939 service.cc:148] XLA service 0x7fc3e800eab0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1739091384.010750     939 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2025-02-09 00:56:24.300192: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1739091384.513290     939 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   7/1240[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 24ms/step - binary_accuracy: 0.5949 - loss: 0.8547 

I0000 00:00:1739091389.508299     939 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 28ms/step - binary_accuracy: 0.9481 - loss: 0.1491
Epoch 2/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 25ms/step - binary_accuracy: 0.9654 - loss: 0.0994
Epoch 3/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 25ms/step - binary_accuracy: 0.9690 - loss: 0.0861
Epoch 4/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 24ms/step - binary_accuracy: 0.9721 - loss: 0.0762
Epoch 5/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 25ms/step - binary_accuracy: 0.9748 - loss: 0.0683
Epoch 6/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 23ms/step - binary_accuracy: 0.9765 - loss: 0.0633
Epoch 7/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 25ms/step - binary_accuracy: 0.9781 - loss: 0.0589
Epoch 8/10
[1m1240/1240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 25ms/step - bi

In [None]:
# Continue fitting the same model on the second quarter of the training files.
X_train, Y_train = load_data("spectrograms_train", "labels_train", quarter='second')
X_train = X_train[..., np.newaxis]  # append a trailing channel axis for Conv2D
print("Training on second quarter:", X_train.shape, Y_train.shape)
history = model.fit(x=X_train, y=Y_train, batch_size=64, epochs=5, verbose=1)

# Append this run's per-epoch metrics to the running history.
history_all['loss'] += history.history['loss']
history_all['binary_accuracy'] += history.history['binary_accuracy']

Loading second data: 100%|██████████| 322/322 [00:03<00:00, 88.91file/s]


Training on second quarter: (80558, 128, 87, 1) (80558, 129)


2025-02-09 01:02:23.328424: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3588375552 exceeds 10% of free system memory.
2025-02-09 01:02:25.161156: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3588375552 exceeds 10% of free system memory.


Epoch 1/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 29ms/step - binary_accuracy: 0.9654 - loss: 0.1025
Epoch 2/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 23ms/step - binary_accuracy: 0.9711 - loss: 0.0806
Epoch 3/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9748 - loss: 0.0692
Epoch 4/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9774 - loss: 0.0618
Epoch 5/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9790 - loss: 0.0571
Epoch 6/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9801 - loss: 0.0541
Epoch 7/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9809 - loss: 0.0515
Epoch 8/10
[1m1259/1259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24m

In [7]:
# Continue fitting the same model on the third quarter of the training files.
X_train, Y_train = load_data("spectrograms_train", "labels_train", quarter='third')
X_train = X_train[..., np.newaxis]  # append a trailing channel axis for Conv2D
print("Training on third quarter:", X_train.shape, Y_train.shape)
history = model.fit(x=X_train, y=Y_train, batch_size=64, epochs=5, verbose=1)

# Append this run's per-epoch metrics to the running history.
history_all['loss'] += history.history['loss']
history_all['binary_accuracy'] += history.history['binary_accuracy']

Loading third data: 100%|██████████| 322/322 [00:03<00:00, 90.08file/s]


Training on third quarter: (81353, 128, 87, 1) (81353, 129)


2025-02-09 01:08:19.291847: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3623788032 exceeds 10% of free system memory.


Epoch 1/5
[1m1272/1272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 27ms/step - binary_accuracy: 0.9653 - loss: 0.1040
Epoch 2/5
[1m1272/1272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9701 - loss: 0.0837
Epoch 3/5
[1m1272/1272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9738 - loss: 0.0722
Epoch 4/5
[1m1272/1272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 24ms/step - binary_accuracy: 0.9764 - loss: 0.0642
Epoch 5/5
[1m1272/1272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 25ms/step - binary_accuracy: 0.9781 - loss: 0.0593


In [None]:
# Continue fitting the same model on the fourth (final) quarter of the training files.
X_train, Y_train = load_data("spectrograms_train", "labels_train", quarter='fourth')
X_train = X_train[..., np.newaxis]  # append a trailing channel axis for Conv2D
print("Training on fourth quarter:", X_train.shape, Y_train.shape)
history = model.fit(x=X_train, y=Y_train, batch_size=64, epochs=5, verbose=1)

# Append this run's per-epoch metrics to the running history.
history_all['loss'] += history.history['loss']
history_all['binary_accuracy'] += history.history['binary_accuracy']

In [8]:
# Report the metrics of the most recently recorded training epoch.
final_loss, final_accuracy = (
    history_all[metric][-1] for metric in ('loss', 'binary_accuracy')
)
print(f"Final Loss: {final_loss:.4f}, Final Accuracy: {final_accuracy:.4f}")

Final Loss: 0.0586, Final Accuracy: 0.9783


In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Predict on whichever training quarter is currently held in X_train.
Y_pred = model.predict(X_train)

# Threshold the sigmoid probabilities into hard 0/1 label decisions.
threshold = 0.5
Y_pred_binary = np.greater(Y_pred, threshold).astype(int)

# Sample-averaged multi-label scores; samples with an undefined score count as 0.
precision, recall, f1 = (
    score_fn(Y_train, Y_pred_binary, average='samples', zero_division=0)
    for score_fn in (precision_score, recall_score, f1_score)
)

# Print the metrics
print(
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-Score: {f1:.4f}",
    sep="\n",
)

2025-02-08 21:29:46.336094: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.
2025-02-08 21:29:47.709445: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.


[1m2050/2060[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step




[1m2060/2060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 6ms/step
Precision: 0.8699
Recall: 0.7699
F1-Score: 0.7996


In [10]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the complete held-out test set and append the trailing channel axis.
X_test, Y_test = load_data("spectrograms_test", "labels_test")
X_test = X_test[..., np.newaxis]

# Predict and threshold the sigmoid probabilities at 0.5.
Y_pred_test = model.predict(X_test)
Y_pred_test_binary = np.greater(Y_pred_test, 0.5).astype(int)

# Sample-averaged multi-label scores; samples with an undefined score count as 0.
precision_test, recall_test, f1_test = (
    score_fn(Y_test, Y_pred_test_binary, average='samples', zero_division=0)
    for score_fn in (precision_score, recall_score, f1_score)
)

print(
    f"Test Precision: {precision_test:.4f}",
    f"Test Recall: {recall_test:.4f}",
    f"Test F1-Score: {f1_test:.4f}",
    sep="\n",
)


Loading full data: 100%|██████████| 151/151 [00:00<00:00, 357.28file/s]


[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step
Test Precision: 0.6682
Test Recall: 0.2988
Test F1-Score: 0.3915
