In [1]:
# Import libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization, Add
from tqdm import tqdm
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print("TensorFlow is using:", tf.test.gpu_device_name())
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except RuntimeError as e:
        print(e)

2025-02-19 20:08:24.028170: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740024504.076963    1280 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740024504.093101    1280 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-19 20:08:24.228038: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available: 1
TensorFlow is using: /device:GPU:0
GPU memory growth enabled.


I0000 00:00:1740024506.770890    1280 gpu_device.cc:2022] Created device /device:GPU:0 with 6073 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:01:00.0, compute capability: 7.5


In [2]:
# Data Preparation
def load_data(spectrogram_dir, label_dir):
    spectrograms = []
    labels = []

    # Load spectrograms
    spectrogram_files = sorted(os.listdir(spectrogram_dir))
    label_files = sorted(os.listdir(label_dir))

    total_files = len(spectrogram_files)  

    for spec_file, label_file in tqdm(zip(spectrogram_files, label_files), 
                                      total=total_files, 
                                      desc="Loading Data", 
                                      unit="file"):
        
        spectrogram = np.load(os.path.join(spectrogram_dir, spec_file))  
        label = np.load(os.path.join(label_dir, label_file)).T 

        # Pad or trim labels
        T_spec = spectrogram.shape[0]  
        T_label = label.shape[0]      
        if T_label < T_spec:
            diff = T_spec - T_label
            label = np.pad(label, ((0, diff), (0, 0)), mode='constant')
        elif T_label > T_spec:
            label = label[:T_spec]
            
        spectrograms.append(spectrogram)
        labels.append(label)

    # Convert to numpy arrays
    X = np.vstack(spectrograms)  
    Y = np.vstack(labels)     

    return X, Y

In [11]:
# ========= USING VALIDATION SET FOR SMALLER DATA ==========

X_train, Y_train = load_data("spectrograms_reduced", "labels_reduced") 
X_train = np.expand_dims(X_train, axis=-1)  
print("X_train:", X_train.shape, "Y_train:", Y_train.shape)

Loading Data: 100%|██████████| 270/270 [00:02<00:00, 115.42file/s]


X_train: (65912, 128, 87, 1) Y_train: (65912, 8)


In [None]:
from tensorflow.keras.optimizers import SGD

# Define CNN model
model = Sequential([
    Input(shape=(128, 87, 1)),  # Shape of each input spectrogram
    
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    
    Dense(8, activation='sigmoid') 
])
optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

# Compile the model with binary cross-entropy loss and binary accuracy as the metric
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['binary_accuracy'])
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

In [15]:
# Train model
history = model.fit(X_train, Y_train, batch_size=64, epochs=5, verbose=1)

# Final loss & accuracy
final_loss = history.history['loss'][-1]
final_accuracy = history.history['binary_accuracy'][-1]
print(f"Final Loss: {final_loss:.4f}, Final Accuracy: {final_accuracy:.4f}")

Epoch 1/5
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 19ms/step - binary_accuracy: 0.8400 - loss: 0.4080
Epoch 2/5
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - binary_accuracy: 0.8719 - loss: 0.3287
Epoch 3/5
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 17ms/step - binary_accuracy: 0.8842 - loss: 0.2905
Epoch 4/5
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - binary_accuracy: 0.8950 - loss: 0.2600
Epoch 5/5
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - binary_accuracy: 0.9043 - loss: 0.2370
Final Loss: 0.2320, Final Accuracy: 0.9063


In [16]:
# Get predictions
Y_pred = model.predict(X_train)

# Convert to binary (threshold the probabilities)
threshold = 0.5
Y_pred_binary = (Y_pred > threshold).astype(int)

# Calculate metrics
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(Y_train, Y_pred_binary, average='samples', zero_division=0)
recall = recall_score(Y_train, Y_pred_binary, average='samples', zero_division=0)
f1 = f1_score(Y_train, Y_pred_binary, average='samples', zero_division=0)
model.summary()

# Print the metrics
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

[1m2060/2060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step


Precision: 0.9066
Recall: 0.8772
F1-Score: 0.8836


In [17]:

X_test, Y_test = load_data("spectrograms_reduced_test", "labels_reduced_test")  
X_test = np.expand_dims(X_test, axis=-1)  

Y_pred_test = model.predict(X_test)
Y_pred_test_binary = (Y_pred_test > 0.5).astype(int)

precision_test = precision_score(Y_test, Y_pred_test_binary, average='samples', zero_division=0)
recall_test = recall_score(Y_test, Y_pred_test_binary, average='samples', zero_division=0)
f1_test = f1_score(Y_test, Y_pred_test_binary, average='samples', zero_division=0)

print(f"Test Precision: {precision_test:.4f}")
print(f"Test Recall: {recall_test:.4f}")
print(f"Test F1-Score: {f1_test:.4f}")

# F1-score on different set not so good, likely overfitting

Loading Data: 100%|██████████| 151/151 [00:01<00:00, 106.14file/s]


[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step
Test Precision: 0.8629
Test Recall: 0.8392
Test F1-Score: 0.8377


In [19]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_fscore_support
# Compute per-class precision, recall, and F1-score
precision_per_class, recall_per_class, f1_per_class, _ = precision_recall_fscore_support(
    Y_test, Y_pred_test_binary, average=None, zero_division=0
)

# Occurrences
class_counts = np.sum(Y_train, axis=0)
total_samples = len(Y_train)

num_classes = Y_test.shape[1]  # Number of instrument classes
for i in range(num_classes):
    print(f"Class {i}: Precision={precision_per_class[i]:.4f}, Recall={recall_per_class[i]:.4f}, "
          f"F1={f1_per_class[i]:.4f} | Occurrences={int(class_counts[i])} ({class_counts[i] / total_samples:.2%})")

Class 0: Precision=0.8545, Recall=0.9200, F1=0.8861 | Occurrences=51494 (78.13%)
Class 1: Precision=0.8780, Recall=0.9353, F1=0.9058 | Occurrences=51444 (78.05%)
Class 2: Precision=0.9544, Recall=0.9445, F1=0.9494 | Occurrences=52716 (79.98%)
Class 3: Precision=0.8615, Recall=0.8671, F1=0.8643 | Occurrences=37491 (56.88%)
Class 4: Precision=0.6101, Recall=0.2328, F1=0.3370 | Occurrences=5706 (8.66%)
Class 5: Precision=0.3984, Recall=0.1122, F1=0.1750 | Occurrences=9328 (14.15%)
Class 6: Precision=0.4644, Recall=0.1633, F1=0.2416 | Occurrences=7436 (11.28%)
Class 7: Precision=0.9891, Recall=0.9691, F1=0.9790 | Occurrences=56208 (85.28%)
