In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gc
import h5py
from collections import Counter

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dense, Dropout, Activation, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

2025-03-20 15:29:15.818561: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-20 15:29:16.057275: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742484556.144359   97464 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742484556.169528   97464 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-20 15:29:16.388374: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [3]:
# paths
train_path = "IsoDatasetPro/Train"
val_path = "IsoDatasetPro/Val"
test_path = "IsoDatasetPro/Test"

num_classes = {"root": 14, "bass": 14, "triad": 8, "fourth": 6}

In [4]:
# Data generating functions
def load_train():
    with h5py.File(os.path.join(train_path, 'train.h5'), "r") as f:
        dataset = f['X_train'][:]
        root = f['root'][:]
        bass = f['bass'][:]
        triad = f['triad'][:]
        fourth = f['fourth'][:]
        return dataset, root, bass, triad, fourth

def load_val():
    with h5py.File(os.path.join(val_path, 'val.h5'), "r") as f:
        dataset = f['X_val'][:]
        root = f['root'][:]
        bass = f['bass'][:]
        triad = f['triad'][:]
        fourth = f['fourth'][:]
        return dataset, root, bass, triad, fourth

def load_test():
    with h5py.File(os.path.join(test_path, 'test.h5'), "r") as f:
        dataset = f['X_test'][:]
        root = f['root'][:]
        bass = f['bass'][:]
        triad = f['triad'][:]
        fourth = f['fourth'][:]
        return dataset, root, bass, triad, fourth

# Initialize batch size
batch_size = 64



In [5]:
result = load_train()
X_train = result[0]
root_train = result[1]
bass_train = result[2]
triad_train = result[3]
fourth_train = result[4]
gc.collect()
del result

result = load_val()
X_val = result[0]
root_val = result[1]
bass_val = result[2]
triad_val = result[3]
fourth_val = result[4]
gc.collect()
del result

result = load_test()
X_test = result[0]
root_test = result[1]
bass_test = result[2]
triad_test = result[3]
fourth_test = result[4]
gc.collect()
del result

In [6]:
# Convert one-hot encoded labels to integer class labels
triad_train_labels = np.argmax(triad_train, axis=2).flatten()

# print(triad_train[0][0])
# Count occurrences of each triad class
class_counts = Counter(triad_train_labels)

# Manually add the 8th class ('X') if it's not present
if 7 not in class_counts:
    class_counts[7] = 0  # Add class 'X' (index 7) with a count of 0

# Display the counts
print("Class counts:", class_counts)

# Total number of samples
total_samples = sum(class_counts.values())

# Print the total number of samples
print(f"Total samples: {total_samples}")

# Compute class weights, set a default value for classes with zero occurrences
triad_class_weights = {
    class_label: (total_samples / (len(class_counts) * count)) if count > 0 else 0
    for class_label, count in class_counts.items()
}

# Print the class weights
print("Class Weights:", triad_class_weights)

def weighted_kl_divergence(class_weights):
    # Convert the dictionary to a tensor
    class_values = list(class_weights.values())  # Get the weights for those indices
    class_weights_tensor = tf.convert_to_tensor(class_values, dtype=tf.float32)

    def loss(y_true, y_pred):
        # Define output names
        output_names = ['root_output', 'bass_output', 'triad_output', 'fourth_output']
        
        # Initialize loss collection
        losses = []

        for i, output_name in enumerate(output_names):
            y_true_output = y_true[:, i]
            y_pred_output = y_pred[:, i]

            if output_name == 'triad_output':  # Apply weighted KL divergence only to triad output
                kl_loss = tf.keras.losses.KLDivergence()(y_true_output, y_pred_output)
                weighted_kl_loss = kl_loss * class_weights_tensor[i]  # Apply class weight
                losses.append(weighted_kl_loss)
            else:
                # Use a different loss function (e.g., categorical cross-entropy) for the other outputs
                ce_loss = tf.keras.losses.CategoricalCrossentropy()(y_true_output, y_pred_output)
                losses.append(ce_loss)

        # Return the total loss
        return tf.reduce_sum(losses)

    return loss




Class counts: Counter({np.int64(0): 815898, np.int64(1): 200452, np.int64(6): 144455, np.int64(2): 9480, np.int64(5): 8586, np.int64(3): 5609, np.int64(4): 3220, 7: 0})
Total samples: 1187700
Class Weights: {np.int64(6): 1.0277422034543628, np.int64(0): 0.1819620834957311, np.int64(5): 17.29122990915444, np.int64(1): 0.7406386566360026, np.int64(3): 26.46862185772865, np.int64(2): 15.660601265822784, np.int64(4): 46.10636645962733, 7: 0}


In [7]:
# BI LSTM MODEL
# Learning rate decay after every 10 epochs
def lr_scheduler(epoch, lr):
    if epoch % 10 == 0 and epoch != 0:
        return lr * 0.8  # Decrease by 20%
    return lr

lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

# # Custom Kullback-Leibler Divergence loss function
# def custom_kl_divergence(y_true, y_pred):
#     # temperature = 0.6  # Control the softness
#     # y_pred = tf.nn.softmax(y_pred / temperature)
#     return tf.keras.losses.KLDivergence()(y_true, y_pred)
    



# Define input shape
input_shape = (300, 192)  # 192 cqt bins

# Input layer
input_layer = Input(shape=input_shape)

# Dense Projection
x = Dense(192, activation='relu')(input_layer)

# BiLSTM 1
x1 = Bidirectional(LSTM(96, return_sequences=True))(x)

# BiLSTM 2
x2 = Bidirectional(LSTM(96, return_sequences=True))(x1)

# Residual Skip Connection
x = Add()([x1, x2])

# Dense Layer
x = Dense(128, activation='sigmoid')(x)

# Outputs
root_output = Dense(14, activation='softmax', name="root_output")(x)
bass_output = Dense(14, activation='softmax', name="bass_output")(x)
triad_output = Dense(8, activation='softmax', name="triad_output")(x)
fourth_output = Dense(6, activation='softmax', name="fourth_output")(x)

# # Full Chord Classification Layer
# chord_class_output = Dense(num_classes, activation="softmax", name="chord_class_output")(x)


# Define model
model = Model(inputs=input_layer, outputs=[root_output, bass_output, triad_output, fourth_output])

# Loss Dictionary
losses = {
    'root_output': 'categorical_crossentropy',
    'bass_output': 'categorical_crossentropy',
    'triad_output': weighted_kl_divergence(triad_class_weights),
    'fourth_output': 'categorical_crossentropy'
    # ,'chord_class_output': 'categorical_crossentropy'
}

# Loss Weights (focus more on triad and fourth)
loss_weights = {
    "root_output": 1.5,
    "bass_output": 1.0,
    "triad_output": 4.0,
    "fourth_output": 1.0
    # ,"chord_class_output": 4.0
}

class_weights = {
    'triad_output': triad_class_weights  # 'triad_output' is the name of the triad classification output
}

# Compile model
model.compile(optimizer=Adam(learning_rate=0.0003),
              loss=losses,
              loss_weights=loss_weights,
              metrics = {
    "root_output": ["accuracy"], 
    "bass_output": ["accuracy"], 
    "triad_output": ["accuracy"], 
    "fourth_output": ["accuracy"]
    # ,"chord_class_output": ["accuracy"]
}
)

# Learning Rate Callback
lr_callback = LearningRateScheduler(lr_scheduler)

# Model summary
model.summary()


W0000 00:00:1742484581.201776   97464 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [8]:
# Initialize model save path
model_save_path = "accordo_ai_model_CQT.keras"

# # Early Stopping Callback
# early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


print("X_train shape:", X_train.shape)
print("root_train shape:", root_train.shape)
print("bass_train shape:", bass_train.shape)
print("triad_train shape:", triad_train.shape)
print("fourth_train shape:", fourth_train.shape)

print("X_val shape:", X_val.shape)
print("root_val shape:", root_val.shape)
print("bass_val shape:", bass_val.shape)
print("triad_val shape:", triad_val.shape)
print("fourth_val shape:", fourth_val.shape)

class ValidationAccuracyCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        print(f"\nEpoch {epoch + 1} Validation Accuracies:")
        print(f"  Root Val Acc: {logs.get('val_root_output_accuracy', 0) * 100:.2f}%")
        print(f"  Bass Val Acc: {logs.get('val_bass_output_accuracy', 0) * 100:.2f}%")
        print(f"  Triad Val Acc: {logs.get('val_triad_output_accuracy', 0) * 100:.2f}%")
        print(f"  Fourth Val Acc: {logs.get('val_fourth_output_accuracy', 0) * 100:.2f}%")
        print("-" * 50)



# Train the model
history = model.fit(
    X_train,
    {"root_output": root_train, 
     "bass_output": bass_train, 
     "triad_output": triad_train, 
     "fourth_output": fourth_train},
    validation_data=(X_val, 
                     {"root_output": root_val, 
                      "bass_output": bass_val, 
                      "triad_output": triad_val, 
                      "fourth_output": fourth_val}),
    epochs=50, 
    batch_size=32,
    verbose=1,
    callbacks=[lr_callback, ValidationAccuracyCallback()]
)


# # Get the best epoch
# best_epoch = early_stopping.stopped_epoch - early_stopping.patience + 1
# print(f"\n\nHighest accuracy achieved at epoch: {best_epoch}")

# Evaluate the model on the test data
test_result = model.evaluate(
    X_test, 
    {"root_output": root_test, 
     "bass_output": bass_test, 
     "triad_output": triad_test, 
     "fourth_output": fourth_test},
)


test_root_acc = test_result[1] * 100
test_bass_acc = test_result[2] * 100
test_triad_acc = test_result[3] * 100
test_fourth_acc = test_result[4] * 100

print(f"\n\nTest Accuracy - Root: {test_root_acc:.2f}%, Bass: {test_bass_acc:.2f}%, Triad: {test_triad_acc:.2f}%, Fourth: {test_fourth_acc:.2f}%")

# Plot the accuracy curve
plt.plot(history.history['val_root_output_accuracy'], label='Root Accuracy')
plt.plot(history.history['val_bass_output_accuracy'], label='Bass Accuracy')
plt.plot(history.history['val_triad_output_accuracy'], label='Triad Accuracy')
plt.plot(history.history['val_fourth_output_accuracy'], label='Fourth Accuracy')
plt.title('Model Accuracy')

plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy')
plt.legend()
plt.grid(True)
plt.show()


X_train shape: (3959, 300, 192)
root_train shape: (3959, 300, 14)
bass_train shape: (3959, 300, 14)
triad_train shape: (3959, 300, 8)
fourth_train shape: (3959, 300, 6)
X_val shape: (1077, 300, 192)
root_val shape: (1077, 300, 14)
bass_val shape: (1077, 300, 14)
triad_val shape: (1077, 300, 8)
fourth_val shape: (1077, 300, 6)
Epoch 1/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283ms/step - bass_output_accuracy: 0.0674 - bass_output_loss: 2.7362 - fourth_output_accuracy: 0.2525 - fourth_output_loss: 2.1581 - loss: 111.1676 - root_output_accuracy: 0.0908 - root_output_loss: 2.6799 - triad_output_accuracy: 0.6097 - triad_output_loss: 25.5634
Epoch 1 Validation Accuracies:
  Root Val Acc: 11.59%
  Bass Val Acc: 12.72%
  Triad Val Acc: 65.86%
  Fourth Val Acc: 83.67%
--------------------------------------------------
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 352ms/step - bass_output_accuracy: 0.0675 - bass_output_loss: 2.7356 - fourth_output

KeyboardInterrupt: 

In [None]:
model.save(model_save_path)
print("Model saved! New best accuracy recorded.")

Model saved! New best accuracy recorded.
