In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from pathlib import Path

2025-05-13 22:03:57.831708: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-13 22:03:57.871449: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747188237.892003  723686 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747188237.898222  723686 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747188237.929459  723686 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
def combined_mse_cosine_loss(y_true, y_pred):
    """Loss combining mean squared error with a weighted cosine-distance term.

    The MSE is averaged over every element of the batch. The cosine term
    L2-normalizes both tensors along axis 1, takes the per-row dot product,
    averages it over the batch and subtracts the result from 1. The cosine
    term is scaled by 0.3 before being added to the MSE.

    Args:
        y_true: Target tensor; rows are normalized along axis 1.
        y_pred: Predicted tensor, shape-compatible with ``y_true``.

    Returns:
        A scalar tensor: ``mse + 0.3 * (1 - mean cosine similarity)``.
    """
    # Magnitude term: plain mean squared error over all elements.
    residual = y_true - y_pred
    mse_term = tf.reduce_mean(tf.square(residual))

    # Direction term: cosine similarity of each target/prediction row pair.
    unit_true = tf.nn.l2_normalize(y_true, axis=1)
    unit_pred = tf.nn.l2_normalize(y_pred, axis=1)
    row_similarity = tf.reduce_sum(unit_true * unit_pred, axis=1)
    cosine_term = 1 - tf.reduce_mean(row_similarity)

    return mse_term + 0.3 * cosine_term

In [7]:
# Where the trained Keras model lives and which spectrogram image to score.
model_path = "models/custom_cnn.keras"
new_spectrogram_path = (
    "spectrogram/test_data/"
    "30_60_spectrogram_win_length=2048_hop_length=512_n_fft=2048.png"
)

# Labels paired, in this order, with the values of the prediction vector.
FEATURE_NAMES = [
    "acousticness",
    "instrumentalness",
    "liveness",
    "speechiness",
    "danceability",
    "energy",
    "tempo",
    "valence",
]

# Guard against an accidental edit that adds or drops a label.
if not len(FEATURE_NAMES) == 8:
    raise ValueError("FEATURE_NAMES list must contain exactly 8 names.")

In [8]:
# 1. Load the trained model with the custom loss function
print(f"Loading model from: {model_path}")

# Fail early with an explicit error when the model file is absent, instead of
# relying on whatever exception the Keras loader raises for a bad path.
if not Path(model_path).exists():
    raise FileNotFoundError(f"Model file not found at {model_path}")

try:
    # Reset Keras' global state so re-running this cell starts from a clean
    # session rather than stacking on top of a previously loaded model.
    tf.keras.backend.clear_session()

    # The model was saved with a custom loss, so the loader needs the function
    # registered under the same name to deserialize the compile configuration.
    model = load_model(
        model_path,
        custom_objects={"combined_mse_cosine_loss": combined_mse_cosine_loss},
    )
    print("Model loaded successfully.")
    model.summary()  # Optional: print model summary
except Exception as e:
    # Report the failure in the cell output, then re-raise so the notebook
    # stops here instead of continuing without a usable `model`.
    print(f"Error loading model: {e}")
    raise

Loading model from: models/custom_cnn.keras
Model loaded successfully.


In [11]:
def load_image(image_path, target_size=(328, 795)):
    """Read a PNG from disk and turn it into a single-image model input batch.

    The file is decoded as a 3-channel image, resized to ``target_size``,
    passed through Keras' ResNet50 ``preprocess_input`` and given a leading
    batch dimension.

    Args:
        image_path: Path to the PNG file, as a ``str`` or ``pathlib.Path``.
        target_size: ``(height, width)`` to resize to. The default
            ``(328, 795)`` equals ``(int(984 / 3), int(2385 / 3))``, the size
            this notebook previously hard-coded (presumably the source
            spectrogram dimensions divided by 3 -- TODO confirm against the
            training pipeline).

    Returns:
        A float tensor of shape ``(1, height, width, 3)``.
    """
    # tf.io.read_file expects a string filename, so unwrap Path objects.
    if isinstance(image_path, Path):
        image_path = str(image_path)

    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, tuple(target_size))
    image = tf.keras.applications.resnet50.preprocess_input(image)
    # The model consumes batches; wrap the single image in a batch of one.
    image = tf.expand_dims(image, axis=0)
    return image

In [12]:
# Read and preprocess the spectrogram into a one-image batch.
image = load_image(new_spectrogram_path)

# Run inference and report one value per named audio feature.
print("\nMaking prediction...")
try:
    predictions = model.predict(image)

    # A single image was submitted, so the first row is its feature vector.
    predicted_features = predictions[0]

    print("\nPredicted Audio Features:")
    if len(predicted_features) != len(FEATURE_NAMES):
        # Labels cannot be paired reliably; fall back to the raw vector.
        print("Warning: Number of predicted features does not match FEATURE_NAMES length.")
        print("Raw predictions:", predicted_features)
    else:
        for idx, name in enumerate(FEATURE_NAMES):
            print(f"- {name}: {predicted_features[idx]:.4f}")

except Exception as err:
    # Surface the failure in the cell output, then re-raise to halt the run.
    print(f"Error during prediction: {err}")
    raise


Making prediction...


I0000 00:00:1747188632.372847  723881 service.cc:152] XLA service 0x7f40ac0050a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747188632.372915  723881 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2025-05-13 22:10:32.391227: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1747188632.417852  723881 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944ms/step

Predicted Audio Features:
- acousticness: 0.6096
- instrumentalness: 0.4616
- liveness: 0.4695
- speechiness: 0.5778
- danceability: 0.1694
- energy: 0.0829
- tempo: 0.3810
- valence: 0.4230


I0000 00:00:1747188633.245478  723881 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
