In [13]:
import os
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from pathlib import Path
import pandas as pd
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from sklearn.metrics import mean_squared_error
from tensorflow.keras.backend import clear_session

In [14]:
# Clear any previous session
clear_session()

In [15]:
# Import labels
labels_data = pd.read_csv('data/echonest_norm.csv').values
print(f"Label shape: {labels_data.shape}")

Label shape: (13131, 9)


In [16]:
def attach_label(image_path):
    try:
        image_id = int(image_path.split("/")[-1].split("_")[0])
        label = labels_data[labels_data[:, 0] == image_id, 1:]
        if label.shape[0] == 0:
            print(f"No label found for image {image_id}")
            return None
        return image_path, label.reshape(-1)
    except Exception as e:
        print(f"Error with {image_path}: {e}")
        return None

In [17]:
# Load image paths and labels
data_path = Path("spectrogram")
all_image_paths = sorted(map(str, data_path.glob("**/*.png")))
valid_pairs = list(filter(None, map(attach_label, all_image_paths)))

if len(valid_pairs) == 0:
    raise ValueError("No valid image-label pairs found!")

data_paths, labels = zip(*valid_pairs)

In [18]:
def load_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, (int(984 / 3), int(2385 / 3)))
    image = tf.keras.applications.resnet50.preprocess_input(image)
    return image, label

# Create dataset
dataset = tf.data.Dataset.from_tensor_slices((list(data_paths), list(labels)))
dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)

# Shuffle and split dataset
train_size = int(0.8 * len(data_paths))
train_dataset = dataset.take(train_size).batch(4).prefetch(tf.data.AUTOTUNE)
val_dataset = dataset.skip(train_size).batch(4).prefetch(tf.data.AUTOTUNE)

In [19]:
def combined_mse_cosine_loss(y_true, y_pred):
    mse = tf.reduce_mean(tf.square(y_true - y_pred))
    y_true_norm = tf.nn.l2_normalize(y_true, axis=1)
    y_pred_norm = tf.nn.l2_normalize(y_pred, axis=1)
    cosine_loss = 1 - tf.reduce_mean(tf.reduce_sum(y_true_norm * y_pred_norm, axis=1))
    return mse + 0.3 * cosine_loss

In [20]:
# Build model
clear_session()
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(int(984 / 3), int(2385 / 3), 3))
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(8, activation='linear')
])

base_model.trainable = False
model.compile(optimizer='adam', loss=combined_mse_cosine_loss, metrics=['mae'])
model.summary()

try:
    with tf.device('/device:GPU:0'):
        model.fit(train_dataset, validation_data=val_dataset, epochs=5)
except Exception as e:
    print(f"Training failed: {e}")
    exit(1)

Epoch 1/5
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 47ms/step - loss: 0.1516 - mae: 0.2327 - val_loss: 0.1069 - val_mae: 0.1881
Epoch 2/5
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 46ms/step - loss: 0.0771 - mae: 0.1681 - val_loss: 0.1218 - val_mae: 0.2005
Epoch 3/5
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 46ms/step - loss: 0.0752 - mae: 0.1660 - val_loss: 0.1168 - val_mae: 0.1940
Epoch 4/5
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 45ms/step - loss: 0.0753 - mae: 0.1661 - val_loss: 0.1310 - val_mae: 0.2068
Epoch 5/5
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 45ms/step - loss: 0.0757 - mae: 0.1665 - val_loss: 0.1326 - val_mae: 0.2081


In [21]:
base_model.trainable = True
for layer in base_model.layers[:100]:
    layer.trainable = False  # Keep early layers frozen
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss=combined_mse_cosine_loss, metrics=['mae'])

try:
    with tf.device('/device:GPU:0'):
        model.fit(train_dataset, validation_data=val_dataset, epochs=10)
except Exception as e:
    print(f"Training failed: {e}")
    exit(1)

Epoch 1/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 73ms/step - loss: 0.0836 - mae: 0.1758 - val_loss: 0.1290 - val_mae: 0.2006
Epoch 2/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 72ms/step - loss: 0.0702 - mae: 0.1619 - val_loss: 0.1197 - val_mae: 0.1928
Epoch 3/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 73ms/step - loss: 0.0618 - mae: 0.1522 - val_loss: 0.1183 - val_mae: 0.1904
Epoch 4/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 70ms/step - loss: 0.0576 - mae: 0.1465 - val_loss: 0.1123 - val_mae: 0.1868
Epoch 5/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 71ms/step - loss: 0.0528 - mae: 0.1402 - val_loss: 0.1066 - val_mae: 0.1806
Epoch 6/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 71ms/step - loss: 0.0499 - mae: 0.1359 - val_loss: 0.1020 - val_mae: 0.1755
Epoch 7/10
[1m2625/2625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [22]:
# Save results
y_test = []
y_pred = []
for images, batch_labels in tqdm(val_dataset):
    y_test.append(batch_labels.numpy())
    y_pred.append(model.predict(images, verbose=0))
y_test = np.concatenate(y_test, axis=0)
y_pred = np.concatenate(y_pred, axis=0)

# Create data frame with interleaved y_test and y_pred columns
data = {}
for i in range(y_test.shape[1]):
    data[f'y_test_{i}'] = y_test[:, i]
    data[f'y_pred_{i}'] = y_pred[:, i]
df = pd.DataFrame(data)
df.to_csv("data/evaluate.csv", index=False)

# Report MSE
mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error: {mse}")

100%|█████████▉| 656/657 [01:07<00:00, 15.72it/s] 2025-05-13 01:12:09.425915: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
100%|██████████| 657/657 [01:09<00:00,  9.44it/s]

Mean squared error: 0.06595480547465296





In [23]:
# Save the model
model.save("models/resnet50.keras")