### Step 1: Import libraries

In [2]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
import plotly.express as px
import pandas as pd
import json

2024-06-21 01:22:10.131678: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-21 01:22:10.180640: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Step 2: Download model and define paths

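Download the model metadata (`.json`) and the frozen graph (`.pb`); both files must end up in the `../model` directory, which is where the cells below look for them.
This step can be skipped if the model files are already in place.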

In [None]:
!curl -SLO https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.json
!curl -SLO https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.pb

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0^C
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 39 3166k   39 1237k    0     0   106k      0  0:00:29  0:00:11  0:00:18  111k

In [3]:
# Sample rate (Hz) that audio files are resampled to when loaded for feature extraction.
MUSICNN_SR = 16000
# Root folder of the dataset; each entry inside it is processed as one group of tracks.
DATASET_DIR = '../dataset/decades'

In [4]:
# Base path (without extension) shared by the model metadata and the frozen graph.
MODEL_NAME = '../model/genre_tzanetakis-musicnn-msd-1'
MODEL_JSON = f'{MODEL_NAME}.json'
MODEL_PB = f'{MODEL_NAME}.pb'

# Read the metadata inside a context manager so the file handle is always closed.
with open(MODEL_JSON, 'r') as metadata_file:
    musicnn_metadata = json.load(metadata_file)

# Show every top-level metadata entry (classes, schema, dataset, ...).
for k, v in musicnn_metadata.items():
    print('{}: {}'.format(k, v))

name: genre GTZAN
type: multi-class classifier
link: https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.pb
version: 1
description: classification of music by genre
author: Pablo Alonso
email: pablo.alonso@upf.edu
release_date: 2020-03-31
framework: tensorflow
framework_version: 1.15.0
classes: ['blu', 'cla', 'cou', 'dis', 'hip', 'jaz', 'met', 'pop', 'reg', 'roc']
model_types: ['frozen_model']
dataset: {'name': 'the GTZAN Genre Collection', 'citation': '@article{tzanetakis2002musical,\n  title={Musical genre classification of audio signals},\n  author={Tzanetakis, George and Cook, Perry},\n  journal={IEEE Transactions on speech and audio processing},\n  volume={10},\n  number={5},\n  pages={293--302},\n  year={2002},\n  publisher={IEEE}\n}', 'size': '1000 track excerpts, 100 per genre', 'metrics': {'5-fold_cross_validation_normalized_accuracy': 0.83}}
schema: {'inputs': [{'name': 'model/Placeholder', 'type': 'float', 'shape': [187, 96]}], 'output

From the metadata above, we can see that the output of the penultimate dense layer is offered as an embedding.
We will use it to extract song embeddings from our dataset.

### Step 3: Functions for loading and processing audio

In [5]:
def load_model(pb_file_path):
    """Read a frozen TensorFlow graph from disk and import it into a new Graph.

    Args:
        pb_file_path: Path of the serialized GraphDef (``.pb``) file.

    Returns:
        A ``tf.compat.v1.Graph`` holding the imported nodes under their
        original names (no name prefix is added).
    """
    # Deserialize the protobuf bytes into a GraphDef message.
    with tf.io.gfile.GFile(pb_file_path, "rb") as pb_file:
        frozen_def = tf.compat.v1.GraphDef()
        frozen_def.ParseFromString(pb_file.read())

    # Import into a dedicated graph rather than the global default one.
    imported_graph = tf.compat.v1.Graph()
    with imported_graph.as_default():
        tf.import_graph_def(frozen_def, name="")
    return imported_graph

def preprocess_audio(file_path, sample_rate=MUSICNN_SR, n_mels=96, n_frames=187):
    """Load an audio file and turn it into one fixed-size log-mel patch.

    Only the first ``n_frames`` spectrogram frames are kept; anything after
    that is discarded. Shorter clips are padded at the end with the dB floor
    so that the padding represents silence.

    Args:
        file_path: Path of the audio file to decode.
        sample_rate: Rate (Hz) the audio is resampled to while loading.
        n_mels: Number of mel bands. The default of 96 matches the second
            dimension of the model's input placeholder ([187, 96]).
        n_frames: Number of time frames in the patch. The default of 187
            matches the first dimension of the model's input placeholder.

    Returns:
        Array of shape ``(1, n_frames, n_mels)``: a batch holding one patch.
    """
    # NOTE(review): the model's reference front-end (Essentia's
    # TensorflowInputMusiCNN) appears to use different framing and log
    # compression than n_fft=2048 / hop_length=512 / power_to_db — confirm
    # before relying on these embeddings.
    top_db = 80.0  # same value as librosa's default; named so the pad value below can reuse it

    y, sr = librosa.load(file_path, sr=sample_rate)
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=n_mels, n_fft=2048, hop_length=512
    )
    # With ref=np.max the loudest bin is 0 dB and everything is clipped at -top_db.
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max, top_db=top_db)

    if log_mel_spec.shape[1] > n_frames:
        log_mel_spec = log_mel_spec[:, :n_frames]
    else:
        pad_width = n_frames - log_mel_spec.shape[1]
        # Pad with the dB floor: padding with 0 would insert frames at the
        # loudest possible level (0 dB is the peak here), not silence.
        log_mel_spec = np.pad(
            log_mel_spec,
            ((0, 0), (0, pad_width)),
            mode='constant',
            constant_values=-top_db,
        )

    # (n_mels, n_frames) -> (1, n_frames, n_mels): time-major with a batch axis.
    return log_mel_spec.T[np.newaxis, :, :]

def run_model(graph, input_data):
    """Feed one batch through the graph and return the dense-layer output.

    Args:
        graph: Graph containing the ``model/Placeholder`` input and the
            ``model/dense/BiasAdd`` output tensors.
        input_data: Array fed to the input placeholder.

    Returns:
        The value of ``model/dense/BiasAdd:0`` evaluated for ``input_data``.
    """
    placeholder = graph.get_tensor_by_name('model/Placeholder:0')
    dense_output = graph.get_tensor_by_name('model/dense/BiasAdd:0')

    # A short-lived session is opened per call and closed on exit.
    with tf.compat.v1.Session(graph=graph) as session:
        result = session.run(dense_output, feed_dict={placeholder: input_data})
    return result

# Graphs already imported, keyed by .pb path, so the model is read from disk only once.
_GRAPH_CACHE = {}


def _get_cached_graph(pb_file_path):
    """Return the graph for ``pb_file_path``, importing it on first use only."""
    if pb_file_path not in _GRAPH_CACHE:
        _GRAPH_CACHE[pb_file_path] = load_model(pb_file_path)
    return _GRAPH_CACHE[pb_file_path]


def extract_mean_embedding(filename):
    """Compute the embedding of one audio file.

    The file is converted to a single log-mel patch and passed through the
    model once. Despite the name, no averaging is performed here: the model
    output for that one patch is returned unchanged.

    Args:
        filename: Path of the audio file.

    Returns:
        The array produced by ``run_model`` for the file's patch.
    """
    audio_data = preprocess_audio(filename)
    # Reuse the imported graph instead of re-parsing the .pb for every track.
    graph = _get_cached_graph(MODEL_PB)
    embeddings = run_model(graph, audio_data)
    return embeddings

def process_dataset(dataset_dir):
    """Extract an embedding for every MP3 file found under a directory.

    The tree is walked in sorted order so the result order is the same on
    every run and every filesystem.

    Args:
        dataset_dir: Directory searched recursively for ``.mp3`` files.

    Returns:
        List of ``(file_path, embedding)`` tuples, one per MP3 file.
    """
    embeddings = []
    for root, dirs, files in os.walk(dataset_dir):
        # Sorting in place makes os.walk descend into sub-folders deterministically.
        dirs.sort()
        for file in sorted(files):
            # Case-insensitive match so files named '*.MP3' are not silently skipped.
            if file.lower().endswith('.mp3'):
                file_path = os.path.join(root, file)
                mean_embedding = extract_mean_embedding(file_path)
                embeddings.append((file_path, mean_embedding))
    return embeddings

### Step 4: Process dataset and extract embeddings

In [6]:
# One entry per track; track_paths[i] is the file behind row/column i of the
# similarity matrix computed at the end of this cell.
track_paths = []
embeddings = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the similarity matrix reproducible between runs and machines.
for folder in sorted(os.listdir(DATASET_DIR)):
    print(folder)
    folder_path = os.path.join(DATASET_DIR, folder)
    for file_path, embedding in process_dataset(folder_path):
        track_paths.append(file_path)
        embeddings.append(embedding)

print(len(embeddings))
if not embeddings:
    # Fail with a clear message instead of an opaque error from np.vstack.
    raise ValueError(f'No .mp3 files found under {DATASET_DIR}')

# Stack the per-track embeddings into a 2-D matrix with one row per track.
embeddings_matrix = np.vstack(embeddings)
embeddings = np.array(embeddings)
similarities = cosine_similarity(embeddings_matrix)

2010s


2024-06-21 01:22:19.097296: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-21 01:22:19.161700: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-06-21 01:22:19.167468: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
  y, sr = librosa.load(file_path, sr=sample_rate)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  

1980s
40


In [7]:
# Summarize the pairwise cosine-similarity matrix.
print(similarities.shape)
similarities = similarities.round(5)
print(f'Min value: {similarities.min()}')
print(f'Max value: {similarities.max()}')

# Persist the matrix; create the output folder first so to_csv cannot fail
# just because '../data' does not exist yet.
similarity_csv_path = '../data/similarity_data_decades.csv'
os.makedirs(os.path.dirname(similarity_csv_path), exist_ok=True)
df_similarities = pd.DataFrame(similarities)
df_similarities.to_csv(similarity_csv_path, index=False)

# Heatmap of the matrix; title and axis labels let the figure stand on its own.
fig = px.imshow(
    similarities,
    color_continuous_scale='RdYlGn',
    title='Pairwise cosine similarity between track embeddings',
    labels={'x': 'Track index', 'y': 'Track index', 'color': 'Cosine similarity'},
)
fig.show()

(40, 40)
Min value: 0.6236000061035156
Max value: 1.0
