In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
import pickle
import json
import random
import music_tag

from utils.music_utils import *

from pathlib import Path
import numpy as np

from utils.extract_openl3_embeddings import EmbeddingsOpenL3
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D

import tensorflow as tf
# Run tf.function-decorated code eagerly instead of as compiled graphs.
# NOTE(review): presumably enabled for debugging — confirm it is still needed,
# since eager execution is generally slower.
tf.config.run_functions_eagerly(True)

os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured identically on every visible GPU,
        # so apply it to all of them rather than only the first one.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the TensorFlow runtime is already initialized (e.g. this
        # cell is re-run in a live kernel); the setting can no longer change.
        print(f"Could not enable GPU memory growth: {e}")
else:
    print("No GPU found. Please ensure you have installed TensorFlow correctly")
print("Num GPUs:", len(physical_devices))

[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


Num GPUs: 1


In [2]:
# Root folder that is scanned for MP3 files, and the label file handed to
# get_top_5_genres when decoding predictions.
DATASET = Path("MegaSet")
CLASSES = "utils/mtg_jamendo_genre.json"

# Discogs-EffNet embedding extractor and the MTG-Jamendo genre head that
# consumes its embeddings. Each constructor loads its frozen graph (.pb) file.
# NOTE(review): "PartitionedCall:1" presumably selects the embedding output
# node of the graph — confirm against the model's metadata.
embedding_87_model = TensorflowPredictEffnetDiscogs(
    graphFilename="utils/discogs-effnet-bs64-1.pb",
    output="PartitionedCall:1",
)
prediction_87_model = TensorflowPredict2D(
    graphFilename="utils/mtg_jamendo_genre-discogs-effnet-1.pb",
)

# OpenL3 embedding extractor (project wrapper from utils.extract_openl3_embeddings).
embedding_512_model = EmbeddingsOpenL3("utils/openl3-music-mel128-emb512-3.pb")

[   INFO   ] TensorflowPredict: Successfully loaded graph file: `utils/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `utils/mtg_jamendo_genre-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `utils/openl3-music-mel128-emb512-3.pb`


In [3]:
# Spot check: pick one random track and show its top-5 genres, or print
# the track's info when it has no stored predictions.
random_mp3 = pick_random_mp3(DATASET)
if check_predictions_87(random_mp3):
    # Display the path relative to the dataset root. Computing it from DATASET
    # (instead of slicing off a fixed number of characters) keeps the output
    # correct if the dataset folder is renamed or moved.
    print(os.path.relpath(random_mp3, DATASET), get_top_5_genres(random_mp3, CLASSES))
else:
    print("No predictions for\n")
    print_info(random_mp3)

Mass Hysteria/Mass Hysteria - Live At Montréal/04 - Donnez Vous La Peine.mp3 ['rock', 'electronic', 'metal', 'industrial', 'alternative']


In [4]:
def extract_predictions_87(mp3_file_path, embedding_model, prediction_model):
    """Compute a single averaged prediction vector for one audio file.

    The file is loaded as mono audio at 16 kHz with essentia's ``MonoLoader``,
    passed through ``embedding_model`` and then ``prediction_model``; the
    model output is averaged over axis 0.

    Args:
        mp3_file_path: Path of the audio file, passed to ``MonoLoader`` as
            ``filename``.
        embedding_model: Callable mapping the loaded audio to embeddings.
        prediction_model: Callable mapping embeddings to predictions
            (presumably one row per audio patch — confirm with the model).

    Returns:
        The mean of the predictions along axis 0, or ``None`` if any step
        raised an exception (the error is printed, not re-raised).
    """
    try:
        load_audio = MonoLoader(filename=mp3_file_path, sampleRate=16000, resampleQuality=2)
        patch_embeddings = embedding_model(load_audio())
        return np.mean(prediction_model(patch_embeddings), axis=0)
    except Exception as err:
        # Best effort: report the failing file and let the caller skip it.
        print(f"Error extracting predictions from {mp3_file_path}: {err}")
        return None

In [5]:
def extract_embeddings_512(file_path, extractor):
    """Compute one averaged embedding vector for an audio file.

    Args:
        file_path: Path of the audio file, forwarded to ``extractor.compute``.
        extractor: Object exposing ``compute(file_path)`` whose result has a
            ``mean(axis=...)`` method (presumably a 2-D array with one
            embedding per frame — confirm with the extractor).

    Returns:
        The result of averaging the extractor output along axis 0, or
        ``None`` if extraction raised an exception (the error is printed,
        not re-raised).
    """
    try:
        return extractor.compute(file_path).mean(axis=0)
    except Exception as err:
        # Best effort: report the failing file and let the caller skip it.
        print(f"Error extracting embeddings from {file_path}: {err}")
        return None

In [6]:
# Scan every MP3 under DATASET (recursively) and collect the ones that fail
# either feature check. A file is kept out of the list only when BOTH
# check_predictions_87 and check_embeddings_512 pass.
list_of_mp3s = list(DATASET.rglob("*.mp3"))
invalid_files = []
for mp3 in list_of_mp3s:
    if not (check_predictions_87(mp3) and check_embeddings_512(mp3)):
        invalid_files.append(mp3)

print(f"Invalid files: {len(invalid_files)}")

Invalid files: 1019


In [9]:
# Backfill missing features for every file listed in `invalid_files`.
# Each MP3 has a sidecar pickle (same path, ".pkl" suffix) that is loaded,
# updated with the recomputed "predictions_87" / "embedding_512" entries, and
# written back. Features whose extraction failed (None) are left untouched.

total = len(invalid_files)
# Print progress roughly every 5%. max(1, ...) prevents a modulo-by-zero
# when fewer than 20 files need fixing.
progress_step = max(1, total // 20)

for counter, mp3 in enumerate(invalid_files, start=1):
    updates = {}

    if not check_predictions_87(mp3):
        predictions = extract_predictions_87(str(mp3), embedding_87_model, prediction_87_model)
        if predictions is not None:
            updates["predictions_87"] = predictions

    if not check_embeddings_512(mp3):
        embeddings = extract_embeddings_512(str(mp3), embedding_512_model)
        if embeddings is not None:
            updates["embedding_512"] = embeddings

    if updates:
        # with_suffix swaps only the final extension; a plain string replace
        # would also rewrite ".mp3" appearing in a directory name.
        pkl_path = Path(mp3).with_suffix(".pkl")
        # A missing .pkl raises FileNotFoundError rather than being created
        # from scratch, so other fields stored in it are never lost silently.
        # NOTE: pickle.load can execute arbitrary code — only use on trusted,
        # locally generated files.
        with open(pkl_path, "rb") as f:
            data = pickle.load(f)
        data.update(updates)
        # Single read/write per file, even when both features were missing.
        with open(pkl_path, "wb") as f:
            pickle.dump(data, f)

    if counter % progress_step == 0:
        print(f"{counter/total*100:.2f}% done")

4.91% done
9.81% done
14.72% done
19.63% done
24.53% done
29.44% done
34.35% done
39.25% done
44.16% done
49.07% done
53.97% done
58.88% done
63.79% done
68.69% done
73.60% done
78.51% done
83.42% done
88.32% done
93.23% done
98.14% done


In [10]:
# Re-scan the dataset after the backfill to confirm that no MP3 is still
# failing the prediction or embedding check.
list_of_mp3s = list(DATASET.rglob("*.mp3"))
invalid_files = []
for mp3 in list_of_mp3s:
    if check_predictions_87(mp3) and check_embeddings_512(mp3):
        continue  # both features present — nothing to fix
    invalid_files.append(mp3)

print(f"Invalid files: {len(invalid_files)}")

Invalid files: 0
