In [1]:
from keras.models import load_model
from tensorflow.keras.models import Model
import matplotlib.pylab as plt
import librosa 
import numpy as np
from glob import glob 
import librosa.display 
import IPython.display as ipd
from matplotlib.backends.backend_agg import FigureCanvasAgg
from PIL import Image
from scipy.spatial.distance import cosine
from pathlib import Path


def song_to_spectrogram(song_path,length_sec=30,save_image=True, save_name="spectrogram.png"):
    """Render the start of an audio file as a small grayscale mel-spectrogram.

    The audio is loaded with librosa, cut to its first ``length_sec`` seconds,
    turned into a dB-scaled mel spectrogram, drawn with
    ``librosa.display.specshow`` on an axis-free figure, resized to
    129x128 pixels (width x height) and averaged over the RGB channels.

    Args:
        song_path: Audio file to read; passed to ``librosa.load``.
        length_sec: Number of seconds kept from the start of the track.
        save_image: When true, the resized RGB image is written to ``save_name``.
        save_name: Destination path used when ``save_image`` is true.

    Returns:
        A float array of shape (128, 129) holding the per-pixel mean of the
        RGB channels of the resized spectrogram image.
    """
    hop_length = 512

    y, sr = librosa.load(song_path)
    # int() keeps the slice bound valid when length_sec is given as a float.
    y = y[:int(sr * length_sec)]

    S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
    # NOTE(review): melspectrogram presumably returns a power spectrogram here,
    # for which librosa documents power_to_db. amplitude_to_db is kept on
    # purpose: existing spectrogram images (and any model trained on them)
    # were produced with it - confirm before changing.
    S_DB = librosa.amplitude_to_db(S, ref=np.max)

    # Figure width in inches: 336 px at 80 dpi, with a 3:2 aspect ratio.
    # This is the same size the previous code derived, but computed directly
    # instead of creating (and leaking) a throwaway figure to measure it.
    desired_width = 336
    width = desired_width / 80

    fig = plt.figure(figsize=(width, width / 3 * 2))
    try:
        ax = fig.add_subplot(111)
        librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length,
                                 x_axis='time', y_axis='mel', ax=ax)
        ax.set_axis_off()

        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        # buffer_rgba() replaces tostring_rgb(), which newer Matplotlib
        # releases no longer provide; drop alpha and copy so the pixels
        # outlive the figure.
        image_array = np.asarray(canvas.buffer_rgba())[:, :, :3].copy()
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)

    image = Image.fromarray(image_array)

    # PIL sizes are (width, height), so the resulting array is (128, 129, 3).
    new_size = (129, 128)
    resized_image = image.resize(new_size, Image.LANCZOS)

    # Collapse RGB to a single channel by averaging.
    resized_image_data = np.mean(np.array(resized_image)[:, :, :3], axis=2)

    if save_image:
        resized_image.save(save_name)
        print("spectrogram saved at "+save_name)

    return resized_image_data

def create_spectrograms_for_dataset(dataset_dir, save_dir):
    """Write one spectrogram PNG per ``.wav`` file found in ``dataset_dir``.

    File names are expected to look like ``<prefix>__<track_id>[__...].wav``;
    the second ``__``-separated field of the stem is used as the output name
    (``<save_dir>/<track_id>.png``). Files that do not follow this pattern are
    reported and skipped instead of aborting the whole run.

    Args:
        dataset_dir: Directory containing the ``.wav`` files (a trailing
            slash is optional).
        save_dir: Directory receiving the PNG files; created if missing.

    Returns:
        None.
    """
    out_dir = Path(save_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Sorted for a reproducible processing order.
    for song in sorted(Path(dataset_dir).glob('*.wav')):
        fields = song.stem.split("__")
        if len(fields) < 2:
            print(f"skipping {song}: file name has no '__<track_id>' part")
            continue

        save_path = str(out_dir / (fields[1] + ".png"))
        song_to_spectrogram(str(song), length_sec=30, save_image=True, save_name=save_path)
        

#dataset_dir = "Datasets/final_playlist/" 
#save_dir = "Datasets/final_spectrograms/"
#create_spectrograms_for_dataset(dataset_dir, save_dir)

2023-11-12 08:27:04.483992: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# Saved Keras checkpoint to load (absolute, machine-specific path; judging by
# the directory name this is presumably a GTZAN genre classifier - confirm).
MODEL_PATH = '/Users/chatsam/Chatura/Umass/HackUmass/CNN/Music-Genre-Classification-GTZAN/Music Genre Classification/models/custom_cnn_2d_78.h5'
base_cnn_model = load_model(MODEL_PATH)

# Build a second model that shares the loaded network's input but stops at the
# layer named 'dense', so predict() returns that layer's activations.
final_dense_layer_output = base_cnn_model.get_layer('dense').output
embedding_model = Model(inputs=base_cnn_model.input, outputs=final_dense_layer_output)

2023-11-12 08:48:03.205821: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [15]:
from scipy.spatial.distance import cosine

def embed_distance_cosine_sim(embed1, embed2):
    """Return the cosine similarity between two embedding arrays.

    Both arrays are flattened to 1-D first; the result is one minus SciPy's
    cosine distance between the flattened vectors.
    """
    vec_a = embed1.flatten()
    vec_b = embed2.flatten()
    return 1 - cosine(vec_a, vec_b)

In [None]:
# def calculate_distance_top_k(embedding_model, spectrogram_path, reference_song_path, top_k):
#     def embed_distance_cosine_sim (embed1, embed2):
#         cosine_sim = 1 - cosine(embed1.flatten(), embed2.flatten()) 
#         return cosine_sim

#     embeddings_list = list()
#     spectrogram_path = 'Datasets/final_spectrograms/'
#     image_list = glob(spectrogram_path+"*.png")
#     track_ids = [Path(x).stem for x in image_list]

#     reference_spec = song_to_spectrogram(reference_song_path,length_sec=30,save_image=False)
#     reference_embed = embedding_model.predict(reference_spec.reshape(-1, 128, 129,1))

#     for img_path in image_list:
#         spec = np.array(Image.open(img_path))
#         embeds = embedding_model.predict(spec.reshape(-1, 128, 129,1))
#         cosine_sim_score = embed_distance_cosine_sim(reference_embed, embeds)
#         embeddings_list.append(cosine_sim_score)



In [27]:

from sklearn.preprocessing import normalize
from glob import glob
from pathlib import Path
from PIL import Image
import numpy as np
import subprocess


def _load_grayscale_spectrogram(img_path):
    """Load one spectrogram PNG as a float32 array of shape (128, 129, 1)."""
    with Image.open(img_path) as img:
        pixels = np.array(img)
    if pixels.ndim == 3:
        # Colour image: average the RGB channels exactly like
        # song_to_spectrogram does for the reference song. Reshaping the raw
        # (128, 129, 3) array instead would yield three interleaved-channel
        # "samples" per image and an embedding three times too long.
        pixels = np.mean(pixels[:, :, :3], axis=2)
    return pixels.astype(np.float32).reshape(128, 129, 1)


def precompute_embeddings(embedding_model, spectrogram_path):
    """Embed every spectrogram PNG in a directory and L2-normalise the result.

    Args:
        embedding_model: Object whose ``predict`` accepts a batch of shape
            (n, 128, 129, 1) and returns one embedding per sample.
        spectrogram_path: Directory containing ``*.png`` spectrograms (a
            trailing slash is optional).

    Returns:
        ``(normalized_embeddings, track_ids)``: a 2-D array with one
        unit-length row per image, and the matching list of file stems in
        the same (sorted) order.

    Raises:
        ValueError: If the directory contains no PNG files.
    """
    image_paths = sorted(Path(spectrogram_path).glob("*.png"))
    if not image_paths:
        raise ValueError(f"No spectrogram PNG files found in {spectrogram_path!r}")

    track_ids = [img_path.stem for img_path in image_paths]

    # One batched predict call instead of one call per image.
    batch = np.stack([_load_grayscale_spectrogram(img_path) for img_path in image_paths])
    embeddings = embedding_model.predict(batch).reshape(len(image_paths), -1)

    # Normalize embeddings so a dot product equals cosine similarity.
    normalized_embeddings = normalize(embeddings)

    return normalized_embeddings, track_ids

def find_top_k_similar_songs(embedding_model, reference_song_path, normalized_embeddings, track_ids, top_k):
    """Rank the precomputed tracks by cosine similarity to a reference song.

    The reference audio is converted to a spectrogram, embedded with
    ``embedding_model`` and L2-normalised; its dot product with every row of
    ``normalized_embeddings`` gives the similarity scores.

    Returns:
        Up to ``top_k`` ``(track_id, score)`` tuples, best match first. Any
        track whose id equals the stem of ``reference_song_path`` is left out.
    """
    # Embed the query song the same way the catalogue rows are consumed.
    query_spec = song_to_spectrogram(reference_song_path, length_sec=30, save_image=False)
    query_batch = query_spec.reshape(-1, 128, 129, 1)
    query_embed = embedding_model.predict(query_batch).flatten()
    query_unit = normalize(query_embed.reshape(1, -1))

    # Rows and query are unit length, so the dot product is cosine similarity.
    scores = np.dot(normalized_embeddings, query_unit.T).flatten()

    # Take one candidate more than requested, in case the reference track
    # itself is among the best matches and has to be dropped.
    ranked = np.argsort(scores)[-top_k - 1:][::-1]

    matches = []
    for idx in ranked:
        if track_ids[idx] != Path(reference_song_path).stem:
            matches.append((track_ids[idx], scores[idx]))

    return matches[:top_k]


def download_spotify_track(url, output_dir):
    """Download a track with the ``spotify_dl`` CLI and locate the ``.wav`` it produced.

    Args:
        url: Track URL passed to ``spotify_dl -l``.
        output_dir: Directory passed to ``spotify_dl -o`` (a trailing slash
            is optional).

    Returns:
        The path (str) of a ``.wav`` file found for ``output_dir``. If the
        command cannot be started or exits with a non-zero status, a string
        describing the error is returned instead (legacy behaviour, kept for
        existing callers) - callers should check that the result is a file.

    Raises:
        FileNotFoundError: The command succeeded but no ``.wav`` file could
            be found under ``output_dir``.
    """
    # Argument list (no shell), so the URL is never interpreted by a shell.
    command = ['spotify_dl', '-l', url, '-o', output_dir]

    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except Exception as e:
        return str(e)

    if result.returncode != 0:
        return f"Error: {result.stderr}"

    base = str(output_dir)

    # Original lookup first, so results are unchanged wherever it worked.
    matches = glob(base + "*/*.wav")
    if not matches:
        # The pattern above only means "one level below output_dir" when
        # output_dir ends with a slash; fall back to searching the whole tree.
        matches = sorted(str(found) for found in Path(base).rglob("*.wav"))
    if not matches:
        raise FileNotFoundError(f"spotify_dl finished but no .wav file was found under {base!r}")

    return matches[0]


def initialize_model(spec_path='Datasets/final_spectrograms/'):
    """Precompute the normalised embeddings for every spectrogram in ``spec_path``.

    Uses the module-level ``embedding_model`` and returns the
    ``(normalized_embeddings, track_ids)`` pair produced by
    ``precompute_embeddings``.
    """
    return precompute_embeddings(embedding_model, spec_path)


def get_scores(ref_track_id, normalized_embeddings, track_ids, top_k=3):
    """Download a Spotify track and return its ``top_k`` most similar catalogue tracks.

    Args:
        ref_track_id: Spotify track id; appended to
            ``https://open.spotify.com/track/`` and also used as the name of
            the download directory (``./<ref_track_id>``).
        normalized_embeddings: Row-normalised catalogue embeddings.
        track_ids: Track ids matching the rows of ``normalized_embeddings``.
        top_k: Number of matches to return.

    Returns:
        The list of ``(track_id, score)`` tuples produced by
        ``find_top_k_similar_songs`` using the module-level ``embedding_model``.

    Raises:
        RuntimeError: If the download did not yield an audio file.
    """
    import os.path  # local import: `os` is not imported at file level

    ref_url = 'https://open.spotify.com/track/'+ ref_track_id
    reference_song_path = download_spotify_track(ref_url, output_dir='./'+ref_track_id)

    # download_spotify_track reports failures by returning an error message
    # instead of a path, so verify the result before treating it as audio.
    if not os.path.isfile(reference_song_path):
        raise RuntimeError(f"Could not download track {ref_track_id!r}: {reference_song_path}")

    return find_top_k_similar_songs(embedding_model, reference_song_path, normalized_embeddings, track_ids, top_k)


# Directory holding the spectrogram PNGs; same value as the default
# `spec_path` argument of initialize_model.
spec_path = 'Datasets/final_spectrograms/'

spectrogram saved at Datasets/final_spectrograms/6ebcJ4agTdGzmfpXHnedY6.png


<Figure size 420x400 with 0 Axes>

In [25]:
# Collect the .mp3 files one directory level below the backup test-data
# folder. NOTE(review): absolute, machine-specific path.
tp = glob('/Users/chatsam/Chatura/Umass/HackUmass/Bkup_test_data/*/*.mp3')

In [26]:
# Show the matched paths as the cell output.
tp

['/Users/chatsam/Chatura/Umass/HackUmass/Bkup_test_data/test/0_ref_piano.mp3',
 '/Users/chatsam/Chatura/Umass/HackUmass/Bkup_test_data/test/1_ref_song_malone.mp3']