In [117]:
import librosa
from IPython.display import Audio
from scipy import signal
import numpy as np
import glob
from typing import List, Dict, Tuple
from tqdm import tqdm
import pickle
from PIL import Image
from pydub import AudioSegment
from pydub.utils import make_chunks

In [118]:
#
window_length = 2048
sr = 22050
distance_between_samples = 1 / sr  # in secunde
frequencies = np.fft.fftfreq(window_length, distance_between_samples)[:window_length//2+1]  # valorile pozitive ale frequency bin urilor
num_peaks = 15

def create_constellation(spectrogram):
    # audio asta e spectrograma
    # window_length_seconds = 0.5
    # window_length_samples = int(window_length_seconds * sr)
    # window_length_samples += window_length_samples % 2

    # amount_to_pad = window_length_samples - audio.size % window_length_samples
    # song_input = np.pad(audio, (0, amount_to_pad))

    # frequencies, times, stft = signal.stft(audio, sr, nperseg=window_length_samples, nfft=window_length_samples, return_onesided=True)

    constellation = []
    for time, window in enumerate(spectrogram.T):
        # window is the spectrum from the time-th bin
        # we only want real values
        spectrum = abs(window)  # amplitudini, loudness
        peaks, props = signal.find_peaks(spectrum, prominence=0, distance=200)
        # distance - required minimal horizontal distance (>= 1) in samples between neighbouring peaks. Smaller peaks are removed first until the condition is fulfilled for all remaining peaks.
        # prominence - required prominence of peaks. Either a number, None, an array matching x or a 2-element sequence of the former. The first element is always interpreted as the minimal and the second, if supplied, as the maximal required prominence.

        # Returns
        # peaks - ndarray - indices of peaks in x that satisfy all given conditions.
        # properties - dict - a dictionary containing properties of the returned peaks which were calculated as intermediate results during evaluation of the specified conditions

        n_peaks = min(num_peaks, len(peaks))

        largest_peaks = np.argpartition(props["prominences"], -n_peaks)[-n_peaks:]  # ultimele n_peaks elemente vor fi indexes pt prominences cele mai mari

        for peak in peaks[largest_peaks]:
            frequency = frequencies[peak]  # iau frecventa coresp binului de frecventa in care se incadreaza valoarea peak
            constellation.append([time, frequency])

    return constellation

# plt.scatter(*zip(*constellation_map))
# plt.show()

In [119]:
# constellation_map = create_constellation(audio, sr)

In [120]:
def create_hashes (constellation, song_id=None):
    hashes = {}

    upper_frequency = 22050
    frequency_bits = 10

    for index, (first_time, first_freq) in enumerate(constellation):
        for second_time, second_freq in constellation[index : index + 100]:
            difference = second_time - first_time
            if difference <= 1 or difference >= 99:
                continue

            first_binned = first_freq / upper_frequency * (2 ** frequency_bits)
            second_binned = second_freq / upper_frequency * (2 ** frequency_bits)

            hash = int(first_binned) | (int(second_binned) << 10) | (int(difference) << 20)
            hashes[hash] = (first_time, song_id)

    return hashes




In [121]:
# hashes = create_hashes(constellation_map, 0)
# for i, (hash, (time, _)) in enumerate(hashes.items()):
#     if i > 10:
#         break
#     print(f"Hash {hash} occurred at {time}")


In [122]:

# songs = songs[:15]
# country_songs = glob.glob('Data/genres_original/country/*.wav')
# country_songs = country_songs[:15]
# pop_songs = glob.glob('Data/genres_original/pop/*.wav')
# pop_songs = pop_songs[:15]
# print(len(songs))
# songs.extend(country_songs)
# songs.extend(pop_songs)

In [123]:
sr = 22050
songs_indexes = {}
database: Dict[int, List[Tuple[int, int]]] = {}
def load_songs():

    songs = glob.glob('Data/pop+rock+pop_rock+blues_rock/*.[jpPn][npP]*[gG]')
    # Am incarcat spectrogramele din memorie folosind modulul glob.

    for index, filename in enumerate(tqdm(sorted(songs))):
        songs_indexes[index] = filename
        image = Image.open(filename)
        np_image = np.array(image)
        # audio, sr = librosa.load(filename, sr=44100)
        constellation = create_constellation(np_image)
        hashes = create_hashes(constellation, index)

        for hash, time_index_pair in hashes.items():
            if hash not in database:
                database[hash] = []
            database[hash].append(time_index_pair)

load_songs()

100%|██████████| 402/402 [01:33<00:00,  4.29it/s]


In [124]:
with open("database.pickle", 'wb') as db:
    pickle.dump(database, db, pickle.HIGHEST_PROTOCOL)
with open("song_index.pickle", 'wb') as songs:
    pickle.dump(songs_indexes, songs, pickle.HIGHEST_PROTOCOL)

In [125]:
database = pickle.load(open('database.pickle', 'rb'))
song_name_index = pickle.load(open("song_index.pickle", "rb"))

In [126]:
def find_scores(hashes):
    matches_per_song = {}
    for hash, (sample_time, _) in hashes.items():
        if hash in database:
            matching_occurences = database[hash]
            for source_time, song_index in matching_occurences:
                if song_index not in matches_per_song:
                    matches_per_song[song_index] = []
                matches_per_song[song_index].append((hash, sample_time, source_time))


    # %%
    scores = {}
    for song_index, matches in matches_per_song.items():
        song_scores_by_offset = {}
        for hash, sample_time, source_time in matches:
            delta = source_time - sample_time
            if delta not in song_scores_by_offset:
                song_scores_by_offset[delta] = 0
            song_scores_by_offset[delta] += 1

        max = (0, 0)
        for offset, score in song_scores_by_offset.items():
            if score > max[1]:
                max = (offset, score)

        scores[song_index] = max

    # Sort the scores for the user
    scores = list(sorted(scores.items(), key=lambda x: x[1][1], reverse=True))

    return scores

# audio = AudioSegment.from_file(path)
# duration = len(audio)
# third = duration // 3
# third_audio = audio[:third]
# chunk_length_ms = 1000  # 1 second
# chunks = make_chunks(third_audio, chunk_length_ms)
#
# # Convert each chunk to a NumPy array
# np_audio = np.concatenate([np.array(chunk.get_array_of_samples()) for chunk in chunks], dtype=np.float32)


In [129]:
path = "Data/pop+drums+guitar_cut0.1.wav"
audio, srr = librosa.load(path)
spect = librosa.feature.melspectrogram(y=audio, sr=sr, fmin=0.)

def print_top_five(new_audio):
    song_constellation = create_constellation(new_audio)
    song_hashes = create_hashes(song_constellation, None)
    scores = find_scores(song_hashes)

    print(f"Total number of songs in the dataset: {len(scores)}")
    print(f"Recording {path.rsplit('/', 1)[1]}:\n")
    for index, (song_id, score) in enumerate(scores):
        song_name = song_name_index[song_id]
        _, filename = song_name.rsplit('/', 1)
        filename = filename.rsplit('.', 1)[0]
        print(f"{index}. {filename}: Score of {score[1]} at {score[0]}")


print_top_five(spect)



Total number of songs in the dataset: 402
Recording pop+drums+guitar_cut0.1.wav:

0. pop_spec89: Score of 105 at 842
1. pop_spec77: Score of 86 at 844
2. pop_rock_spec19: Score of 83 at 802
3. pop_spec83: Score of 81 at 827
4. pop_spec95: Score of 81 at 748
5. pop_rock_spec36: Score of 80 at 844
6. pop_rock_spec61: Score of 78 at 700
7. rock_spec23: Score of 78 at 903
8. pop_spec99: Score of 77 at 840
9. pop_rock_spec1: Score of 76 at 842
10. blues_rock_spec85: Score of 75 at 821
11. pop0_to_rock_feat-matching: Score of 75 at 0
12. rock_spec49: Score of 73 at 898
13. pop_rock_spec94: Score of 71 at 851
14. pop_rock_spec85: Score of 70 at 949
15. pop_rock_spec86: Score of 70 at 854
16. blues_rock_spec58: Score of 69 at 729
17. pop_rock_spec89: Score of 69 at 841
18. pop_spec65: Score of 69 at 844
19. pop_spec33: Score of 68 at 326
20. pop_spec97: Score of 68 at 860
21. pop_spec29: Score of 65 at 818
22. rock_spec61: Score of 65 at 815
23. pop_spec49: Score of 64 at 831
24. pop_rock_spec