In [1]:
import numpy as np      
import matplotlib.pyplot as plt 
import scipy.io.wavfile 
import subprocess
import librosa
import librosa.display
import IPython.display as ipd

from pathlib import Path, PurePath   
from tqdm.notebook import tqdm

## Utility functions

In [2]:
def convert_mp3_to_wav(audio:str) -> str:  
    """Convert an input MP3 audio track into a WAV file.

    Args:
        audio (str): An input audio track.

    Returns:
        [str]: WAV filename.
    """
    if audio[-3:] == "mp3":
        wav_audio = audio[:-3] + "wav"
        if not Path(wav_audio).exists():
                subprocess.check_output(f"ffmpeg -i {audio} {wav_audio}", shell=True)
        return wav_audio
    
    return audio

def plot_spectrogram_and_picks(track:np.ndarray, sr:int, peaks:np.ndarray, onset_env:np.ndarray) -> None:
    """[summary]

    Args:
        track (np.ndarray): A track.
        sr (int): Aampling rate.
        peaks (np.ndarray): Indices of peaks in the track.
        onset_env (np.ndarray): Vector containing the onset strength envelope.
    """
    times = librosa.frames_to_time(np.arange(len(onset_env)),
                            sr=sr, hop_length=HOP_SIZE)

    plt.figure()
    ax = plt.subplot(2, 1, 2)
    D = librosa.stft(track)
    librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=np.max),
                            y_axis='log', x_axis='time')
    plt.subplot(2, 1, 1, sharex=ax)
    plt.plot(times, onset_env, alpha=0.8, label='Onset strength')
    plt.vlines(times[peaks], 0,
            onset_env.max(), color='r', alpha=0.8,
            label='Selected peaks')
    plt.legend(frameon=True, framealpha=0.8)
    plt.axis('tight')
    plt.tight_layout()
    plt.show()

def load_audio_picks(audio, duration, hop_size):
    """[summary]

    Args:
        audio (string, int, pathlib.Path or file-like object): [description]
        duration (int): [description]
        hop_size (int): 

    Returns:
        tuple: Returns the audio time series (track) and sampling rate (sr), a vector containing the onset strength envelope
        (onset_env), and the indices of peaks in track (peaks).
    """
    try:
        track, sr = librosa.load(audio, duration=duration)
        onset_env = librosa.onset.onset_strength(track, sr=sr, hop_length=hop_size)
        peaks = librosa.util.peak_pick(onset_env, 10, 10, 10, 10, 0.5, 0.5)
    except Error as e:
        print('An error occurred processing ', str(audio))
        print(e)

    return track, sr, onset_env, peaks
    
    

## Settings

In [3]:
N_TRACKS = 1413
HOP_SIZE = 512
DURATION = 30 # TODO: to be tuned!
THRESHOLD = 5 # TODO: to be tuned!

In [27]:
data_folder = Path("./data/mp3s-32k/")
mp3_tracks = data_folder.glob("*/*/*.mp3")
tracks = data_folder.glob("*/*/*.wav")

## Preprocessing

In [5]:
def preprocessing(mp3_tracks):
    for track in tqdm(mp3_tracks, total=N_TRACKS):
        convert_mp3_to_wav(str(track))

In [6]:
#preprocessing(mp3_tracks)

## Audio signals

In [7]:
"""for idx, audio in enumerate(tracks):
    if idx >= 1:
        break
    track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
    plot_spectrogram_and_picks(track, sr, peaks, onset_env)
        
        """

'for idx, audio in enumerate(tracks):\n    if idx >= 1:\n        break\n    track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)\n    plot_spectrogram_and_picks(track, sr, peaks, onset_env)\n        \n        '

## Minhash

In [8]:
# TODO

In [9]:
from bitstring import BitArray
import pandas as pd

In [10]:
def timeOfPeaks(peaks, times):
    timesPeaks = []
    
    for i in peaks:
        timesPeaks.append(times[i])
    
    return timesPeaks   

In [11]:
def fibonacci_hash_float(value:float):
    
    value = BitArray(float=value, length=64)
    phi = (1 + 5 ** 0.5) / 2
    g = int(2 ** 64 /phi)
    

    value ^= value >> 61
    value = int(g * value.float * np.random.random_sample())
 
    return value

In [12]:
def hash_2(vet):
    out = 0
    for el in vet:
        out ^= el
        
    out = fibonacci_hash_float(out)
        
    return int(str(out)[:15])

In [13]:
def minhash(freqs, times, threshold = None):
    old_threshold = threshold
    picks = []
    out = []
    f = 0
    
    for i in range(0,threshold):
        picks = []
        for fr, tm in zip(freqs,times):
            picks.append(fibonacci_hash_float(fr) ^ fibonacci_hash_float(tm))
        
        out.append(np.min(picks))
        


    
    return out    

In [36]:
def make_signatureMatrix(songs):
    sig_matrix = []
    for freq, tm in songs:
        sig_matrix.append(minhash(freq,tm, 5))
        
    return np.array(sig_matrix).T

In [15]:
def lsh(sig_matrix, b=2):

    band_numb = sig_matrix.shape[0]//b
    bucket_matrix = []

    for i in range(0,band_numb):

        band = m[b*i:b*(i+1)]
        
        bucket_row = defaultdict(lambda: 0)
        
        for j in range(band.shape[1]):
            
            bucket_row[hash_2(band[:,j])] += 1

        bucket_matrix.append(bucket_row.values())


    return np.array(bucket_matrix)


In [16]:
def guess_song(song):
    hashtable = pd.read_csv("./data/hashtable.csv", index_col="hashid")
    
    #get the frequencies and time of picks 
    track, sr, onset_env, peaks = load_audio_picks(song, DURATION, HOP_SIZE)
    times = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=HOP_SIZE)
    timesPeaks = timeOfPeaks(peaks, times)
    freqsP = [onset_env[i] for i in peaks]
    
    #do the minhash of the song
    h = minhash(freqsP, timesPeaks, THRESHOLD, DURATION)
    
    return (hashtable.loc[h][0])

In [17]:
def make_hashtable():
    hashtable = []
    names = []
    data = pd.DataFrame()
    for idx, audio in tqdm(enumerate(tracks), total = N_TRACKS):
        names.append(audio.name)
        track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
        timess = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=HOP_SIZE)

        timesPeaks = timeOfPeaks(peaks, timess)
        freqsP = [onset_env[i] for i in peaks]


        hashtable.append(minhash(freqsP, timesPeaks, THRESHOLD, DURATION))

    data['hashid'] = hashtable
    data['name'] = names
    data.to_csv('data/hashtable.csv', index=False)

In [18]:
#make_hashtable()

In [19]:
#data = pd.read_csv("./data/hashtable.csv", index_col="hashid")

Lets try with some test...

In [28]:
songs = []
for idx, audio in tqdm(enumerate(tracks), total = N_TRACKS):
    if(idx > 4):
        break
    track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
    timess = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=HOP_SIZE)

    timesPeaks = timeOfPeaks(peaks, timess)
    freqsP = [onset_env[i] for i in peaks]

    songs.append((freqsP, timesPeaks))


  0%|          | 0/1413 [00:00<?, ?it/s]

In [37]:
sg_m = make_signatureMatrix(songs)

In [38]:
sg_m

array([[2599823476760257536, 5235882738895912960, 1519980343989952848,
          14138150187448320, 7810486529529361408],
       [4770035240315681280,  829978137479610368, 2290948224215203840,
        2340465178254475264, 1929932249329175040],
       [2277858609715068928, 1029899941942870016,  697980105098465280,
         474210908033249280,  225901381859167232],
       [3099639594430160896,  180005304757567488, 5722511343032594432,
        1124642358261370880, 2470612548683230208],
       [1642892077528174592,  350410050667245568, 4470789329091936256,
        3820825539325075456, 3023336120105865216]])

# Query test

take the fisrt query

In [20]:
"""audio = 'data/queries/track3.wav'"""

"audio = 'data/queries/track3.wav'"

make the hashmin of the song

In [21]:
"""track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
timess = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=HOP_SIZE)
timesPeaks = timeOfPeaks(peaks, timess)
freqsP = [onset_env[i] for i in peaks]
    
h = minhash(freqsP, timesPeaks, THRESHOLD, DURATION)
h"""

'track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)\ntimess = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=HOP_SIZE)\ntimesPeaks = timeOfPeaks(peaks, timess)\nfreqsP = [onset_env[i] for i in peaks]\n    \nh = minhash(freqsP, timesPeaks, THRESHOLD, DURATION)\nh'

lets see if it match something...

In [22]:
"""guess_song(audio)"""

'guess_song(audio)'

In [23]:
"""data_folder2 = Path("./data/queries/")
query_tracks = data_folder2.glob("./*.wav")
get = 0
miss = 0
for query in query_tracks:
    print("\nCurrent query: " + str(query) + "\n")
    try:
        print(guess_song(query))
        get += 1
    except KeyError:
        print("Not matched!")
        miss += 1
    print("\n===========================================\n")
    
print("Song matched: " + str(get) + "  Song missed: " + str(miss))"""

