In [33]:
import numpy as np      
import matplotlib.pyplot as plt 
import scipy.io.wavfile 
import subprocess
import librosa
import librosa.display
import IPython.display as ipd
import random
import csv
import pickle

from collections import defaultdict
from collections import OrderedDict
from pathlib import Path, PurePath   
from tqdm.notebook import tqdm

## Utility functions

In [2]:
def convert_mp3_to_wav(audio:str) -> str:
    """Convert an input MP3 audio track into a WAV file.

    The WAV file is written next to the MP3, with the same name and a
    ``wav`` extension. Conversion is skipped when that file already exists,
    so the function can be called repeatedly on the same track.

    Args:
        audio (str): Path of the input audio track.

    Returns:
        str: Path of the WAV file, or ``audio`` unchanged when it does not
        end in ``mp3``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    if audio[-3:] != "mp3":
        return audio

    wav_audio = audio[:-3] + "wav"
    if not Path(wav_audio).exists():
        # Pass an argument list and no shell: paths containing spaces or shell
        # metacharacters are handed to ffmpeg verbatim instead of being
        # re-parsed (and possibly executed) by the shell.
        subprocess.check_output(["ffmpeg", "-i", audio, wav_audio])
    return wav_audio

def plot_spectrogram_and_picks(track:np.ndarray, sr:int, peaks:np.ndarray, onset_env:np.ndarray) -> None:
    """Plot the onset strength envelope with its selected peaks above the track's spectrogram.

    The figure has two stacked panels sharing the x (time) axis: the top one
    shows ``onset_env`` with a vertical line at every selected peak, the
    bottom one shows the log-frequency spectrogram of ``track`` in dB.

    Frame-to-time conversion uses the module-level ``HOP_SIZE``, so
    ``onset_env`` is expected to have been computed with that hop length.

    Args:
        track (np.ndarray): Audio time series.
        sr (int): Sampling rate of ``track``.
        peaks (np.ndarray): Indices into ``onset_env`` of the selected peaks.
        onset_env (np.ndarray): Vector containing the onset strength envelope.
    """
    times = librosa.frames_to_time(np.arange(len(onset_env)),
                                   sr=sr, hop_length=HOP_SIZE)

    plt.figure()

    # Bottom panel: spectrogram. The same sr / hop length are given to the
    # STFT and to specshow so its time axis agrees with `times` in the top
    # panel even when they differ from librosa's defaults.
    ax = plt.subplot(2, 1, 2)
    D = librosa.stft(track, hop_length=HOP_SIZE)
    librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=np.max),
                             sr=sr, hop_length=HOP_SIZE,
                             y_axis='log', x_axis='time')

    # Top panel: onset envelope and the picked peaks, on the shared time axis.
    plt.subplot(2, 1, 1, sharex=ax)
    plt.plot(times, onset_env, alpha=0.8, label='Onset strength')
    plt.vlines(times[peaks], 0,
               onset_env.max(), color='r', alpha=0.8,
               label='Selected peaks')
    plt.legend(frameon=True, framealpha=0.8)
    plt.axis('tight')
    plt.tight_layout()
    plt.show()

def load_audio_picks(audio, duration, hop_size):
    """Load a track and locate the peaks of its onset strength envelope.

    Args:
        audio (string, int, pathlib.Path or file-like object): Audio source,
            forwarded to ``librosa.load``.
        duration (int): Amount of audio to load, forwarded to ``librosa.load``.
        hop_size (int): Hop length used to compute the onset strength envelope.

    Returns:
        tuple: Returns the audio time series (track) and sampling rate (sr), a vector containing the onset strength envelope
        (onset_env), and the indices of peaks in onset_env (peaks).

    Raises:
        Exception: Any error raised while loading or analysing the track is
            reported on stdout and then re-raised unchanged.
    """
    try:
        track, sr = librosa.load(audio, duration=duration)
        # Keyword arguments: recent librosa releases make these parameters
        # keyword-only, and the names are also accepted by older releases.
        onset_env = librosa.onset.onset_strength(y=track, sr=sr, hop_length=hop_size)
        peaks = librosa.util.peak_pick(onset_env,
                                       pre_max=10, post_max=10,
                                       pre_avg=10, post_avg=10,
                                       delta=0.5, wait=0.5)
    except Exception as e:
        print('An error occurred processing ', str(audio))
        print(e)
        # Re-raise: there is nothing meaningful to return for a failed track,
        # and falling through would hit unbound local variables.
        raise

    return track, sr, onset_env, peaks

    

## Settings

In [3]:
# Used as the progress-bar `total` for the loops over the dataset below.
N_TRACKS = 1413
# Hop length used for the onset strength envelope and for converting
# frame indices to times when plotting.
HOP_SIZE = 512
# Passed as `duration` to load_audio_picks, which forwards it to librosa.load.
DURATION = 30 # TODO: to be tuned!
# NOTE(review): not referenced by any of the cells visible here.
THRESHOLD = 0 # TODO: to be tuned!

In [35]:
# Root of the dataset; the glob patterns below look two directory levels
# beneath it. NOTE(review): absolute, machine-specific path.
data_folder = Path("D:/ADM/data/mp3s-32k/")
# Path.glob returns one-shot iterators: each of these can be walked only
# once, and a partially consumed one does not restart. Re-run this cell (or
# call glob again) to get a fresh listing.
mp3_tracks = data_folder.glob("*/*/*.mp3")
tracks = data_folder.glob("*/*/*.wav")

## Preprocessing

In [5]:
# Make sure every MP3 in the dataset has a WAV counterpart next to it;
# tracks that were already converted are skipped by convert_mp3_to_wav.
for mp3_path in tqdm(mp3_tracks, total=N_TRACKS):
    convert_mp3_to_wav(str(mp3_path))

  0%|          | 0/1413 [00:00<?, ?it/s]

## Audio Signals

In [25]:
# Preview: plot the onset envelope / spectrogram of the first few tracks and
# print one hash value for each.
#
# The preview walks its own sorted listing instead of the shared `tracks`
# iterator: that iterator is one-shot, so consuming items here would make the
# dictionary-building cell silently skip them.
# NOTE(review): `minhash` is defined in the "Minhash" section further down;
# that cell must have been run before this one.
seed = [1,132,48,962,95,88]
for audio in sorted(data_folder.glob("*/*/*.wav"))[:4]:
    track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
    plot_spectrogram_and_picks(track, sr, peaks, onset_env)
    print(minhash(peaks,DURATION,seed[0]))
        

In [36]:
# creating dictionary: track index -> onset peak indices of that track
#
# A fresh, sorted listing is used instead of the shared `tracks` iterator:
# - the iterator is one-shot, so any earlier cell that consumed part of it
#   would make this loop silently skip those tracks;
# - sorting makes the index assigned to each track reproducible, since the
#   index is the only identifier stored for a song.
wav_paths = sorted(data_folder.glob("*/*/*.wav"))

thirty_sec = OrderedDict()
# tqdm wraps the list directly so the progress total is the real track count.
for idx, audio in enumerate(tqdm(wav_paths)):
    track, sr, onset_env, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
    thirty_sec[idx] = peaks

# Persist the result so later sessions can skip the expensive analysis.
with open('thirty_sec.pkl','wb') as f:
    pickle.dump(thirty_sec,f)

  0%|          | 0/1413 [00:00<?, ?it/s]

In [37]:
# Restore the {track index: peak indices} mapping saved by the previous cell.
# The pickle is produced by this notebook; do not point this at untrusted files.
thirty_sec = pickle.loads(Path('thirty_sec.pkl').read_bytes())

## Minhash

In [38]:
def minhash(peaks, DURATION, seed, population=3000, n_bins=1410):
    """Hash a vector of peak indices into a bin using a seeded random linear combination.

    ``len(peaks)`` distinct coefficients are drawn without replacement from
    ``range(population)`` with a generator seeded by ``seed``; the hash is the
    dot product of coefficients and peaks, reduced modulo ``n_bins``. The same
    ``seed`` therefore always defines the same hash function.

    A private ``random.Random`` instance is used, so calling this function
    does not reseed or otherwise disturb the global ``random`` state.

    Args:
        peaks: Sequence of peak indices (e.g. the output of load_audio_picks).
        DURATION: Unused; kept so existing callers keep working.
        seed: Seed selecting the hash function.
        population (int): Coefficients are sampled from ``range(population)``.
        n_bins (int): Number of bins; the result lies in ``[0, n_bins)``.

    Returns:
        int: The bin the peaks hash to.

    Raises:
        ValueError: If ``len(peaks)`` is larger than ``population``.
    """
    rng = random.Random(seed)
    coeff = rng.sample(range(population), len(peaks))
    val = sum(np.multiply(coeff, peaks))
    # Plain int so the value prints cleanly and pickles without numpy scalars.
    return int(val % n_bins)
            

In [39]:
seeds = [21,123,45,87,656]
# Each seed defines one hash function. For each of them, group the song
# indices by the bin their peaks hash to and save that table to its own file.
for hash_no, seed_value in enumerate(seeds, start=1):
    bins=defaultdict(list)
    for song_idx, song_peaks in thirty_sec.items():
        bucket = minhash(song_peaks, DURATION, seed_value)
        bins[bucket].append(song_idx)
    # The with-block closes the file; no explicit close is needed afterwards.
    with open('bins_h({}).pkl'.format(hash_no),'wb') as f:
        pickle.dump(bins,f)
    

In [40]:
# Reload the five bin tables written by the previous cell, in hash order.
bin_files = ('bins_h(1).pkl', 'bins_h(2).pkl', 'bins_h(3).pkl', 'bins_h(4).pkl', 'bins_h(5).pkl')
loaded_bins = []
for bin_file in bin_files:
    with open(bin_file,'rb') as f:
        loaded_bins.append(pickle.load(f))
bins_1, bins_2, bins_3, bins_4, bins_5 = loaded_bins

In [44]:
# now that we have the bins, we analyze queries
# The query's peaks do not depend on the hash seed, so the audio is loaded
# and analysed once and then hashed with every seed.
track, sr, onset_env, peaks = load_audio_picks('D:/ADM/data/queries/track3.wav', DURATION, HOP_SIZE)
q_bin = [minhash(peaks,DURATION,s) for s in seeds]
print("hash della query:",q_bin)

# Look each hash up in the table built with the same seed. Using the computed
# values (not indices copied from one run's output) keeps this correct when
# the query file changes. `.get` avoids inserting empty bins into the tables.
bin_tables = [bins_1, bins_2, bins_3, bins_4, bins_5]
print('canzoni in ogni bin relativo all hash', *(table.get(h, []) for table, h in zip(bin_tables, q_bin)))


hash della query: [357, 342, 1293, 358, 1171]
canzoni in ogni bin relativo all hash [1367] [548, 1108, 1367] [717, 1244, 1367] [1149, 1367] [1367]
