In [1]:
import numpy as np      
import matplotlib.pyplot as plt 
import scipy.io.wavfile 
import subprocess
import librosa
import librosa.display
import IPython.display as ipd
import pandas as pd

from pathlib import Path, PurePath   
from tqdm.notebook import tqdm

In [2]:
def convert_mp3_to_wav(audio:str) -> str:  
    """Convert an input MP3 audio track into a WAV file.

    The conversion is delegated to the ``ffmpeg`` executable and is skipped
    when the target WAV file already exists. Inputs whose name does not end
    in ``.mp3`` are returned unchanged.

    Args:
        audio (str): Path of the input audio track.

    Returns:
        [str]: WAV filename (the input path with its ``mp3`` extension
        replaced by ``wav``), or the input path itself if it is not an MP3.

    Raises:
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    audio = str(audio)
    if not audio.endswith(".mp3"):
        return audio

    wav_audio = audio[:-3] + "wav"
    if not Path(wav_audio).exists():
        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach ffmpeg as single, literal arguments.
        subprocess.check_output(["ffmpeg", "-i", audio, wav_audio])
    return wav_audio

def plot_spectrogram_and_picks(track:np.ndarray, sr:int, peaks:np.ndarray, onset_env:np.ndarray) -> None:
    """Plot the onset strength envelope with its selected peaks above the track's spectrogram.

    The top panel shows ``onset_env`` over time with a vertical line at every
    index in ``peaks``; the bottom panel shows the log-frequency spectrogram of
    ``track``. Both panels share the same time axis.

    Frame indices are converted to seconds with the notebook-level ``HOP_SIZE``
    constant, which must be defined before this function is called.

    Args:
        track (np.ndarray): A track.
        sr (int): Sampling rate.
        peaks (np.ndarray): Indices of peaks in the track.
        onset_env (np.ndarray): Vector containing the onset strength envelope.
    """
    times = librosa.frames_to_time(np.arange(len(onset_env)),
                            sr=sr, hop_length=HOP_SIZE)

    plt.figure()

    # Bottom panel: spectrogram in dB relative to its maximum.
    ax = plt.subplot(2, 1, 2)
    D = librosa.stft(track)
    # Pass the real sampling rate so the spectrogram's time axis agrees with
    # `times` above instead of silently assuming librosa's default rate.
    librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=np.max),
                            sr=sr, y_axis='log', x_axis='time')

    # Top panel: onset envelope and the picked peaks, sharing the time axis.
    plt.subplot(2, 1, 1, sharex=ax)
    plt.plot(times, onset_env, alpha=0.8, label='Onset strength')
    plt.vlines(times[peaks], 0,
            onset_env.max(), color='r', alpha=0.8,
            label='Selected peaks')
    plt.legend(frameon=True, framealpha=0.8)
    plt.axis('tight')
    plt.tight_layout()
    plt.show()

def load_audio_picks(audio, duration, hop_size):
    """Load an audio track and pick the peaks of its onset strength envelope.

    Args:
        audio (string, int, pathlib.Path or file-like object): Audio source handed to ``librosa.load``.
        duration (int): Value forwarded as the ``duration`` argument of ``librosa.load``.
        hop_size (int): Value forwarded as ``hop_length`` when computing the onset strength envelope.

    Returns:
        tuple: Returns the audio time series (track) and sampling rate (sr), a vector containing the onset strength envelope
        (onset_env), and the indices of peaks in track (peaks).

    Raises:
        Exception: Any error raised while loading or analysing the track is
        re-raised after the offending track has been reported on stdout.
    """
    try:
        track, sr = librosa.load(audio, duration=duration)
        # Keyword arguments keep these calls valid on librosa releases where
        # everything after the first parameter is keyword-only.
        onset_env = librosa.onset.onset_strength(y=track, sr=sr, hop_length=hop_size)
        # NOTE(review): `wait` is fractional here (0.5); value kept as in the
        # original call — confirm it is intended.
        peaks = librosa.util.peak_pick(onset_env, pre_max=10, post_max=10,
                                       pre_avg=10, post_avg=10,
                                       delta=0.5, wait=0.5)
    except Exception:
        # Report which track failed, then propagate: callers unpack a 4-tuple,
        # so there is no meaningful value to return after a failure.
        print('An error occurred processing ', str(audio))
        raise

    return track, sr, onset_env, peaks

In [3]:
# Upper bound on the number of tracks processed and row count of the peak matrix.
N_TRACKS = 1_413
# Hop length handed to librosa for onset-strength and frame-to-time computations.
HOP_SIZE = 512
# TODO: to be tuned!
DURATION = 5
# TODO: to be tuned!
THRESHOLD = 0

In [4]:
# Root of the dataset; audio files are expected two directory levels below it.
data_folder = Path("data/mp3s-32k/")
# Both globs are lazy, single-use iterators: once consumed they must be re-created.
mp3_tracks, tracks = data_folder.glob("*/*/*.mp3"), data_folder.glob("*/*/*.wav")

In [5]:
def max_length(tracks,N_TRACKS,DURATION):
    """Pick onset peaks for up to ``N_TRACKS`` tracks and find the largest peak index.

    Each track is analysed with ``load_audio_picks`` using the notebook-level
    ``HOP_SIZE`` constant as hop length.

    Args:
        tracks (iterable): Audio sources accepted by ``load_audio_picks``.
        N_TRACKS (int): Maximum number of tracks to process.
        DURATION (int): Duration forwarded to ``load_audio_picks`` for every track.

    Returns:
        tuple: ``(largest, all_peaks)`` where ``largest`` is the largest peak
        index found over all processed tracks (0 if no peak was found at all)
        and ``all_peaks`` holds one list of peak indices per processed track.
    """
    len_peaks = []
    all_peaks = []

    for idx, audio in enumerate(tracks):
        if idx >= N_TRACKS:
            break
        # Only the peak indices are needed here.
        _, _, _, peaks = load_audio_picks(audio, DURATION, HOP_SIZE)
        all_peaks.append(peaks.tolist())
        # A track may have no peak at all; max() of an empty array would raise.
        if len(peaks):
            len_peaks.append(int(np.max(peaks)))

    # `default` covers the case where no track was processed or none had peaks.
    return max(len_peaks, default=0), all_peaks

In [6]:
def peaksToDataframe(DURATION):
    """Build a binary track-by-frame matrix marking the onset peaks of every WAV track.

    The WAV files under ``data_folder`` are globbed afresh on every call (the
    glob is a single-use iterator) and at most ``N_TRACKS`` of them are
    analysed through ``max_length``.

    Args:
        DURATION (int): Duration forwarded to ``max_length`` for every track.

    Returns:
        pd.DataFrame: One row per processed track and one column per frame
        index from 0 up to and including the largest peak index found; a cell
        is 1 where that track has a peak at that index and 0 elsewhere.
    """
    # NOTE(review): rows follow the order in which glob yields files, which is
    # not sorted — confirm whether a deterministic track order is required.
    tracks = data_folder.glob("*/*/*.wav")
    MAX, peaks = max_length(tracks,N_TRACKS, DURATION)

    # MAX is the largest peak *index*, so MAX + 1 columns are needed for that
    # peak to be representable. Rows follow the number of tracks actually
    # processed, which may be smaller than N_TRACKS.
    mat = np.zeros((len(peaks), MAX + 1), dtype=int)

    for idx, track_peaks in enumerate(peaks):
        # Set all peak columns of this row at once.
        mat[idx, track_peaks] = 1
    return(pd.DataFrame(mat))

In [None]:
from datetime import datetime

# Sweep the analysed duration from 10 to 150 in steps of 10 and write one
# peak matrix per setting to "<duration>-duration.csv". A wall-clock timestamp
# is printed before each run to show how long every setting takes.
for duration in range(10, 160, 10):
    print(duration)

    current_time = datetime.now().strftime("%H:%M:%S")
    print("Current Time =", current_time)

    df = peaksToDataframe(duration)
    df.to_csv(str(duration) + "-duration.csv")

10
Current Time = 11:16:14
20
Current Time = 11:19:39
30
Current Time = 11:26:24
40
Current Time = 11:36:33
50
Current Time = 11:49:30
60
Current Time = 12:05:40
70
Current Time = 12:24:54
80
Current Time = 12:47:13
90
Current Time = 13:13:08
100
Current Time = 13:42:03
110
Current Time = 14:13:37
120
Current Time = 14:48:25
130
Current Time = 15:26:09
140
Current Time = 16:07:14
150
Current Time = 16:51:09
