# Experiment Here!

In [1]:
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

import madmom
import librosa
import mir_eval

In [2]:
# Frames per second; used below as the default `frame_rate` of pre_process().
FPS = 100

In [3]:
from madmom.utils import search_files, match_file

# Audio files with the '.flac' suffix found by madmom's search_files in 'data/train'.
AUDIO_FILES = search_files('data/train', '.flac')

def find_audio_files(ann_files, audio_files, ann_suffix=None, audio_suffix='.wav'):
    """
    Find the audio file matching each annotation file.

    An annotation file only counts as matched if exactly one audio file
    matches it; annotation files with no match or with several candidate
    matches are skipped.

    Parameters
    ----------
    ann_files : list
        List with annotation file names.
    audio_files : list
        List with audio file names to be matched.
    ann_suffix : str, optional
        Suffix of the annotation files. If None, the suffix is inferred
        separately for every annotation file from its file extension.
    audio_suffix : str, optional
        Suffix of the audio files.

    Returns
    -------
    matched_files : list
        List of matched audio file (names).
    matched_indices : list
        Indices into `ann_files` of the annotation files for which
        exactly one audio file was matched.

    """
    matched_files = []
    matched_indices = []
    for i, ann_file in enumerate(ann_files):
        # Infer the suffix per file and keep it in a local name, so the
        # extension of the first file is not reused for all later files.
        suffix = ann_suffix
        if suffix is None:
            suffix = os.path.splitext(ann_file)[1]
        matches = match_file(ann_file, audio_files, suffix, audio_suffix)
        # Only unambiguous matches are kept.
        if len(matches) == 1:
            matched_files.append(matches[0])
            matched_indices.append(i)
    return matched_files, matched_indices

In [4]:
# Number of audio files collected in AUDIO_FILES.
len(AUDIO_FILES)

321

In [5]:
from scripts import utilities

In [6]:
# Call the entry point of the project-local `scripts.utilities` module.
utilities.main()

main()
sys.version_info(major=3, minor=7, micro=1, releaselevel='final', serial=0)


# Pre-Processing

In [7]:
import math
import numpy as np

def pre_process(filename, frame_size=2048, frame_rate=FPS, num_bands=40, **kwargs):
    """
    Pre-process the audio signal.

    The file is loaded at 44.1 kHz, zero-padded and cut into overlapping
    frames of `frame_size` samples. The signal is padded with half a
    frame of zeros at the front, so the first sample sits in the middle
    of the first frame. The end is padded so that the last frame is
    complete.

    Parameters
    ----------
    filename : str
        File to be processed.
    frame_size : int
        Size of the frames (in samples).
    frame_rate : float
        Frame rate used for the STFT; determines the hop size between
        consecutive frames.
    num_bands : int
        Number of frequency bands for the Mel filterbank (currently
        unused, see the note under Returns).
    kwargs : dict, optional
        Additional keyword arguments (currently unused).

    Returns
    -------
    spectrogram : None
        The spectrogram computation is not implemented yet, so None is
        returned; only the framing step is performed.

    """
    ######## COMPUTE FRAMES ########

    sr = 44100  # sampling rate the file is loaded with
    # hop size depends on sampling rate and frame rate
    hop_size = int(sr / frame_rate)
    signal, sr = librosa.load(filename, sr=sr)  # read file

    # number of frames without remainder frame
    full_frames = len(signal) // hop_size

    # Zero-pad half a frame at the front.
    front_padded_signal = np.concatenate((np.zeros(frame_size // 2), signal))
    # Pad the end so that the last frame (starting at full_frames * hop_size)
    # is complete. Clamp at 0: when the remainder after the last full hop is
    # longer than half a frame the signal is already long enough, and a
    # negative count would make np.zeros raise.
    end_zeros = max(
        0, full_frames * hop_size + frame_size - len(front_padded_signal))
    padded_signal = np.concatenate((front_padded_signal, np.zeros(end_zeros)))

    # Cut the padded signal into frames of `frame_size` samples, one every
    # `hop_size` samples (full_frames + 1 frames in total).
    frames = [
        padded_signal[i * hop_size:i * hop_size + frame_size]
        for i in range(full_frames + 1)
    ]

    # TODO: compute the spectrogram from `frames`; not implemented yet.
    spectrogram = None
    return spectrogram

In [8]:

# Trial run of pre_process() on one file from AUDIO_FILES, passing
# frame_size, frame_rate and num_bands explicitly.
texasName = AUDIO_FILES[19]  # example file used for the trial run

pre_process(texasName, 2048, 100, 40)

