# Experiment Here!

In [1]:
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

import madmom
import librosa
import mir_eval

In [2]:
# Frame rate in frames per second; used as the default `frame_rate` of `pre_process`.
FPS = 100

In [3]:
from madmom.utils import search_files, match_file

# Training audio files: presumably every file under 'data/train' whose name
# ends in '.flac' (see madmom.utils.search_files -- TODO confirm recursion depth).
AUDIO_FILES = search_files('data/train', '.flac')

def find_audio_files(ann_files, audio_files, ann_suffix=None, audio_suffix='.wav'):
    """
    Find the audio file that belongs to each annotation file.

    Parameters
    ----------
    ann_files : list
        List with annotation file names.
    audio_files : list
        List with audio file names to be matched.
    ann_suffix : str, optional
        Suffix of the annotation files. If 'None', the suffix is
        inferred individually for every annotation file from its
        file extension.
    audio_suffix : str, optional
        Suffix of the audio files.

    Returns
    -------
    matched_files : list
        List of matched audio file (names).
    matched_indices : list
        Indices into `ann_files` of the annotation files for which
        exactly one matching audio file was found.

    Notes
    -----
    Annotation files with no match, or with more than one match, are
    skipped; they appear in neither returned list.

    """
    matched_files = []
    matched_indices = []
    for i, ann_file in enumerate(ann_files):
        # Infer the suffix per file without overwriting the `ann_suffix`
        # argument; otherwise the extension of the first annotation file
        # would be applied to every following file as well.
        if ann_suffix is None:
            suffix = os.path.splitext(ann_file)[1]
        else:
            suffix = ann_suffix
        matches = match_file(ann_file, audio_files, suffix, audio_suffix)
        # Only an unambiguous (single) match is accepted.
        if len(matches) == 1:
            matched_files.append(matches[0])
            matched_indices.append(i)
    return matched_files, matched_indices

In [4]:
# Sanity check: number of audio files found (321 in the recorded run).
len(AUDIO_FILES)

321

In [5]:
from scripts import utilities

In [6]:
# Run the entry point of the local `scripts.utilities` module; in the recorded
# run it prints "main()" followed by the interpreter's sys.version_info.
utilities.main()

main()
sys.version_info(major=3, minor=7, micro=1, releaselevel='final', serial=0)


# Pre-Processing

In [295]:
import math
import numpy as np

def pre_process(filename, frame_size=2048, frame_rate=FPS, num_bands=40, **kwargs):
    """
    Pre-process the audio signal.

    The file is loaded at 44.1 kHz and cut into overlapping frames in two
    ways: manually (half a frame of zeros is prepended and the end is
    zero-padded so the last frame is complete) and with madmom's
    `FramedSignal`. Frame 10 of both framings is printed so that the two
    can be compared by eye. The spectrogram itself is not computed yet.

    Parameters
    ----------
    filename : str
        File to be processed.
    frame_size : int
        Size of the frames in samples.
    frame_rate : float
        Frame rate (frames per second); determines the hop size as
        ``sample rate / frame_rate``.
    num_bands : int
        Number of frequency bands for the Mel filterbank (currently unused).
    kwargs : dict, optional
        Additional keyword arguments (currently unused).

    Returns
    -------
    spectrogram : None
        Placeholder; always `None` until the spectrogram computation is
        implemented.

    """
    sample_rate = 44100
    signal, sr = librosa.load(filename, sr=sample_rate)

    print("signal length:", len(signal))

    # Hop size in samples follows from the frame rate (441 at 100 fps and
    # 44.1 kHz) instead of being hard-coded.
    hop_size = int(round(sr / frame_rate))

    # Number of complete hops that fit into the signal; one more frame is
    # produced below so that the tail of the signal is covered as well.
    framesNeeded = len(signal) // hop_size

    # Prepend half a frame of zeros so that frame i starts half a frame
    # before sample i * hop_size of the original signal.
    frontZeroPaddedSignal = np.concatenate((np.zeros(frame_size // 2), signal))

    # Zeros required at the end so that the last frame has `frame_size`
    # samples; clamped because no padding is needed when the hop size is
    # large relative to the frame size.
    endZeros = max(0, frame_size - (len(frontZeroPaddedSignal) - framesNeeded * hop_size))
    print("end zeros:", endZeros)

    paddedSignal = np.concatenate((frontZeroPaddedSignal, np.zeros(endZeros)))
    print("padded signal length:",len(paddedSignal))

    # Manual framing: framesNeeded + 1 frames, each `frame_size` samples long.
    framesCustom = [
        paddedSignal[start:start + frame_size]
        for start in range(0, (framesNeeded + 1) * hop_size, hop_size)
    ]

    # Reference framing by madmom, using the same frame and hop size.
    frames = madmom.audio.signal.FramedSignal(signal, frame_size=frame_size,
                                              hop_size=hop_size)

    # Print the same frame from both framings for comparison.
    print("")
    print(frames[10])
    print(framesCustom[10])

    spectrogram = None
    return spectrogram

In [297]:

# Pick one training file and run the pre-processing on it with explicit settings.
texasName = AUDIO_FILES[19]

pre_process(texasName, frame_size=2048, frame_rate=100, num_bands=40)




signal length: 435074
end zeros: 776
padded signal length: 436874

[0.00112915 0.00119019 0.00125122 ... 0.00018311 0.00024414 0.00033569]
[0.00112915 0.00119019 0.00125122 ... 0.00018311 0.00024414 0.00033569]


In [298]:
# Slicing sanity check: the stop index is exclusive, so [2:4] yields the
# elements at positions 2 and 3.
a = list(range(1, 6))
a[2:4]

[3, 4]