# Imports

In [1]:
import numpy as np
import librosa

# Functions

In [None]:
def mfsc(y, sfr, window_size=0.025, window_stride=0.010, window='hamming', n_mels=80, preemCoef=0.97):
    
    win_length = int(sfr * window_size)
    hop_length = int(sfr * window_stride)
    n_fft = 512
    lowfreq = 0
    highfreq = sfr/2

    # melspectrogram
    y *= 32768
    y[1:] = y[1:] - preemCoef*y[:-1]
    y[0] *= (1 - preemCoef)
    S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window, center=False)
    D = np.abs(S)
    param = librosa.feature.melspectrogram(S=D, sr=sfr, n_mels=n_mels, fmin=lowfreq, fmax=highfreq, norm=None)
    mf = np.log(np.maximum(1, param))
    
    return mf

In [None]:
def normalize(features):
    return features-np.mean(features, axis=0)

In [None]:
def extractFeatures(audioPath):

    y, sfreq = sf.read(audioPath)
    features = mfsc(y, sfreq)    
    return normalize(np.transpose(features))

In [None]:
def main(params):

    with open(params.audioFilesList,'r') as  filesFile:
        for featureFile in filesFile:
            print(featureFile[:-1])
            y, sfreq = sf.read('{}'.format(featureFile[:-1])) 
            mf = mfsc(y, sfreq)
            with open('{}.pickle'.format(featureFile[:-5]), 'wb') as handle:
                pickle.dump(mf,handle)

# 1 - Input processing

In [2]:
from IPython.display import Audio
Audio('audiosPath/discovery.wav')

#### Load with librosa

In [43]:
samples, sampling_rate = librosa.load(
    path = 'audiosPath/discovery.wav',
)

#### Pre emphasis filtering

In [44]:
preem_coef = 0.97
samples *= 32768 # no idea why!

samples[1:] = samples[1:] - preem_coef * samples[:-1]
samples[0] *= (1 - preem_coef)

#### STFT

In [46]:
# Each frame of audio is windowed by window of length win_length and then padded with zeros to match n_fft.

n_fft = 512 # corresponds to 23 miliseconds
window_stride = 0.010
window_size = 0.023
window = 'hamming'

hop_length = int(sampling_rate * window_stride)
win_length = int(sampling_rate * window_size)

S = librosa.stft(
    y = samples, 
    n_fft = n_fft, 
    hop_length = hop_length, 
    win_length = win_length, 
    window = window, 
    center = False
)

In [48]:
n_mels = 80
lowfreq = 0
highfreq = sampling_rate / 2

mel_spectrogram = librosa.feature.melspectrogram(
    S = samples, 
    sr = sampling_rate, 
    n_mels = n_mels, 
    fmin = lowfreq, 
    fmax = highfreq, 
    norm = None,
)

IndexError: tuple index out of range

In [None]:
plt.figure(figsize=(25, 10))

samples, sampling_rate = librosa.load(mecanico_path)

mel_spectrogram = librosa.feature.melspectrogram(
    samples, 
    sr = sampling_rate, 
    n_fft = 512, 
    hop_length = 512 // 4, 
    n_mels = 80,
    )

librosa.display.specshow(
    librosa.power_to_db(mel_spectrogram), 
    x_axis = "time",
    y_axis = "mel", 
    sr = sampling_rate,
    )

plt.colorbar(format="%+2.f dB")
plt.show()

In [None]:
param = 
    mf = np.log(np.maximum(1, param))

In [None]:

S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window, center=False)

In [None]:


y *= 32768

In [None]:
a = 1

In [None]:
a *= 5

In [None]:
a

In [None]:

print(len(y))
print(sr)

In [None]:
(1827063 / 22050) / 60

In [None]:
from librosa import display
display.waveshow(y, sr=sr)

In [None]:
# Applying the Fourier Transform
spectrogram_data = librosa.stft(y)
print(spectrogram_data.shape)

# convert to decibels
decibels = librosa.amplitude_to_db(abs(spectrogram_data))

# Create the spectrogram
librosa.display.specshow(decibels)

In [None]:
melspect = librosa.feature.melspectrogram(y)

In [None]:
melspect.shape

# 2 - Feature extractor

In [None]:
with open("files.lst",'r') as filesFile:
    for featureFile in filesFile:
        print(featureFile)
        y, sfreq = sf.read(f'{featureFile}')
        #mf = mfsc(y, sfreq)
        #with open('{}.pickle'.format(featureFile[:-5]), 'wb') as handle:
        #    pickle.dump(mf,handle)

In [None]:
y, sfreq

In [None]:
sfr = 48000

window_size=0.025
window_stride=0.010
window='hamming'
n_mels=80
preemCoef=0.97

win_length = int(sfr * window_size)
hop_length = int(sfr * window_stride)
n_fft = 512
lowfreq = 0
highfreq = sfr/2

print(len(y))
print(np.mean(y))
print(np.std(y))
print(np.median(y))

# melspectrogram
y *= 32768

print("-"*50)

print(len(y))
print(np.mean(y))
print(np.std(y))
print(np.median(y))

y[1:] = y[1:] - preemCoef*y[:-1]
y[0] *= (1 - preemCoef)

print("-"*50)

print(len(y))
print(np.mean(y))
print(np.std(y))
print(np.median(y))

In [None]:
n_fft

In [None]:
hop_length

In [None]:
win_length

In [None]:
S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window=window, center=False)

In [None]:
D = np.abs(S)
param = librosa.feature.melspectrogram(S=D, sr=sfr, n_mels=n_mels, fmin=lowfreq, fmax=highfreq, norm=None)
mf = np.log(np.maximum(1, param))

In [None]:
mf.shape