In [90]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

import librosa
from librosa import display as dp
from scipy.io import wavfile

In [91]:
# Directory holding the raw audio clips (path is relative to the notebook).
root = '../../ml_course/saidkhan_venv/data/'
# os.listdir returns entries in arbitrary order; sort them so every run
# walks the files in the same sequence.
files = sorted(os.listdir(root))

In [92]:
# Sanity check: report how many directory entries were listed from `root`.
print(f'{len(files)} FILES IN FOLDER')

3000 FILES IN FOLDER


In [None]:
def extract_spectrogram(y, sr=8000, n_fft=None) -> np.ndarray:
    '''
    Compute the power spectrogram (squared STFT magnitude) of an audio signal.

    y = time series audio
    sr = sample rate (8000 by default); kept for signature parity with the
         other extractors, the STFT call itself does not use it
    n_fft = FFT window size forwarded to librosa.stft; when None (or any
            other falsy value) librosa's own default is used

    returns: np.ndarray holding |STFT|**2
    '''
    # Forward n_fft only when the caller supplied one, so librosa stays in
    # charge of its default window size.
    stft_kwargs = {'n_fft': n_fft} if n_fft else {}
    stft = librosa.stft(y, **stft_kwargs)
    return np.abs(stft) ** 2

def extract_melspectrogram(y, sr=8000, n_fft=2048, hop_length=512, win_length=None) -> np.ndarray:
    '''
    Compute the mel-scaled spectrogram of an audio signal.

    y = time series audio
    sr = sample rate (8000 by default)
    n_fft = FFT window size
    hop_length = number of samples between successive frames
    win_length = window length; None lets librosa choose it

    All parameters are forwarded to librosa.feature.melspectrogram.

    returns: np.ndarray of melspectrogram
    '''
    # Previously only y and sr reached librosa, so the framing arguments in
    # the signature had no effect; forward every one of them.
    melspectrogram = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
    )
    return melspectrogram

def extract_mfcc(y, sr=8000, n_mfcc=20):
    '''
    Compute Mel-frequency cepstral coefficients of an audio signal.

    y = time series audio
    sr = sample rate (8000 by default)
    n_mfcc = number of MFCCs to return (20 by default)

    returns: np.array of mfcc
    '''
    # n_mfcc used to be dropped, so callers always got librosa's default
    # coefficient count regardless of what they asked for.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfcc

def extract_rms(y):
    '''
    y = time series audio

    returns: the result of librosa.feature.rms for y, with every other
             librosa parameter left at its default
    '''
    return librosa.feature.rms(y=y)

def extract_poly_feutures(y):
    '''
    y = time series audio

    returns: the result of librosa.feature.poly_features for y, with every
             other librosa parameter left at its default
    '''
    # NOTE(review): no sample rate is forwarded here, so librosa's own
    # default applies -- confirm that is intended for this data.
    return librosa.feature.poly_features(y=y)

def extract_chroma_stft(y, sr = 8000, n_fft = 2048):
    '''
    Compute a chromagram from the STFT of an audio signal.

    y = time series audio
    sr = sample rate (8000 by default)
    n_fft = FFT window size (2048 by default)

    returns: np.array of chroma features
    '''
    # sr and n_fft used to be dropped, so librosa fell back to its own
    # default sample rate instead of the one declared in this signature.
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft)
    return chroma

In [None]:
def cut_if_necessary(y, size=8000):
    '''
    cuts audios with duration over size
    y = time series audio (required; the old `y=y` default was evaluated
        when the function was defined and failed on a fresh kernel)
    size = number of samples to keep (e.g. if sr=8000Hz then 1sec=8000, 0.5sec=4000)
    returns: np.array of y, truncated to its first `size` samples when longer,
             otherwise the input unchanged
    '''
    if y.shape[0] > size:
        y = y[:size]

    return y

def pad_if_necessary(y, size=8000):
    '''
    pads audios with duration less than size with zeros
    y = time series audio (required; the old `y=y` default was evaluated
        when the function was defined and failed on a fresh kernel)
    size = number of samples wanted (e.g. if sr=8000Hz then 1sec=8000, 0.5sec=4000)
    returns: np.array of y, zero-padded at the end up to `size` samples when
             shorter, otherwise the input unchanged
    '''
    missing = size - y.shape[0]
    if missing > 0:
        # Build the padding in y's own dtype so that e.g. float32 audio is
        # not silently upcast to float64 by the concatenation.
        y = np.concatenate([y, np.zeros(missing, dtype=y.dtype)])

    return y

In [None]:
def save_numpy(root_path, fname, arr):
    '''
    Write `arr` with np.save to the path formed by joining `root_path`
    and `fname`.
    '''
    np.save(os.path.join(root_path, fname), arr)

In [None]:
# Top-level output directory for every extracted feature.
dst = 'features'

# One sub-directory name per feature type; each is joined onto `dst`.
dst_chroma = 'chroma stft'
dst_poly = 'poly feature'
dst_rms = 'rms'
dst_mfcc = 'mfcc'
dst_melspecs = 'melspectrograms'
dst_spec = 'spectrograms'

In [None]:
# Create the output tree; exist_ok=True makes the cell safe to re-run.
os.makedirs(dst, exist_ok=True)
for _feature_dir in (dst_melspecs, dst_mfcc, dst_spec, dst_rms, dst_poly, dst_chroma):
    os.makedirs(os.path.join(dst, _feature_dir), exist_ok=True)

In [None]:
# Extract every feature type for each file and store it as <name>.npy in
# the matching sub-directory of `dst`.
for f in tqdm(files):
    fpath = os.path.join(root, f)
    fname = os.path.splitext(f)[0]

    # sr=None keeps the file's native sample rate instead of resampling.
    y, sr = librosa.load(fpath, sr=None)

    # Uses the default size of 8000 samples.
    # NOTE(review): pad_if_necessary is defined but not applied here, so
    # clips shorter than the cut size keep their original length -- confirm
    # that variable-length features are intended.
    y = cut_if_necessary(y)

    # Hand the loaded sample rate to the extractors that accept one rather
    # than relying on their hard-coded 8000 default.
    spec = extract_spectrogram(y, sr=sr)
    melpec = extract_melspectrogram(y, sr=sr)
    mfcc = extract_mfcc(y, sr=sr)
    rms = extract_rms(y)
    poly = extract_poly_feutures(y)
    chroma = extract_chroma_stft(y, sr=sr)

    save_numpy(os.path.join(dst, dst_spec), fname=fname, arr=spec)
    save_numpy(os.path.join(dst, dst_melspecs), fname=fname, arr=melpec)
    save_numpy(os.path.join(dst, dst_mfcc), fname=fname, arr=mfcc)
    # These three used to save an undefined name `contr`, which raised
    # NameError on the first file; each feature now saves its own array.
    save_numpy(os.path.join(dst, dst_rms), fname=fname, arr=rms)
    save_numpy(os.path.join(dst, dst_poly), fname=fname, arr=poly)
    save_numpy(os.path.join(dst, dst_chroma), fname=fname, arr=chroma)