In [None]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf

def feature_extraction(file_name, n_mfcc=20):
    """Compute time-averaged audio descriptors for a single audio file.

    The file is decoded with ``librosa.load`` at its native sampling rate
    (``sr=None``); every frame-level feature is then averaged over time.

    Parameters
    ----------
    file_name : str
        Path of the audio file to analyse.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 20, the count
        ``parse_audio_files`` assumes when sizing its feature matrix.

    Returns
    -------
    tuple
        ``(mfccs, rmse, spectral_flux, zcr)`` where ``mfccs`` holds ``n_mfcc``
        values, ``rmse`` and ``zcr`` hold one value each and ``spectral_flux``
        is a scalar (mean onset strength). ``np.hstack`` of the four yields
        ``n_mfcc + 3`` features.

    Raises
    ------
    Exception
        Whatever ``librosa`` raises when the file cannot be read or decoded;
        nothing is caught here.
    """
    X, sample_rate = librosa.load(file_name, sr=None)
    if X.ndim > 1:
        # librosa lays multi-channel audio out as (channels, samples), so the
        # first channel is row 0; ``X[:, 0]`` would keep one sample per channel.
        X = X[0]

    # RMS energy is ``librosa.feature.rmse`` before librosa 0.7 and
    # ``librosa.feature.rms`` afterwards; use whichever this install provides.
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse

    # Average over the last (time/frame) axis of each feature.
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=n_mfcc), axis=-1)
    rmse = np.mean(rms_fn(y=X), axis=-1)
    spectral_flux = np.mean(librosa.onset.onset_strength(y=X, sr=sample_rate), axis=-1)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=X), axis=-1)

    # Features tried earlier and currently disabled: mel spectrogram
    # (128 values), chroma (12), spectral contrast (7) and tonnetz (6).
    # The STFT magnitude they relied on is no longer computed because
    # nothing that is returned depends on it.

    return mfccs, rmse, spectral_flux, zcr
    
def parse_audio_files(parent_dir, sub_dirs, file_ext='*.mp3'): 

    n_mfccs = 20 
    number_of_features = 3 + n_mfccs
    #number_of_features = 154 + n_mfccs # 154 are the total values returned by rest of computed features
    features, labels = np.empty((0,number_of_features)), np.empty(0)
    
    ##Extract features for each audio file
    for label, sub_dir in enumerate(sub_dirs): ##The enumerate() function adds a counter to an iterable.
        for file_name in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)): ##parent is data, sub_dirs are the classes
            print "Actual File Name: ", file_name
            try:
                mfccs, rmse, spectral_flux, zcr = feature_extraction(file_name)
                #mfccs, zcr, mel, chroma, contrast, tonnetz = feature_extraction(file_name)
            except Exception as e:
                print("[Error] there was an error in feature extraction. %s" % (e))
                continue
             
            extracted_features = np.hstack([mfccs, rmse, spectral_flux, zcr])
            #extracted_features = np.hstack([mfccs, zcr, mel, chroma, contrast, tonnetz]) #Stack arrays in sequence horizontally (column wise)
            #print "Total Extracted Features: ", len(extracted_features) #This helps us identify really how many features are being computed
            features = np.vstack([features, extracted_features]) #Stack arrays in sequence vertically (row wise).
            labels = np.append(labels, label)
        print("Extrac

In [1]:

import os
# Every immediate sub-directory of the data folder is treated as one class.
# Plain files sitting next to the class folders (e.g. ".DS_Store") are skipped
# so they do not become empty classes and shift the numeric labels.
# os.listdir raises OSError if "audio-data/" does not exist.
audio_subdirectories = sorted(
    entry for entry in os.listdir("audio-data/")
    if os.path.isdir(os.path.join("audio-data/", entry))
)
# Function-call form works on Python 2 and 3; the two spaces reproduce the
# output of the former `print a, b` statement.
print('Audio Subdirs:  %s' % (audio_subdirectories,))

OSError: [Errno 2] No such file or directory: 'audio-data/'

In [None]:

# Extract one feature row per audio file (class label = index of its folder
# in audio_subdirectories), then persist both arrays as .npy files.
features, labels = parse_audio_files(
    parent_dir='audio-data',
    sub_dirs=audio_subdirectories,
)
np.save(file='feat.npy', arr=features)
np.save(file='label.npy', arr=labels)