In [52]:
import librosa
import os
import pandas
import soundfile
import numpy as np
import pandas as pd

In [78]:
# pass this function a file and it will return a list of features.
def get_features(file,**features):
    with soundfile.SoundFile(file) as sound_file:
        result = np.array([])
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        stft = np.abs(librosa.stft(X))
        
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
        
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, chroma))
        
        mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
        result = np.hstack((result, mel))
        
    return result

In [79]:
get_features("data_16kHz\\Actor_01\\03-01-01-01-01-01-01.wav")

  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)


array([-6.93497009e+02,  5.00643921e+01,  5.71450472e-01,  1.43299656e+01,
        3.33637071e+00, -2.54071975e+00, -4.05790901e+00, -1.07119989e+01,
       -7.29413891e+00,  1.74018884e+00, -4.19064283e+00,  1.95466173e+00,
       -5.24789429e+00,  2.78143024e+00, -3.16756773e+00, -3.40008307e+00,
       -2.37803173e+00, -5.68717599e-01, -6.47753334e+00, -1.24320543e+00,
       -2.80542517e+00, -5.43635845e+00, -4.46875274e-01, -3.63516593e+00,
       -2.98372602e+00, -5.63902617e-01, -1.65101945e+00, -5.55944860e-01,
       -3.41018438e+00, -2.24465466e+00, -3.13058877e+00, -2.70089960e+00,
       -1.88821328e+00, -5.54154515e-01, -3.96459866e+00, -2.13485193e+00,
       -3.94577074e+00, -1.62457871e+00, -2.03990722e+00, -3.62910867e+00,
        6.33734643e-01,  6.48760676e-01,  6.21744275e-01,  6.34553194e-01,
        6.60345674e-01,  6.60463989e-01,  7.00948954e-01,  7.31556296e-01,
        7.46921480e-01,  7.23420501e-01,  7.16119647e-01,  6.30602837e-01,
        6.52830522e-07,  

In [62]:
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
            result = np.hstack((result, tonnetz))
    return result
extract_feature("data_16kHz\\Actor_01\\03-01-01-01-01-01-01.wav", mfcc=True, chroma=True, mel=True)

  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)


array([-6.93497009e+02,  5.00643921e+01,  5.71450472e-01,  1.43299656e+01,
        3.33637071e+00, -2.54071975e+00, -4.05790901e+00, -1.07119989e+01,
       -7.29413891e+00,  1.74018884e+00, -4.19064283e+00,  1.95466173e+00,
       -5.24789429e+00,  2.78143024e+00, -3.16756773e+00, -3.40008307e+00,
       -2.37803173e+00, -5.68717599e-01, -6.47753334e+00, -1.24320543e+00,
       -2.80542517e+00, -5.43635845e+00, -4.46875274e-01, -3.63516593e+00,
       -2.98372602e+00, -5.63902617e-01, -1.65101945e+00, -5.55944860e-01,
       -3.41018438e+00, -2.24465466e+00, -3.13058877e+00, -2.70089960e+00,
       -1.88821328e+00, -5.54154515e-01, -3.96459866e+00, -2.13485193e+00,
       -3.94577074e+00, -1.62457871e+00, -2.03990722e+00, -3.62910867e+00,
        6.33734643e-01,  6.48760676e-01,  6.21744275e-01,  6.34553194e-01,
        6.60345674e-01,  6.60463989e-01,  7.00948954e-01,  7.31556296e-01,
        7.46921480e-01,  7.23420501e-01,  7.16119647e-01,  6.30602837e-01,
        6.52830522e-07,  