In [1]:
import librosa
import numpy as np
import pandas as pd


In [91]:
class FeatureExtractor:
    """Namespace for librosa-based audio feature extraction helpers."""

    # Feature names understood by ``extract``. The order here is the order in
    # which the per-feature vectors are concatenated in the result, regardless
    # of the order in which the caller lists them.
    _FEATURE_ORDER = ("mfcc", "chroma", "mel", "contrast", "tonnetz")

    @staticmethod
    def extract(file_path, *args, save_as_npy = False, file_name = "feature_extraction_output_file" ):
        """
        Return the requested features of an audio file as one flat vector.

        Every requested feature is computed frame by frame with librosa,
        averaged over the time frames, and the resulting per-feature vectors
        are concatenated in the fixed order
        mfcc, chroma, mel, contrast, tonnetz.

        Parameters
        ----------
        file_path (string) : path of the audio file to load.
        *args (strings) : names of the features to extract; any of
            "mfcc"     - 40 mel-frequency cepstral coefficients. The mel scale
                         models how the human ear perceives changes in pitch.
            "chroma"   - chromagram computed from the magnitude STFT; it
                         describes the signal in terms of the 12 semitones
                         (chroma) of the musical octave.
            "mel"      - mel-scaled spectrogram.
            "contrast" - spectral contrast computed from the magnitude STFT.
            "tonnetz"  - tonal centroid features of the harmonic component.
            Names outside this list are ignored (the earlier documentation
            also mentioned "zcr", but it is not implemented).
        save_as_npy (bool) : when true, the result is also written to disk
            with ``np.save``. Default: False.
        file_name (string) : target path handed to ``np.save``; NumPy appends
            ".npy" when the name does not already end with it.

        Returns
        -------
        numpy.ndarray : 1-D array holding the time-averaged values of all
            requested features.

        Raises
        ------
        ValueError : if none of the supported feature names was requested.
        """

        requested = [name for name in FeatureExtractor._FEATURE_ORDER if name in args]
        if not requested:
            # Fail before touching the file system with a message that names
            # the valid choices, instead of a NameError further down.
            raise ValueError(
                "extract() needs at least one feature name out of (%s); got %r"
                % (", ".join(FeatureExtractor._FEATURE_ORDER), args)
            )

        # librosa.load is called with its defaults, which down-mix to a mono
        # 1-D signal, so no channel selection is needed afterwards.
        data, sample_rate = librosa.load(file_path)

        # The magnitude spectrogram is shared by chroma and contrast; skip the
        # STFT entirely when neither of them was asked for.
        stft = None
        if "chroma" in requested or "contrast" in requested:
            stft = np.abs(librosa.stft(data))

        averaged = []
        for name in requested:
            if name == "mfcc":
                frames = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
            elif name == "chroma":
                frames = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
            elif name == "mel":
                # ``y`` must be passed by keyword: recent librosa releases no
                # longer accept the signal as a positional argument.
                frames = librosa.feature.melspectrogram(y=data, sr=sample_rate)
            elif name == "contrast":
                frames = librosa.feature.spectral_contrast(S=stft, sr=sample_rate)
            else:  # "tonnetz"
                frames = librosa.feature.tonnetz(y=librosa.effects.harmonic(data), sr=sample_rate)
            # Collapse the time axis: one mean value per feature coefficient.
            averaged.append(np.mean(frames.T, axis=0))

        extracted_features = np.hstack(averaged)

        if save_as_npy:
            np.save(file_name, extracted_features)

        return extracted_features
        
        
        
        
        

In [92]:
# Demo: extract every supported feature from the sample recording.
FeatureExtractor.extract(
    "example_audio.ogg",
    "mfcc",
    "chroma",
    "mel",
    "contrast",
    "tonnetz",
)

[[-6.00969543e+02  4.73483706e+00 -8.54750538e+00 -4.23647547e+00
  -8.59365582e-01 -1.93358946e+00 -3.34930062e-01  8.65300119e-01
   7.81835794e-01  4.80183721e-01 -5.15032470e-01 -2.08400443e-01
  -8.97986650e-01  4.96130198e-01 -1.15539446e-01 -2.99996644e-01
  -1.14685035e+00 -6.07976280e-02 -1.57832488e-01  5.00616074e-01
  -7.08330631e-01 -5.35296738e-01  3.06388408e-01 -2.26073012e-01
  -4.37756985e-01 -1.01219369e-02 -3.23512197e-01  3.67189258e-01
  -4.05758113e-01 -3.19618672e-01  1.33676529e-02  5.64327776e-01
  -7.75211006e-02  1.28633022e-01 -4.78613108e-01 -1.15880594e-01
   1.65570572e-01 -3.86333540e-02 -1.07888699e-01  1.13799244e-01
   6.99929073e-02  7.85857067e-02  6.13361448e-02  5.01703210e-02
   4.94509786e-02  5.30075170e-02  5.05965985e-02  6.66939691e-02
   8.79254863e-02  6.76687732e-02  5.19975424e-02  6.14324138e-02
   1.84907026e-06  1.12652351e-05  1.77182446e-05  3.10571813e-05
   3.11413351e-05  7.10569366e-05  3.54685850e-04  2.35422063e-04
   1.81472

array([[1, 1, 1, 2],
       [3, 3, 3, 3],
       [4, 4, 4, 4]])