In [22]:
import numpy as np
import librosa, librosa.display #librosa is a python package for music and audio analysis. It provides the building blocks necessary to create music information retrieval systems.
import matplotlib.pyplot as plt
class Preprocessing:
    """Extract a fixed set of audio features from a file into a dictionary.

    Features are kept in ``self.features`` (feature name -> value). The
    dictionary lives on the instance, so calling ``extract_features`` again
    on the same object overwrites the entries stored under the same keys.
    """

    def __init__(self):
        # Maps a feature name (str) to the value extracted for it.
        self.features = {}

    def insert_(self, key, value):
        """Store ``value`` under ``key``, replacing any previous entry."""
        self.features[key] = value

    def extract_features(self, file=""):
        """Load ``file`` with librosa and compute its features.

        The audio is loaded at 22050 Hz. The following keys are written to
        the feature dictionary:

        - ``"signal"``: samples returned by ``librosa.load``
        - ``"sample rate"``: sample rate returned by ``librosa.load``
        - ``"FFT"``: magnitude of the FFT of the whole signal
        - ``"STFT"``: magnitude of the STFT (n_fft=2048, hop_length=512)
        - ``"STFT in Decibels"``: ``librosa.amplitude_to_db`` of ``"STFT"``
        - ``"Zero Crossings"``: number of zero crossings in the signal
        - ``"Zero Crossing Rate"``: first row of
          ``librosa.feature.zero_crossing_rate`` (library default framing)
        - ``"13 MFCCs"``: 13 MFCCs from ``librosa.feature.mfcc``
        - ``"Energy"``: sum of squared samples per frame
          (frame length 1024, hop 512)
        - ``"RMSE"``: first row of ``librosa.feature.rms`` with the same framing

        Two informational lines about the STFT hop/window durations are
        printed as a side effect.

        Parameters
        ----------
        file : str
            Path of the audio file, passed straight to ``librosa.load``.

        Returns
        -------
        dict
            The instance's feature dictionary (the same object as
            ``self.features``, not a copy).
        """
        signal, sample_rate = librosa.load(file, sr=22050)
        self.insert_("signal", signal)
        self.insert_("sample rate", sample_rate)

        # FFT -> magnitude spectrum of the whole signal.
        spectrum = np.abs(np.fft.fft(signal))
        self.insert_("FFT", spectrum)

        # STFT -> spectrogram.
        hop_length = 512  # in num. of samples
        frame_length = 2 * hop_length
        n_fft = 2048  # window in num. of samples
        # Report hop length and window size in seconds.
        hop_length_duration = float(hop_length) / sample_rate
        n_fft_duration = float(n_fft) / sample_rate
        print("STFT hop length duration is: {}s".format(hop_length_duration))
        print("STFT window duration is: {}s".format(n_fft_duration))
        stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
        # Magnitude of the complex STFT coefficients.
        spectrogram = np.abs(stft)
        self.insert_("STFT", spectrogram)
        log_spectrogram = librosa.amplitude_to_db(spectrogram)
        self.insert_("STFT in Decibels", log_spectrogram)

        # Zero crossings: np.sum counts the True entries in one vectorised
        # pass instead of a Python-level loop over every sample.
        zero_crossings = librosa.zero_crossings(signal, pad=False)
        self.insert_("Zero Crossings", np.sum(zero_crossings))
        zcrs = librosa.feature.zero_crossing_rate(signal)
        self.insert_("Zero Crossing Rate", zcrs[0])

        # MFCCs: the audio and sample rate are passed by keyword because
        # librosa >= 0.10 made them keyword-only; the keyword form also
        # works on older releases.
        MFCCs = librosa.feature.mfcc(
            y=signal, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=13
        )
        self.insert_("13 MFCCs", MFCCs)

        # Short-time energy: sum of squared samples per (overlapping) frame.
        # The last frames are shorter because slicing stops at the signal end.
        # Accumulate in float64 so the result dtype is stable across NumPy versions.
        energy = np.array([
            np.sum(np.square(signal[start:start + frame_length]), dtype=np.float64)
            for start in range(0, len(signal), hop_length)
        ])
        self.insert_("Energy", energy)

        # RMS energy per frame; `y` is keyword-only in librosa >= 0.10.
        rmse = librosa.feature.rms(
            y=signal, frame_length=frame_length, hop_length=hop_length, center=True
        )
        self.insert_("RMSE", rmse[0])
        return self.get_features()

    def get_features(self):
        """Return the feature dictionary (the live object, not a copy)."""
        return self.features
    
        
        
        
        
    

In [23]:
prp = Preprocessing()
# Forward slashes are accepted on Windows as well, so this relative path
# also resolves on Linux/macOS (a literal backslash would not).
d = prp.extract_features("audio/blues.00000.wav")
# Summarise each feature by its length; scalar features have no len(),
# which raises TypeError, so their type is reported instead.
for k, v in d.items():
    try:
        print(k, len(v))
    except TypeError:
        print(k, type(v))

STFT hop length duration is: 0.023219954648526078s
STFT window duration is: 0.09287981859410431s
signal 661794
sample rate <class 'int'>
FFT 661794
STFT 1025
STFT in Decibels 1025
Zero Crossings <class 'numpy.int32'>
Zero Crossing Rate 1293
13 MFCCs 13
Energy 1293
RMSE 1293
