In [1]:
from librosa import load
from librosa.onset import onset_strength
from librosa.feature import rhythm, spectral_centroid, spectral_rolloff, zero_crossing_rate, mfcc, chroma_stft, spectral_contrast
import soundfile as sf
import numpy as np

In [2]:
class AudioFeatureExtractor:
    """
    Extract summary audio features from a single audio file.

    The file is decoded once, at construction time, as a mono signal at its
    native sample rate. ``extract_features`` then runs every individual
    extractor and collects the results in one dictionary.

    Attributes:
        file_path (str): Path the audio was loaded from.
        audio_data (np.ndarray): Mono audio time series returned by ``load``.
        sample_rate: Sample rate reported by ``load`` for the file.
    """

    # Number of MFCC coefficients computed (and averaged) by ``_extract_mfcc``.
    N_MFCC = 13

    def __init__(self, file_path: str):
        """
        Initialize the AudioFeatureExtractor and load the audio file.

        Args:
            file_path (str): Path to the audio file.

        Raises:
            RuntimeError: If the audio file cannot be loaded.
        """
        self.file_path = file_path
        self.audio_data, self.sample_rate = self._load_audio()

    def _load_audio(self):
        """
        Load the audio file as mono audio at its native sample rate.

        Returns:
            tuple: Audio time series data and sample rate.

        Raises:
            RuntimeError: If the file cannot be read or decoded. The original
                exception is attached as ``__cause__``.
        """
        try:
            # sr=None tells librosa to keep the file's native sample rate, so
            # no separate metadata probe is needed before decoding.
            return load(self.file_path, sr=None, mono=True)
        except Exception as e:
            # Chain explicitly so the underlying decoder error stays visible.
            raise RuntimeError(f"Error loading audio file {self.file_path}: {e}") from e

    def extract_features(self) -> dict:
        """
        Run every feature extractor, tolerating individual failures.

        Each extractor is attempted independently. If one raises, the error is
        printed and that feature's value is stored as ``None`` so the remaining
        features are still computed.

        Returns:
            dict: Feature name mapped to its extracted value, or ``None`` when
            that extractor failed.
        """
        feature_extractors = {
            "tempo": self._extract_tempo,
            "energy": self._extract_energy,
            "spectral_centroid": self._extract_spectral_centroid,
            "spectral_rolloff": self._extract_spectral_rolloff,
            "zero_crossing_rate": self._extract_zero_crossing_rate,
            "mfcc": self._extract_mfcc,
            "chroma": self._extract_chroma,
            "spectral_contrast": self._extract_spectral_contrast
        }

        extracted_features = {}
        for feature_name, extractor in feature_extractors.items():
            try:
                extracted_features[feature_name] = extractor()
            except Exception as e:
                # Best effort: one failing feature must not abort the others.
                extracted_features[feature_name] = None
                print(f"Error calculating {feature_name}: {e}")

        return extracted_features

    def _extract_tempo(self):
        """
        Estimate the tempo of the audio from its onset strength envelope.

        Returns:
            float: Tempo in beats per minute (first estimate returned by
            ``rhythm.tempo``).
        """
        onset_env = onset_strength(y=self.audio_data, sr=self.sample_rate)
        return rhythm.tempo(onset_envelope=onset_env, sr=self.sample_rate)[0]

    def _extract_energy(self):
        """
        Calculate the mean energy (mean squared amplitude) of the audio.

        Returns:
            float: Sum of squared samples divided by the number of samples.

        Raises:
            ValueError: If the audio signal contains no samples.
        """
        n_samples = len(self.audio_data)
        if n_samples == 0:
            # Avoid a silent NaN (0 / 0); let extract_features report it.
            raise ValueError("Cannot compute energy of an empty audio signal")
        return np.sum(np.square(self.audio_data)) / n_samples

    def _extract_spectral_centroid(self):
        """
        Calculate the mean spectral centroid of the audio.

        Returns:
            float: Mean of all values returned by ``spectral_centroid``.
        """
        return np.mean(spectral_centroid(y=self.audio_data, sr=self.sample_rate))

    def _extract_spectral_rolloff(self):
        """
        Calculate the mean spectral rolloff of the audio.

        Returns:
            float: Mean of all values returned by ``spectral_rolloff``.
        """
        return np.mean(spectral_rolloff(y=self.audio_data, sr=self.sample_rate))

    def _extract_zero_crossing_rate(self):
        """
        Calculate the mean zero crossing rate of the audio.

        Returns:
            float: Mean of all values returned by ``zero_crossing_rate``.
        """
        return np.mean(zero_crossing_rate(y=self.audio_data))

    def _extract_mfcc(self):
        """
        Extract MFCC features, averaged along axis 1.

        Returns:
            np.ndarray: Mean value of each of the ``N_MFCC`` coefficients.
        """
        coefficients = mfcc(y=self.audio_data, sr=self.sample_rate, n_mfcc=self.N_MFCC)
        return np.mean(coefficients, axis=1)

    def _extract_chroma(self):
        """
        Extract chroma features, averaged along axis 1.

        Returns:
            np.ndarray: Mean chroma features.
        """
        return np.mean(chroma_stft(y=self.audio_data, sr=self.sample_rate), axis=1)

    def _extract_spectral_contrast(self):
        """
        Extract spectral contrast, averaged along axis 1.

        Returns:
            np.ndarray: Mean spectral contrast.
        """
        return np.mean(spectral_contrast(y=self.audio_data, sr=self.sample_rate), axis=1)

In [3]:
# Demo run: load one sample clip, compute every feature, and show the result.
sample_file = "../data/33796__yewbic__ambience03.wav"
feature_extractor = AudioFeatureExtractor(sample_file)
features = feature_extractor.extract_features()
print("Extracted Features:", features)

Extracted Features: {'tempo': 120.18531976744185, 'energy': 0.011575209602339926, 'spectral_centroid': 927.438716318569, 'spectral_rolloff': 1756.2101354921497, 'zero_crossing_rate': 0.01730436774523618, 'mfcc': array([-331.53696   ,  163.79898   ,    6.9345274 ,   -1.3852935 ,
         26.354372  ,   23.592123  ,   25.786642  ,   17.631735  ,
          0.5665428 ,    0.81963265,    8.260662  ,    7.390131  ,
          3.0685208 ], dtype=float32), 'chroma': array([0.6596594 , 0.63806134, 0.6315188 , 0.774504  , 0.69726646,
       0.62487686, 0.65954196, 0.6829385 , 0.76489925, 0.6294534 ,
       0.59039015, 0.6239128 ], dtype=float32), 'spectral_contrast': array([17.87988988,  9.63966931, 13.82793608, 15.65922779, 20.68371899,
       20.73833454, 22.54783081])}
