In [9]:
import librosa
import numpy as np
import os

In [12]:
def extract_features(file_path, segment_duration=1, sr=22050):
    """Extract per-segment acoustic features from an audio file.

    The file is loaded with ``librosa.load`` and cut into consecutive,
    non-overlapping segments of ``segment_duration`` seconds. Trailing samples
    that do not fill a whole segment are discarded.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; passed straight to ``librosa.load``.
    segment_duration : float, default 1
        Length of each segment in seconds.
    sr : int or None, default 22050
        Sampling rate requested from ``librosa.load``. The rate returned by
        the loader is the one used for every subsequent computation.

    Returns
    -------
    list of dict
        One dict per segment with the keys ``'Segment'`` (index),
        ``'Duration'``, ``'PitchMean'``, ``'PitchStd'``, ``'ZCR'``,
        ``'Centroid'``, ``'Bandwidth'`` and ``'RMS'``. ``'PitchMean'`` and
        ``'PitchStd'`` are NaN when ``librosa.pyin`` reports no pitch (all
        NaN) for the segment. The list is empty when the audio is shorter
        than one segment.

    Raises
    ------
    ValueError
        If ``segment_duration * sr`` truncates to a non-positive number of
        samples.
    """
    y, sr = librosa.load(file_path, sr=sr)

    segment_length = int(segment_duration * sr)
    if segment_length <= 0:
        raise ValueError(
            "segment_duration * sr must yield at least one sample per segment"
        )
    # Floor division drops the trailing partial segment.
    num_segments = len(y) // segment_length

    # The pitch search range does not change between segments; compute it once.
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    features_list = []
    for i in range(num_segments):
        start = i * segment_length
        segment = y[start:start + segment_length]

        # Duration
        duration = librosa.get_duration(y=segment, sr=sr)

        # Pitch (fundamental frequency). `sr` must be forwarded explicitly:
        # pyin otherwise assumes 22050 Hz regardless of the loaded rate.
        f0, _, _ = librosa.pyin(segment, fmin=fmin, fmax=fmax, sr=sr)
        pitched_f0 = f0[~np.isnan(f0)]
        if pitched_f0.size:
            pitch_mean = np.mean(pitched_f0)
            pitch_std = np.std(pitched_f0)
        else:
            # No pitched frame: report NaN directly rather than letting
            # np.nanmean/np.nanstd emit a RuntimeWarning on an all-NaN array.
            pitch_mean = np.nan
            pitch_std = np.nan

        # Zero Crossing Rate
        zcr = np.mean(librosa.feature.zero_crossing_rate(segment))

        # Spectral Centroid
        centroid = np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr))

        # Spectral Bandwidth
        bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=segment, sr=sr))

        # RMS Energy
        rms = np.mean(librosa.feature.rms(y=segment))

        features_list.append({
            'Segment': i,
            'Duration': duration,
            'PitchMean': pitch_mean,
            'PitchStd': pitch_std,
            'ZCR': zcr,
            'Centroid': centroid,
            'Bandwidth': bandwidth,
            'RMS': rms
        })

    return features_list

In [13]:
# Sample clip from the noise dataset. NOTE: this is a machine-specific
# absolute path and must be adjusted when running elsewhere.
file_path = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\noise-audio-data\5-261464-A-23.wav"

# Run the extraction over the whole clip with the default settings.
features = extract_features(file_path)

# Show each segment's feature dictionary on its own line.
for segment_features in features:
    print(segment_features)

  pitch_mean = np.nanmean(f0)


{'Segment': 0, 'Duration': 1.0, 'PitchMean': 67.2613189481876, 'PitchStd': 0.6529328417544467, 'ZCR': 0.15897993607954544, 'Centroid': 2327.2129216251374, 'Bandwidth': 1997.4995643922753, 'RMS': 0.009056073}
{'Segment': 1, 'Duration': 1.0, 'PitchMean': nan, 'PitchStd': nan, 'ZCR': 0.12775213068181818, 'Centroid': 1923.4714578839362, 'Bandwidth': 1838.200250021021, 'RMS': 0.003953147}
{'Segment': 2, 'Duration': 1.0, 'PitchMean': 1086.2206357745313, 'PitchStd': 7.890738408182242, 'ZCR': 0.15965687144886365, 'Centroid': 2175.959883100713, 'Bandwidth': 1693.925503630125, 'RMS': 0.00430643}
{'Segment': 3, 'Duration': 1.0, 'PitchMean': nan, 'PitchStd': nan, 'ZCR': 0.20591042258522727, 'Centroid': 2466.976346745169, 'Bandwidth': 1489.4747259532867, 'RMS': 0.004409332}
{'Segment': 4, 'Duration': 1.0, 'PitchMean': 1773.3095132908802, 'PitchStd': 81.96462407607333, 'ZCR': 0.12883966619318182, 'Centroid': 2016.7729815780424, 'Bandwidth': 1759.4623428271886, 'RMS': 0.001653415}
