In [2]:
import librosa
import numpy as np
import os
import random
import pandas as pd

In [3]:
def extract_simple_features(file_path, segment_duration=1, target_sr=48000):
    """Extract the per-file minimum and maximum of three segment-level features.

    The audio is loaded and resampled to ``target_sr``, then cut into
    consecutive, non-overlapping segments of ``segment_duration`` seconds
    (a trailing partial segment is dropped). For every segment the function
    computes the mean pYIN pitch (0 when no frame is voiced), the mean
    spectral centroid and the RMS energy, and finally reports the smallest
    and largest of each across all segments.

    Args:
        file_path (str): Path to audio file
        segment_duration (float): Duration of each segment in seconds (default: 1s)
        target_sr (int): Target sample rate (default: 48000)

    Returns:
        tuple: (filename, min_pitch, min_centroid, min_energy,
                max_pitch, max_centroid, max_energy).
        All six numeric fields are 0 when the file is shorter than one
        segment or when processing fails (the error is printed, not raised).
    """
    filename = os.path.basename(file_path)
    # Same arity as the success path so callers can always build a 7-column frame.
    empty_result = (filename,) + (0,) * 6

    try:
        # librosa.load resamples to target_sr, so the returned rate is target_sr.
        y, _ = librosa.load(file_path, sr=target_sr, res_type='kaiser_best')

        segment_length = int(segment_duration * target_sr)
        if segment_length <= 0:
            raise ValueError(
                f"segment_duration * target_sr must be at least 1 sample, got {segment_length}"
            )

        # Floor division: only complete segments are analysed, so no
        # short-segment check is needed inside the loop.
        num_segments = len(y) // segment_length
        # Analysis window must not exceed the segment itself.
        frame_length = min(2048, segment_length)

        pitch_values = []
        centroid_values = []
        energy_values = []

        for i in range(num_segments):
            start = i * segment_length
            segment = y[start:start + segment_length]

            # 1. Pitch Tracking (PYIN)
            f0, _, _ = librosa.pyin(
                segment,
                fmin=librosa.note_to_hz('C2'),
                fmax=librosa.note_to_hz('C7'),
                sr=target_sr,
                frame_length=frame_length
            )
            # Unvoiced frames are NaN; a fully unvoiced segment counts as 0.
            pitch_mean = np.nanmean(f0) if np.any(~np.isnan(f0)) else 0

            # 2. Spectral Centroid
            centroid = librosa.feature.spectral_centroid(
                y=segment,
                sr=target_sr,
                n_fft=frame_length
            )[0]
            centroid_mean = np.mean(centroid)

            # 3. RMS Energy (one frame spanning the whole segment)
            rms = librosa.feature.rms(
                y=segment,
                frame_length=segment_length,
                hop_length=segment_length,
                center=False
            )[0]
            rms_mean = np.mean(rms)

            pitch_values.append(pitch_mean)
            centroid_values.append(centroid_mean)
            energy_values.append(rms_mean)

        # File shorter than a single segment: nothing to aggregate.
        if not pitch_values:
            return empty_result

        # Minimum and maximum of the per-segment values.
        return (
            filename,
            min(pitch_values),
            min(centroid_values),
            min(energy_values),
            max(pitch_values),
            max(centroid_values),
            max(energy_values),
        )

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return empty_result

In [4]:
# Folder holding the noise recordings.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
dataset_dir = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\noise-audio-data"

# Fixed seed and sample size so the sampled subset (and every statistic
# derived from it) is identical on each Restart & Run All.
SAMPLE_SEED = 42
SAMPLE_SIZE = 50

# os.listdir returns entries in arbitrary order, so sort before sampling;
# lower() lets upper-case extensions such as ".WAV" match as well.
all_files = sorted(
    f for f in os.listdir(dataset_dir)
    if f.lower().endswith(('.wav', '.mp3', '.flac'))
)

# A dedicated Random instance keeps the draw reproducible without touching
# the global random state.
selected_files = random.Random(SAMPLE_SEED).sample(all_files, min(SAMPLE_SIZE, len(all_files)))

In [6]:
# Run the feature extractor on every sampled file; each call yields one row.
results = [
    extract_simple_features(os.path.join(dataset_dir, audio_name))
    for audio_name in selected_files
]

# Column labels, in the order the extractor returns its values.
feature_columns = [
    'Filename',
    'Min Pitch (Hz)',
    'Min Centroid (Hz)',
    'Min Energy',
    'Max Pitch (Hz)',
    'Max Centroid (Hz)',
    'Max Energy',
]
df_result = pd.DataFrame(results, columns=feature_columns)

In [7]:
# Display the per-file feature table (one row per processed audio file).
df_result

Unnamed: 0,Filename,Min Pitch (Hz),Min Centroid (Hz),Min Energy,Max Pitch (Hz),Max Centroid (Hz),Max Energy
0,3-100018-A-18.wav,0.0,3056.056688,0.020367,0.0,4362.98671,0.161467
1,4-218199-H-35.wav,99.753667,4264.56118,0.021676,100.930573,4875.23191,0.02279
2,5-213836-C-9.wav,0.0,1791.53659,0.052999,1432.23493,2304.704043,0.089048
3,2-135728-A-38.wav,0.0,3022.8577,0.011393,0.0,4560.360899,0.020132
4,1-43807-A-47.wav,73.030666,576.128477,0.147461,76.566754,950.49604,0.214412
5,1-28135-B-11.wav,0.0,2428.52458,0.065269,0.0,3203.848967,0.127353
6,4-194680-A-36.wav,0.0,4247.470053,0.034707,0.0,4999.979269,0.047269
7,2-96654-A-47.wav,0.0,1678.656591,0.006007,0.0,3527.57739,0.03843
8,3-148330-A-21.wav,0.0,0.0,0.0,0.0,2777.131875,0.164672
9,5-219379-A-11.wav,0.0,2404.164874,0.026019,0.0,3284.843339,0.123519


In [8]:
# Summary statistics of every numeric feature column; transposed so each
# feature is a row and each statistic is a column.
df_result.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Min Pitch (Hz),50.0,97.274375,239.390661,0.0,0.0,0.0,49.105038,1235.084951
Min Centroid (Hz),50.0,2415.644663,1765.120647,0.0,1443.53636,2368.020566,3055.201367,8836.606349
Min Energy,50.0,0.037285,0.048939,0.0,0.004423,0.021021,0.045182,0.233508
Max Pitch (Hz),50.0,395.724335,569.53763,0.0,0.0,94.900217,588.311362,2007.286404
Max Centroid (Hz),50.0,3513.08496,1664.904345,517.675299,2495.631739,3261.975719,4434.232291,9357.971365
Max Energy,50.0,0.114569,0.109431,0.004934,0.031402,0.087442,0.159735,0.545093


In [11]:
# Folder holding the bird-sound recordings.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
dir_birdsound = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\xeno-canto"

# Full paths of the regular files in the folder. Sub-directories are skipped
# because they cannot be loaded as audio; sorting makes the row order stable.
selected_files = sorted(
    path
    for path in (os.path.join(dir_birdsound, name) for name in os.listdir(dir_birdsound))
    if os.path.isfile(path)
)

# The paths are already complete, so they are passed to the extractor as-is
# instead of being joined onto the unrelated noise-dataset directory.
results = []
for file_path in selected_files:
    features = extract_simple_features(file_path)
    results.append(features)

df_result = pd.DataFrame(results, columns=['Filename', 'Min Pitch (Hz)', 'Min Centroid (Hz)', 'Min Energy', 'Max Pitch (Hz)', 'Max Centroid (Hz)', 'Max Energy'])

In [13]:
# Summary statistics of whatever df_result currently holds, transposed so each
# feature is a row. NOTE(review): df_result is reassigned by more than one
# cell, so this output depends on which cell ran last — presumably the
# bird-sound run; confirm on a fresh Restart & Run All.
df_result.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Min Pitch (Hz),3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Min Centroid (Hz),3.0,4361.208336,1706.143408,2421.914369,3726.152101,5030.389834,5330.85532,5631.320806
Min Energy,3.0,0.008166,0.00123,0.007187,0.007475,0.007763,0.008655,0.009547
Max Pitch (Hz),3.0,1963.953595,145.853441,1801.787172,1903.728464,2005.669756,2045.036806,2084.403856
Max Centroid (Hz),3.0,7160.310362,1629.722354,6167.063775,6219.884046,6272.704317,7656.933656,9041.162995
Max Energy,3.0,0.104752,0.092392,0.015708,0.057046,0.098385,0.149274,0.200163


In [None]:
# Xeno-canto bird-sound dataset (3 samples)
# min Pitch (Hz) : 0
# min Centroid (Hz) : 2421.914369
# min Energy : 0.007187
# max Pitch (Hz) : 2084.403856
# max Centroid (Hz) : 9041.162995
# max Energy : 0.200163

# Kaggle dataset (50 samples)
# min Pitch (Hz) : 0
# min Centroid (Hz) : 0
# min Energy : 0
# max Pitch (Hz) : 2007.286404
# max Centroid (Hz) : 9357.971365
# max Energy : 0.545093