In [8]:
import librosa
import numpy as np
import os
import random
import pandas as pd

In [9]:
def extract_simple_features(file_path, segment_duration=0.1, target_sr=48000):
    """Extract per-file minimum and maximum audio features.

    The audio is loaded at ``target_sr`` and cut into consecutive,
    non-overlapping segments of ``segment_duration`` seconds. Trailing samples
    that do not fill a whole segment are ignored. For every segment the mean
    pYIN pitch, the mean spectral centroid and the RMS energy are computed;
    the minimum and maximum of those per-segment values are returned.

    Args:
        file_path (str): Path to audio file
        segment_duration (float): Duration of each segment in seconds
            (default: 0.1)
        target_sr (int): Target sample rate (default: 48000)

    Returns:
        tuple: (filename, min_pitch, min_centroid, min_energy,
        max_pitch, max_centroid, max_energy). All six feature values are 0
        when the file holds no complete segment or when processing fails
        (the error is printed, not raised), so the tuple always has 7 fields.
    """
    filename = os.path.basename(file_path)

    try:
        # librosa.load resamples to `target_sr`, so the returned rate carries
        # no extra information and is discarded.
        y, _ = librosa.load(file_path, sr=target_sr, res_type='kaiser_best')

        segment_length = int(segment_duration * target_sr)
        if segment_length <= 0:
            raise ValueError(
                f"segment_duration={segment_duration} gives an empty segment at {target_sr} Hz"
            )

        # Only whole segments are analysed, so every slice below has exactly
        # `segment_length` samples and no short-segment check is needed.
        num_segments = len(y) // segment_length
        frame_size = min(2048, segment_length)

        pitch_values = []
        centroid_values = []
        energy_values = []

        for i in range(num_segments):
            start = i * segment_length
            segment = y[start:start + segment_length]

            # 1. Pitch Tracking (PYIN)
            f0, _, _ = librosa.pyin(
                segment,
                fmin=librosa.note_to_hz('C2'),
                fmax=librosa.note_to_hz('C7'),
                sr=target_sr,
                frame_length=frame_size
            )
            # A segment whose f0 track is entirely NaN is recorded as 0 Hz.
            # NOTE: as a consequence min_pitch is 0 for any file containing at
            # least one such segment.
            pitch_mean = np.nanmean(f0) if np.any(~np.isnan(f0)) else 0

            # 2. Spectral Centroid
            centroid = librosa.feature.spectral_centroid(
                y=segment,
                sr=target_sr,
                n_fft=frame_size
            )[0]

            # 3. RMS Energy (one frame spanning the whole segment)
            rms = librosa.feature.rms(
                y=segment,
                frame_length=segment_length,
                hop_length=segment_length,
                center=False
            )[0]

            pitch_values.append(pitch_mean)
            centroid_values.append(np.mean(centroid))
            energy_values.append(np.mean(rms))

        # Minimum and maximum of the per-segment values (0 if no segment was analysed)
        min_pitch = min(pitch_values) if pitch_values else 0
        min_centroid = min(centroid_values) if centroid_values else 0
        min_energy = min(energy_values) if energy_values else 0
        max_pitch = max(pitch_values) if pitch_values else 0
        max_centroid = max(centroid_values) if centroid_values else 0
        max_energy = max(energy_values) if energy_values else 0

        return filename, min_pitch, min_centroid, min_energy, max_pitch, max_centroid, max_energy

    except Exception as e:
        # Best-effort: report the failure and return a placeholder row with the
        # same 7 fields as the success path, so a single unreadable file cannot
        # break the 7-column DataFrame built from these tuples.
        print(f"Error processing {file_path}: {str(e)}")
        return filename, 0, 0, 0, 0, 0, 0

In [10]:
dataset_dir = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\noise-audio-data"
# Keep only audio files (extension matched case-insensitively). The listing is
# sorted because os.listdir returns entries in arbitrary order, which would
# otherwise make the seeded sample below differ between runs or machines.
all_files = sorted(
    f for f in os.listdir(dataset_dir)
    if f.lower().endswith(('.wav', '.mp3', '.flac'))
)

# Draw at most 50 files with a fixed seed so the analysis is reproducible.
# A private Random instance is used so the global random state is untouched.
selected_files = random.Random(42).sample(all_files, min(50, len(all_files)))

In [11]:
# Run the feature extractor on every sampled file, collecting one tuple per file.
results = []
for filename in selected_files:
    results.append(extract_simple_features(os.path.join(dataset_dir, filename)))

# Tabulate the tuples: filename first, then the min and max of each feature.
df_result = pd.DataFrame(
    results,
    columns=[
        'Filename',
        'Min Pitch (Hz)',
        'Min Centroid (Hz)',
        'Min Energy',
        'Max Pitch (Hz)',
        'Max Centroid (Hz)',
        'Max Energy',
    ],
)

In [12]:
# Display the per-file min/max feature table.
df_result

Unnamed: 0,Filename,Min Pitch (Hz),Min Centroid (Hz),Min Energy,Max Pitch (Hz),Max Centroid (Hz),Max Energy
0,1-39901-B-11.wav,0,1945.701911,0.019837,68.606381,3158.063993,0.161498
1,2-117615-E-48.wav,0,1385.105509,0.001084,0.0,3008.823993,0.238786
2,2-54961-A-23.wav,0,0.0,0.0,1340.566074,5803.631529,0.265864
3,1-51805-E-33.wav,0,1037.926337,0.001167,1699.239756,4982.547941,0.058814
4,3-62878-B-42.wav,0,0.0,0.0,1488.56767,3032.988644,0.608409
5,5-222524-A-41.wav,0,2032.377583,0.160141,75.967063,2530.453546,0.246361
6,1-21189-A-10.wav,0,3506.270005,0.070842,0.0,4182.172925,0.110192
7,3-171281-A-6.wav,0,2933.78659,0.00422,1405.357794,4994.172838,0.057552
8,5-194899-D-3.wav,0,0.0,0.0,786.64845,5238.908092,0.153152
9,5-250629-A-37.wav,0,0.0,0.0,2055.084776,4904.838827,0.611875


In [13]:
# Summary statistics of the numeric feature columns, transposed so that each
# feature is one row and each statistic one column.
df_result.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Min Pitch (Hz),50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Min Centroid (Hz),50.0,1782.166659,1297.494935,0.0,978.703865,1621.147229,2533.945005,5475.828546
Min Energy,50.0,0.032597,0.057903,0.0,0.000529,0.002663,0.036413,0.287864
Max Pitch (Hz),50.0,706.789832,693.739041,0.0,79.420802,456.170075,1371.856539,2061.035373
Max Centroid (Hz),50.0,4585.707592,1928.095921,1921.472815,3072.855192,4087.447302,5769.858589,10913.062718
Max Energy,50.0,0.223155,0.167567,0.005889,0.078454,0.230979,0.280905,0.69888


In [5]:
dir_birdsound = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\xeno-canto"
# Full path of every entry in the bird-sound folder, sorted because
# os.listdir returns entries in arbitrary order.
selected_files = [os.path.join(dir_birdsound, x) for x in sorted(os.listdir(dir_birdsound))]

# The paths are already complete, so they are passed to the extractor as-is.
# Joining them onto another dataset's directory is unnecessary and would make
# this cell depend on a variable defined in a different cell.
results = []
for file_path in selected_files:
    features = extract_simple_features(file_path)
    results.append(features)

df_result = pd.DataFrame(results, columns=['Filename', 'Min Pitch (Hz)', 'Min Centroid (Hz)', 'Min Energy', 'Max Pitch (Hz)', 'Max Centroid (Hz)', 'Max Energy'])

In [7]:
# Summary statistics of the numeric feature columns for whatever `df_result`
# currently holds (the xeno-canto table if the cell above was run last),
# transposed so that each feature is one row.
df_result.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Min Pitch (Hz),3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Min Centroid (Hz),3.0,3754.645798,1242.62335,2320.022708,3384.751355,4449.480003,4471.957343,4494.434683
Min Energy,3.0,0.007059,0.000835,0.006549,0.006577,0.006605,0.007313,0.008022
Max Pitch (Hz),3.0,2002.562878,149.76332,1829.692991,1957.342056,2084.99112,2088.997821,2093.004522
Max Centroid (Hz),3.0,8014.806682,1738.430253,6931.957298,7012.193104,7092.428911,8556.231374,10020.033838
Max Energy,3.0,0.16035,0.158048,0.030435,0.072372,0.114308,0.225307,0.336306
