In [2]:
import os
import librosa
import pandas as pd
import numpy as np
import noisereduce as nr
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Define a function to extract features from a file
def extract_features(file_path, n_mfcc=13):
    """Load one audio file, denoise it, and summarise it as a flat feature dict.

    The whole function lives in a single cell: a ``def`` body cannot be split
    across notebook cells, because each cell is compiled on its own.

    Parameters
    ----------
    file_path : str
        Path of the audio file, passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 13).

    Returns
    -------
    dict
        ``spectral_centroid_mean`` and ``zero_crossing_rate_mean`` (scalars),
        ``chroma_stft_mean`` (a list with one mean per chroma row), and
        ``mfccs_mean_1`` .. ``mfccs_mean_<n_mfcc>`` (one scalar per coefficient).
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Every feature below is computed on the noise-reduced signal.
    reduced_noise = nr.reduce_noise(y=y, sr=sr)

    # Extract features; [0] picks the single row returned by these two calls.
    mfccs = librosa.feature.mfcc(y=reduced_noise, sr=sr, n_mfcc=n_mfcc)
    spectral_centroids = librosa.feature.spectral_centroid(y=reduced_noise, sr=sr)[0]
    zero_crossing_rate = librosa.feature.zero_crossing_rate(reduced_noise)[0]
    chroma_stft = librosa.feature.chroma_stft(y=reduced_noise, sr=sr)

    # Aggregate the mean of each feature.
    features = {
        'spectral_centroid_mean': np.mean(spectral_centroids),
        'zero_crossing_rate_mean': np.mean(zero_crossing_rate),
        # Kept as one list-valued entry (mean over axis 1 of the chroma matrix).
        'chroma_stft_mean': np.mean(chroma_stft, axis=1).tolist(),
    }

    # Add each MFCC mean as its own key so it becomes a separate column later.
    mfccs_mean = np.mean(mfccs.T, axis=0)
    for i, mfcc in enumerate(mfccs_mean, start=1):
        features[f'mfccs_mean_{i}'] = mfcc

    return features


In [None]:
# Function to process each subfolder and compile features
def process_folders(base_path):
    """Walk ``base_path`` and extract features from every MP3 file found.

    Parameters
    ----------
    base_path : str
        Root directory to search recursively.

    Returns
    -------
    list of dict
        One feature dict per MP3 file, as returned by ``extract_features``,
        with an added ``'class'`` key holding the name of the folder that
        directly contains the file. Empty when nothing matches (``os.walk``
        yields nothing for a path that does not exist).
    """
    features_list = []
    for root, dirs, files in os.walk(base_path):
        # Sorting dirs in place steers os.walk's (top-down) descent order, and
        # sorting files fixes the order within a folder, so rows come out in
        # the same order on every run.
        dirs.sort()
        for file in sorted(files):
            # Match the extension case-insensitively so '.MP3' is not skipped.
            if not file.lower().endswith('.mp3'):
                continue
            file_path = os.path.join(root, file)
            features = extract_features(file_path)
            features['class'] = os.path.basename(root)  # Class label from folder name
            features_list.append(features)

    return features_list

In [None]:
# Main function to orchestrate the feature extraction
def main(base_path='F:\\NewAgePython\\Amazon Rainforst Birds'):  # Update this to your path
    """Extract features for every MP3 under ``base_path`` and save them as CSV.

    Parameters
    ----------
    base_path : str, optional
        Root folder to scan; handed to ``process_folders``. The default is the
        original hard-coded location.

    Returns
    -------
    str or None
        Name of the CSV file written to the current working directory, or
        ``None`` when no features were extracted and nothing was written.

    Raises
    ------
    FileNotFoundError
        If ``base_path`` is not an existing directory.
    """
    # os.walk silently yields nothing for a bad path, which would otherwise
    # end in an empty CSV and a misleading success message.
    if not os.path.isdir(base_path):
        raise FileNotFoundError(f'Base path is not a directory: {base_path}')

    data = process_folders(base_path)
    if not data:
        print(f'No MP3 files found under {base_path}; nothing saved')
        return None

    df = pd.DataFrame(data)
    # Timestamped name so repeated runs do not overwrite earlier exports.
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    csv_file_name = f'birdsfeature_{timestamp}.csv'
    df.to_csv(csv_file_name, index=False)
    print(f'Data saved to {csv_file_name}')
    return csv_file_name

# Run the pipeline only when this code executes as the top-level module,
# not when it is imported from elsewhere.
if __name__ == '__main__':
    main()