In [27]:
import librosa
import numpy as np
import pandas as pd
import os
import glob

In [28]:
# Function to extract features from an audio file
def extract_features(file_path, n_mfcc=20):
    """Compute a fixed-length feature vector for one audio file.

    The file is loaded at its native sampling rate (``sr=None``). MFCCs and
    the spectral centroid are computed per frame, each is averaged over all
    frames, and the two results are concatenated.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to load with ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 20, the value
        that was previously hard-coded.

    Returns
    -------
    numpy.ndarray or None
        1-D array holding the per-coefficient MFCC means followed by the
        mean spectral centroid, or ``None`` if loading or feature
        extraction raised an exception (the error is printed, not raised).
    """
    try:
        # sr=None keeps the file's own sampling rate instead of resampling.
        audio, sample_rate = librosa.load(file_path, sr=None)

        # MFCC: average every coefficient over the time frames.
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        mfccs_processed = np.mean(mfccs.T, axis=0)

        # Spectral centroid: average over the time frames.
        spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sample_rate)
        spectral_centroids_processed = np.mean(spectral_centroids.T, axis=0)

        # Concatenate all features into a single flat vector.
        return np.hstack((mfccs_processed, spectral_centroids_processed))
    except Exception as e:
        # Best-effort by design: one unreadable file must not abort a long
        # batch run, so report the problem and let the caller skip the file.
        print(f"Error encountered while parsing file: {file_path}, {e}")
        return None

In [29]:
# Directory holding the preprocessed .wav files (relative to the notebook).
audio_directory = '../data/processed_wav_files/'
# glob.glob returns matches in a filesystem-dependent order; sort them so the
# row order of everything derived from this list is reproducible.
audio_files = sorted(glob.glob(os.path.join(audio_directory, '*.wav')))
audio_files[:5]

['../data/processed_wav_files\\afrikaans1.wav',
 '../data/processed_wav_files\\afrikaans2.wav',
 '../data/processed_wav_files\\afrikaans3.wav',
 '../data/processed_wav_files\\afrikaans4.wav',
 '../data/processed_wav_files\\afrikaans5.wav']

In [30]:
import re
from tqdm import tqdm  # Import tqdm

# Build one [feature_vector, file_name] pair per audio file.
features_list = []
for file in tqdm(audio_files, desc="Processing audio files"):
    features = extract_features(file)
    # extract_features returns None when a file could not be processed;
    # such files are left out of the result.
    if features is None:
        continue
    # The stored label is the bare file name (e.g. "afrikaans1.wav"),
    # not a prefix parsed out of it.
    label = os.path.basename(file)
    features_list.append([features, label])

Processing audio files: 100%|██████████| 3016/3016 [09:14<00:00,  5.43it/s]


In [31]:
# Collect the [features, file name] pairs into a two-column table and
# persist it as a pickle so the extraction does not have to be repeated.
features_pickle_path = '../data/audio_features.pkl'
features_df = pd.DataFrame(data=features_list, columns=['features', 'file'])
features_df.to_pickle(features_pickle_path)

In [32]:
# Preview the first rows to sanity-check the 'features' and 'file' columns.
features_df.head()

Unnamed: 0,features,file
0,"[-320.2046203613281, 113.08561706542969, 13.68...",afrikaans1.wav
1,"[-293.78033447265625, 124.66605377197266, -6.4...",afrikaans2.wav
2,"[-338.8608703613281, 132.85476684570312, 6.337...",afrikaans3.wav
3,"[-285.78668212890625, 136.85784912109375, 8.95...",afrikaans4.wav
4,"[-371.3587646484375, 134.38897705078125, 3.720...",afrikaans5.wav
