# Audio Feature Extraction (finished)

This notebook converts any `.m4a` files (if present) to `.wav` using `ffmpeg` (if installed), then extracts audio features (MFCCs, spectral centroid and rolloff, RMS energy, zero-crossing rate, chroma, tempo) for each file and saves per-person CSVs and a merged CSV under the configured `OUTPUT_DIR` (`features_audio/` by default).

Instructions: run cells top-to-bottom. If you don't have `ffmpeg` installed on Windows, install via Chocolatey (`choco install ffmpeg`) or download a build and add to PATH.

In [None]:
%pip install librosa soundfile resampy audioread numpy pandas matplotlib --quiet

In [5]:
import os
import glob
import shutil
import subprocess
import time
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import librosa
import librosa.display
from pathlib import Path
from scipy.io.wavfile import write
from IPython.display import Audio, display
print('Imports OK')

Imports OK


In [None]:
# Configuration - edit the name below; every folder is resolved to an absolute path
YOUR_NAME = 'Kerie'

# Input folders: raw recordings and augmented recordings for YOUR_NAME
AUDIO_FOLDER = os.path.abspath(os.path.join('audio_data', YOUR_NAME))
AUGMENTED_FOLDER = os.path.abspath(os.path.join('augmented_audio', YOUR_NAME))

# Output locations: the directory for CSVs and the merged CSV inside it
OUTPUT_DIR = os.path.abspath('features_audio')
MERGED_OUTPUT = os.path.abspath(os.path.join(OUTPUT_DIR, 'audio_features_all.csv'))
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Echo the resolved settings so the reader can confirm them before running further cells
print('\n'.join([
    'Configuration:',
    f'  YOUR_NAME -> {YOUR_NAME}',
    f'  AUDIO_FOLDER -> {AUDIO_FOLDER}',
    f'  AUGMENTED_FOLDER -> {AUGMENTED_FOLDER}',
    f'  OUTPUT_DIR -> {OUTPUT_DIR}',
    f'  MERGED_OUTPUT -> {MERGED_OUTPUT}',
]))


Configuration:
  YOUR_NAME -> Kerie
  AUDIO_FOLDER -> c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio_data\Kerie
  AUGMENTED_FOLDER -> c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\augmented_audio\Kerie
  OUTPUT_DIR -> c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio
  MERGED_OUTPUT -> c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\data\processed\audio_features_all.csv


## Diagnostics: check folders and files
Run this cell to confirm where files are and whether any .wav/.m4a files already exist.

In [8]:
# Report the working directory and whether the configured audio folder is present
audio_folder_found = os.path.exists(AUDIO_FOLDER)
print('Notebook cwd:', os.getcwd())
print('AUDIO_FOLDER exists:', audio_folder_found)
if not audio_folder_found:
    print('  (AUDIO_FOLDER not found)')
else:
    # One line per directory entry; unreadable metadata is shown as '<err>'
    for entry in sorted(os.listdir(AUDIO_FOLDER)):
        entry_path = os.path.join(AUDIO_FOLDER, entry)
        try:
            size, mtime = os.path.getsize(entry_path), time.ctime(os.path.getmtime(entry_path))
        except Exception:
            size = mtime = '<err>'
        print(f'  - {entry} | size={size} | mtime={mtime} | abs={os.path.abspath(entry_path)}')

# Search below the current working directory and list at most 50 matches per extension
print('Recursive search for .wav and .m4a in repo:')
wav_files = glob.glob('**/*.wav', recursive=True)
m4a_files = glob.glob('**/*.m4a', recursive=True)
for ext_label, matches in (('.wav', wav_files), ('.m4a', m4a_files)):
    print(f'  {ext_label} found:', len(matches))
    for rel_path in sorted(matches)[:50]:
        print('   -', os.path.abspath(rel_path))


Notebook cwd: c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing
AUDIO_FOLDER exists: True
  - Approve 1.wav | size=262222 | mtime=Wed Nov 12 19:22:33 2025 | abs=c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio_data\Kerie\Approve 1.wav
  - Approve 2.wav | size=211022 | mtime=Wed Nov 12 19:22:40 2025 | abs=c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio_data\Kerie\Approve 2.wav
  - CT1.wav | size=231502 | mtime=Wed Nov 12 19:21:35 2025 | abs=c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio_data\Kerie\CT1.wav
  - CT2.wav | size=237646 | mtime=Wed Nov 12 19:22:47 2025 | abs=c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio_data\Kerie\CT2.wav
Recursive search for .wav and .m4a in repo:
  .wav found: 82
   - c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio\augmented\Fidel_confirm_transaction_aug_no

## ffmpeg check & conversion
This cell will convert `.m4a` files inside `AUDIO_FOLDER` to `.wav` using `ffmpeg` if it's available. It writes the `.wav` to the same folder with the same base name.

In [9]:
def ffmpeg_available():
    """Return True when an `ffmpeg` executable can be located on PATH, else False."""
    located = shutil.which('ffmpeg')
    return located is not None

def convert_m4a_to_wav(input_path, output_path, show_output=False, sample_rate=44100):
    """Convert one audio file to 16-bit PCM WAV by running ffmpeg.

    Parameters
    ----------
    input_path : str
        Path of the file handed to ffmpeg via ``-i``.
    output_path : str
        Path ffmpeg writes to; ``-y`` lets ffmpeg overwrite an existing file.
    show_output : bool, default False
        When True, ffmpeg's stdout/stderr are printed even on success
        (they are always printed when ffmpeg exits non-zero).
    sample_rate : int, default 44100
        Value passed to ffmpeg's ``-ar`` option.

    Returns
    -------
    bool
        True only if ffmpeg exited with code 0 and a non-empty file exists at
        ``output_path``; False otherwise (including when ffmpeg is not on PATH).
    """
    ff = shutil.which('ffmpeg')
    if ff is None:
        print('ffmpeg not found on PATH; please install it to convert .m4a files')
        return False

    # Remember whether a usable file was already there, so a failed run never
    # deletes something that existed (and was non-empty) before this call.
    had_usable_output = os.path.exists(output_path) and os.path.getsize(output_path) > 0

    cmd = [ff, '-hide_banner', '-y', '-i', input_path,
           '-acodec', 'pcm_s16le', '-ar', str(sample_rate), output_path]
    print('Running:', ' '.join(cmd))
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0 or show_output:
        print('ffmpeg stdout:', proc.stdout)
        print('ffmpeg stderr:', proc.stderr)

    succeeded = (
        proc.returncode == 0
        and os.path.exists(output_path)
        and os.path.getsize(output_path) > 0
    )

    # A failed run can leave a partial file behind. Callers that skip files whose
    # .wav already exists with size > 0 would then treat it as converted, so
    # remove leftovers that this call produced.
    if not succeeded and not had_usable_output and os.path.exists(output_path):
        try:
            os.remove(output_path)
        except OSError as exc:
            print('Could not remove partial output', output_path, exc)

    return succeeded

# Only attempt conversion when AUDIO_FOLDER is present on disk
if not os.path.exists(AUDIO_FOLDER):
    print('AUDIO_FOLDER not found; skipping conversion')
else:
    m4as = list(filter(lambda name: name.lower().endswith('.m4a'), os.listdir(AUDIO_FOLDER)))
    print('Found', len(m4as), '.m4a files')
    conv, skip, fail = 0, 0, 0
    for m4a_name in m4as:
        src = os.path.join(AUDIO_FOLDER, m4a_name)
        stem, _ext = os.path.splitext(m4a_name)
        # The .wav is written next to the source, under the same base name
        dst = os.path.join(AUDIO_FOLDER, stem + '.wav')
        already_converted = os.path.exists(dst) and os.path.getsize(dst) > 0
        if already_converted:
            print('Skipping, wav exists:', dst)
            skip += 1
        elif convert_m4a_to_wav(src, dst, show_output=True):
            print('Converted ->', dst)
            conv += 1
        else:
            print('Failed to convert', src)
            fail += 1
    print(f'Conversion summary: converted={conv}, skipped={skip}, failed={fail}')


Found 0 .m4a files
Conversion summary: converted=0, skipped=0, failed=0


## AudioFeatureExtractor class
The class below extracts features for a single file; the helper functions that follow it infer the phrase from a file name and the person from a file path.

In [None]:
class AudioFeatureExtractor:
    """Extract audio features from wav files for voice verification.

    Parameters
    ----------
    remove_silence : bool, default False
        When True, the signal is passed through ``librosa.effects.trim``
        before any feature is computed.
    sample_rate : int, default 22050
        Passed as ``sr`` to ``librosa.load``.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
    top_db : float, default 20
        ``top_db`` threshold handed to ``librosa.effects.trim``.
    """
    def __init__(self, remove_silence: bool = False, sample_rate: int = 22050,
                 n_mfcc: int = 13, top_db: float = 20):
        self.remove_silence = remove_silence
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.top_db = top_db

    def remove_silence_from_audio(self, y, sr):
        """Return ``y`` trimmed by ``librosa.effects.trim`` (``sr`` is accepted but unused)."""
        y_trimmed, _ = librosa.effects.trim(y, top_db=self.top_db)
        return y_trimmed

    def extract_features(self, audio_path):
        """Load one audio file and return a flat dict of float features.

        Keys produced: ``mfcc_{i}_mean`` / ``mfcc_{i}_std`` for each MFCC row,
        ``spectral_centroid_mean``, ``spectral_rolloff_mean``,
        ``rms_energy_mean``, ``zcr_mean``, ``chroma_{i}_mean`` for each chroma
        row, and ``tempo``.

        Returns ``None`` (after printing the error) when loading or feature
        computation fails, or when no samples remain to analyse. A failure of
        the tempo estimate alone is tolerated and recorded as ``tempo = 0.0``.
        """
        try:
            y, sr = librosa.load(audio_path, sr=self.sample_rate)
            if self.remove_silence:
                y = self.remove_silence_from_audio(y, sr)
            if np.size(y) == 0:
                # Nothing to analyse (empty file, or everything was trimmed away)
                print('Error extracting', audio_path, 'no audio samples to analyse')
                return None

            features = {}

            # MFCCs: per-coefficient mean and standard deviation across frames
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=self.n_mfcc)
            mfcc_mean = np.mean(mfccs, axis=1)
            mfcc_std = np.std(mfccs, axis=1)
            for i, (mean_i, std_i) in enumerate(zip(mfcc_mean, mfcc_std)):
                features[f'mfcc_{i}_mean'] = float(mean_i)
                features[f'mfcc_{i}_std'] = float(std_i)

            # spectral and energy features
            features['spectral_centroid_mean'] = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
            features['spectral_rolloff_mean'] = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
            features['rms_energy_mean'] = float(np.mean(librosa.feature.rms(y=y)))
            features['zcr_mean'] = float(np.mean(librosa.feature.zero_crossing_rate(y)))

            # chroma: mean of each chroma row across frames
            chroma = librosa.feature.chroma_stft(y=y, sr=sr)
            for i, c in enumerate(np.mean(chroma, axis=1)):
                features[f'chroma_{i}_mean'] = float(c)

            try:
                tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
                # beat_track may hand back the tempo as a scalar or as a small
                # array; reduce it explicitly instead of relying on implicit
                # array-to-scalar conversion, which NumPy has deprecated.
                features['tempo'] = float(np.ravel(tempo)[0])
            except Exception:
                # Best effort: a failed tempo estimate must not discard the other features
                features['tempo'] = 0.0
            return features
        except Exception as e:
            print('Error extracting', audio_path, e)
            return None

def _extract_phrase_from_filename(filename):
    ln = filename.lower()
    if 'approve' in ln or 'yes' in ln:
        return 'approve'
    if 'confirm' in ln or 'ct' in ln:
        return 'confirm_transaction'
    return 'unknown'

def _extract_person_from_path(filepath):
    parts = Path(filepath).parts
    for candidate in ['Fidel','Kerie','Irais']:
        if candidate.lower() in filepath.lower():
            return candidate
    for p in parts:
        if p in ['Fidel','Kerie','Irais']:
            return p
    return 'Unknown'

# Printed once the definitions above in this cell have executed without raising
print('Feature extractor ready')


Feature extractor ready


## Process folders and extract features
The cell below searches the folders mapped to each person, extracts features from every `.wav` file found, and saves per-person CSVs and a merged CSV.

In [None]:
# Folder mappings (adjust if your project differs)
# Each person maps to the list of folders whose .wav files are attributed to them.
folder_mappings = {
    'Fidel': [os.path.join('.', 'audio', 'augmented')],
    'Kerie': [os.path.join('.', 'augmented_audio', 'Kerie')],
    'Irais': [os.path.join('.', 'augmented_audio', 'Irais')]
}

extractor = AudioFeatureExtractor()
all_features = {}
total_processed = 0
# Make sure the per-person CSV destination exists even if this cell is run on its own
os.makedirs(OUTPUT_DIR, exist_ok=True)
for person, folders in folder_mappings.items():
    rows = []
    for folder in folders:
        folder = os.path.abspath(folder)
        # isdir (not exists): a plain file at this path cannot be listed
        if not os.path.isdir(folder):
            print('Folder missing, skipping:', folder)
            continue
        # os.listdir returns entries in arbitrary order; sort so the CSV row
        # order is the same on every run and every machine
        wavs = sorted(
            os.path.join(folder, f)
            for f in os.listdir(folder)
            if f.lower().endswith('.wav')
        )
        print(f'Found {len(wavs)} wav(s) in {folder}')
        for p in wavs:
            feats = extractor.extract_features(p)
            if feats is None:
                # extraction failed for this file; skip it
                continue
            feats['person'] = person
            feats['audio_path'] = os.path.abspath(p)
            feats['audio_name'] = os.path.basename(p)
            feats['phrase'] = _extract_phrase_from_filename(os.path.basename(p))
            rows.append(feats)
    if rows:
        df_person = pd.DataFrame(rows)
        out_file = os.path.join(OUTPUT_DIR, f'{person}_audio_features.csv')
        df_person.to_csv(out_file, index=False)
        all_features[person] = df_person
        total_processed += len(df_person)
        print(f'Wrote {out_file} ({len(df_person)} rows)')

# Merge and save
if all_features:
    merged = pd.concat(all_features.values(), ignore_index=True)
    meta_cols = ['person', 'audio_path', 'audio_name', 'phrase']
    for c in meta_cols:
        if c not in merged.columns:
            merged[c] = ''
    # Metadata columns first, then every feature column in its existing order
    cols = meta_cols + [c for c in merged.columns if c not in meta_cols]
    merged = merged[cols]
    # MERGED_OUTPUT may be configured outside OUTPUT_DIR; create its folder if needed
    merged_dir = os.path.dirname(MERGED_OUTPUT)
    if merged_dir:
        os.makedirs(merged_dir, exist_ok=True)
    merged.to_csv(MERGED_OUTPUT, index=False)
    print(f'Merged features saved to {MERGED_OUTPUT} (rows={len(merged)})')
    # Save feature column names (exclude metadata) for model usage
    feature_cols = [c for c in merged.columns if c not in meta_cols]
    cols_file = os.path.join(OUTPUT_DIR, 'feature_columns.txt')
    with open(cols_file, 'w', encoding='utf-8') as fh:
        fh.write('\n'.join(feature_cols))
    print(f'Feature columns saved to {cols_file} (count={len(feature_cols)})')
else:
    print('No features extracted. Check that folders contain .wav files or run the conversion cell first.')


Found 16 wav(s) in c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\audio\augmented
Wrote c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Fidel_audio_features.csv (16 rows)
Found 12 wav(s) in c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\augmented_audio\Kerie
Wrote c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Fidel_audio_features.csv (16 rows)
Found 12 wav(s) in c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\augmented_audio\Kerie
Wrote c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Kerie_audio_features.csv (12 rows)
Found 20 wav(s) in c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\augmented_audio\Irais
Wrote c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Kerie_audio_features.csv (12 rows)
Fo

## Quick checks
List the newly created feature files and show sample rows from the merged CSV (if it exists).

In [None]:
# List whatever currently sits in the output directory (nothing is listed if it is absent)
print('Files in', OUTPUT_DIR)
output_entries = sorted(os.listdir(OUTPUT_DIR)) if os.path.exists(OUTPUT_DIR) else []
for entry in output_entries:
    print(' -', os.path.join(OUTPUT_DIR, entry))

# Preview the merged CSV: row count plus the first five rows
if not os.path.exists(MERGED_OUTPUT):
    print('Merged CSV not found:', MERGED_OUTPUT)
else:
    dfm = pd.read_csv(MERGED_OUTPUT)
    print('\nMerged CSV path:', MERGED_OUTPUT, 'rows:', len(dfm))
    display(dfm.head(5))


Files in c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio
 - c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Fidel_audio_features.csv
 - c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Irais_audio_features.csv
 - c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\Formative-2-Data-Preprocessing\features_audio\Kerie_audio_features.csv
Merged CSV sample rows: 48


Unnamed: 0,mfcc_0_mean,mfcc_0_std,mfcc_1_mean,mfcc_1_std,mfcc_2_mean,mfcc_2_std,mfcc_3_mean,mfcc_3_std,mfcc_4_mean,mfcc_4_std,...,chroma_7_mean,chroma_8_mean,chroma_9_mean,chroma_10_mean,chroma_11_mean,tempo,person,audio_path,audio_name,phrase
0,-323.525208,43.516697,11.343247,36.618992,5.898495,16.233978,1.027185,13.190826,1.819397,10.621459,...,0.594353,0.580412,0.574347,0.583969,0.589356,135.999178,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_noise.wav,confirm_tx
1,-612.3302,207.960693,44.700008,59.605667,21.499029,25.132103,4.432204,22.005758,15.117193,19.426517,...,0.355087,0.337085,0.377836,0.409257,0.430312,75.99954,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_pitch_down.wav,confirm_tx
2,-602.238098,193.085983,41.418762,59.347988,15.62307,26.032578,5.278125,24.862335,18.880026,20.826796,...,0.435098,0.465186,0.448267,0.40338,0.385112,75.99954,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_pitch_up.wav,confirm_tx
3,-605.934204,205.846344,42.851822,58.401211,21.193609,24.867525,3.396454,24.43388,19.956852,20.753639,...,0.376679,0.349646,0.319049,0.351828,0.401187,89.102909,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_time_fast.wav,confirm_tx
4,-609.548828,199.078125,40.294361,60.046928,19.327871,25.396095,2.712046,24.470715,20.506405,21.316648,...,0.389045,0.389968,0.374686,0.367824,0.374536,60.09266,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_time_slow.wav,confirm_tx
5,-645.265198,209.979218,47.242092,60.702068,21.397139,25.906296,3.70241,24.371967,20.725725,21.325397,...,0.414349,0.396865,0.379882,0.373591,0.392854,75.99954,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_volume_down.wav,confirm_tx
6,-538.058838,210.6259,46.858982,60.889812,21.305552,25.988304,3.617964,24.404194,20.603298,21.43399,...,0.411126,0.392578,0.375115,0.371131,0.391141,75.99954,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_aug_volume_up.wav,confirm_tx
7,-578.002747,210.71405,46.788502,60.920109,21.252876,25.999146,3.567156,24.396826,20.555729,21.462627,...,0.407924,0.390816,0.375584,0.371954,0.393565,75.99954,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_confirm_transaction_original.wav,confirm_tx
8,-330.21817,31.177803,0.726351,36.349045,13.564811,20.735401,4.617391,17.92041,6.865464,12.14134,...,0.529542,0.5984,0.632485,0.567598,0.554248,112.347147,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_yes_approve_aug_noise.wav,approve_yes
9,-582.271667,157.260544,23.571775,81.554268,29.482561,26.884964,6.0198,30.107307,21.690634,25.431349,...,0.451091,0.367018,0.363716,0.445481,0.544423,135.999178,Fidel,c:\Users\LENOVO\Documents\KERIE\ML_FORMATIVE2\...,Fidel_yes_approve_aug_pitch_down.wav,approve_yes
