# Features Extraction

## Import Libraries

In [1]:
import os
import numpy as np
import importlib
import sys
import os
import pandas as pd
from tqdm import tqdm
import librosa

## Import Utils

In [2]:
# Make the parent directory importable so the project-local `utils` module resolves.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

import utils

# Re-execute the module so edits made to it since the kernel started are picked up.
# The helper names are bound only AFTER the reload; binding them before as well
# would be redundant, because the post-reload import rebinds every one of them.
importlib.reload(utils)

from utils import (
    augment_audio,
    extract_mfcc_features,
    extract_prosodic_features,
    extract_spectral_features,
    extract_mel_spectrogram,
    pad_features,
    normalize_features,
    compute_feature_statistics,
)

## Import metadata csv

In [3]:
# Source metadata: one row per preprocessed audio file (path, emotion label,
# dataset, actor, index, sample rate), as shown in the cell output.
metadata_csv_path = "../data/processed/metadata.csv"
df = pd.read_csv(metadata_csv_path)
df

Unnamed: 0,file_path,emotion,dataset,actor,index,sample_rate
0,../data/processed/audio_16k/savee_OAF_angry_13...,angry,savee,OAF,132,16000
1,../data/processed/audio_16k/savee_YAF_fear_152...,fear,savee,YAF,1521,16000
2,../data/processed/audio_16k/savee_OAF_neutral_...,neutral,savee,OAF,592,16000
3,../data/processed/audio_16k/savee_OAF_disgust_...,disgust,savee,OAF,1177,16000
4,../data/processed/audio_16k/savee_OAF_disgust_...,disgust,savee,OAF,709,16000
...,...,...,...,...,...,...
11313,../data/processed/audio_16k/crema-d_1065_happy...,happy,crema-d,1065,5247,16000
11314,../data/processed/audio_16k/crema-d_1031_neutr...,neutral,crema-d,1031,2474,16000
11315,../data/processed/audio_16k/crema-d_1064_happy...,happy,crema-d,1064,5203,16000
11316,../data/processed/audio_16k/crema-d_1070_angry...,angry,crema-d,1070,5662,16000


## (MFCCs + Deltas -> Spectral -> Prosodic -> Compute Statistics) Feature Extraction

### Make sure the output directories exist

In [None]:
# Output locations for the statistics-based feature set: the metadata CSV goes in
# the parent directory, the per-file .npy vectors in its `audio_features` child.
# The variable names (including the capital "L" in `spectraL`) are kept exactly
# as-is because the extraction cell refers to them by these names.
mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir = (
    "../data/features/audio_16k/mfcc+deltas_spectral_prosodic_computed_statistics"
)
mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir = (
    f"{mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir}/audio_features"
)

# Create both directories; already-existing ones are left untouched.
for _output_dir in (
    mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir,
    mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir,
):
    os.makedirs(_output_dir, exist_ok=True)

### Feature Extraction

In [None]:
# Build one fixed-length statistics vector per audio file (no augmentation),
# save it as .npy, and record where each vector was written.
metadata = []

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound before the try block so the error message below always names the
    # file that actually failed (never a stale or undefined value).
    file_path = row.file_path
    try:
        sample_rate = row.sample_rate

        audio, sr = librosa.load(file_path, sr=sample_rate)

        # Extract time-series features (row counts are the expected ones)
        mfcc = extract_mfcc_features(audio, sr)  # (120, time)
        spectral = extract_spectral_features(audio, sr)  # (12, time)
        prosodic = extract_prosodic_features(audio, sr)  # (2, time)

        # The extractors may return different frame counts; truncate all of them
        # to the shortest so they can be stacked row-wise.
        min_len = min(mfcc.shape[1], spectral.shape[1], prosodic.shape[1])
        mfcc = mfcc[:, :min_len]
        spectral = spectral[:, :min_len]
        prosodic = prosodic[:, :min_len]

        # Stack all features
        all_features = np.vstack([mfcc, spectral, prosodic])  # (134, time_frames)

        # Collapse the time axis into summary statistics
        feature_vector = compute_feature_statistics(all_features)

        # Save as .npy. splitext swaps only the real extension, so the path stored
        # in the metadata always matches the file np.save actually writes.
        base_name = os.path.splitext(os.path.basename(file_path))[0] + ".npy"
        out_path = os.path.join(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir, base_name)
        np.save(out_path, feature_vector)

        metadata.append([out_path, file_path, row.emotion, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: report the failing file and move on to the next one.
        print(f"Error processing {file_path}: {e}")

# Stored under its own name: reassigning `df` here would replace the source
# metadata (audio paths) with feature paths and break the extraction cells below.
df_svm = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir, 'metadata.csv')

df_svm.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

# Last expression of the cell, so the preview is actually rendered.
df_svm.head()

## (Data Augmentation -> Mel-Spectrograms -> Pad -> Normalize) Feature Extraction

### Make sure the output directories exist

In [None]:
# Output locations for the augmented mel-spectrogram feature set: the metadata CSV
# goes in the parent directory, the .npy arrays in its `audio_features` child.
augmented_mel_spectrograms_features_metadata_dir = (
    "../data/features/audio_16k/augmented_mel_spectrograms"
)
augmented_mel_spectrograms_features_dir = (
    f"{augmented_mel_spectrograms_features_metadata_dir}/audio_features"
)

# Create both directories; already-existing ones are left untouched.
for _output_dir in (
    augmented_mel_spectrograms_features_metadata_dir,
    augmented_mel_spectrograms_features_dir,
):
    os.makedirs(_output_dir, exist_ok=True)

### Feature Extraction

In [None]:
# For every audio file: generate its augmented variants, turn each variant into a
# padded + normalized mel-spectrogram, save it as .npy and record its location.
metadata = []
max_len = 150  # fixed number of time frames every spectrogram is padded to

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound before the try block so the error message below always names the
    # file that actually failed (never a stale or undefined value).
    file_path = row.file_path
    try:
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        # File-name stem shared by all augmented variants of this recording.
        original_name = os.path.splitext(os.path.basename(file_path))[0]

        for aug_audio, aug_name in augmented_audios:

            # Extract mel-spectrogram (128 mel bands, per the saved-array check below)
            mel_spec = extract_mel_spectrogram(aug_audio, sample_rate)

            # Pad to fixed length
            mel_spec = pad_features(mel_spec, max_len=max_len)

            # Normalize
            mel_spec = normalize_features(mel_spec)

            # Save as .npy, one file per (recording, augmentation) pair
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(augmented_mel_spectrograms_features_dir, base_name)
            np.save(out_path, mel_spec)

            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: report the failing file and move on to the next one.
        # Variants saved before the failure stay on disk and in `metadata`.
        print(f"Error processing {file_path}: {e}")

# Stored under its own name: reassigning `df` here would replace the source
# metadata (audio paths) with feature paths and break the next extraction cell.
df_cnn = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(augmented_mel_spectrograms_features_metadata_dir, 'metadata.csv')

df_cnn.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

# Last expression of the cell, so the preview is actually rendered.
df_cnn.head()

## (Data Augmentation -> MFCCs + Deltas -> Mel-Spectrograms -> Spectral -> Prosodic -> Pad -> Normalize) Feature Extraction

### Make sure the output directories exist

In [4]:
# Output locations for the combined (MFCC + mel + spectral + prosodic) feature set:
# the metadata CSV goes in the parent directory, the .npy arrays in its
# `audio_features` child.
augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir = (
    "../data/features/audio_16k/augmented_mfcc+deltas_mel-spectrograms_spectral_prosodic"
)
augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir = (
    f"{augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir}/audio_features"
)

# Create both directories; already-existing ones are left untouched.
for _output_dir in (
    augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir,
    augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir,
):
    os.makedirs(_output_dir, exist_ok=True)

### Feature Extraction

In [5]:
# For every audio file: generate its augmented variants, extract all four feature
# families from each variant, stack them into one padded + normalized matrix,
# save it as .npy and record its location.
metadata = []
max_len = 150  # fixed number of time frames every feature matrix is padded to

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound before the try block so the error message below always names the
    # file that actually failed (never a stale or undefined value).
    file_path = row.file_path
    try:
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        # File-name stem shared by all augmented variants of this recording.
        original_name = os.path.splitext(os.path.basename(file_path))[0]

        for aug_audio, aug_name in augmented_audios:

            mfccs = extract_mfcc_features(aug_audio, sample_rate)  # 120 features
            mel_spectrogram = extract_mel_spectrogram(aug_audio, sample_rate)  # 128 features
            spectral = extract_spectral_features(aug_audio, sample_rate)  # 12 features
            prosodic = extract_prosodic_features(aug_audio, sample_rate)  # 2 features

            # The extractors may return different frame counts; truncate all of
            # them to the shortest so they can be stacked row-wise.
            min_len = min(
                mfccs.shape[1],
                prosodic.shape[1],
                spectral.shape[1],
                mel_spectrogram.shape[1],
            )

            mfccs = mfccs[:, :min_len]
            mel_spectrogram = mel_spectrogram[:, :min_len]
            spectral = spectral[:, :min_len]
            prosodic = prosodic[:, :min_len]

            # Stack all features together
            features = np.vstack([mfccs, mel_spectrogram, spectral, prosodic])
            # Expected shape: (262, time)  -- 120 + 128 + 12 + 2 rows

            # Pad to fixed length
            features = pad_features(features, max_len=max_len)
            # Expected shape: (262, 150)

            # Normalize
            features = normalize_features(features)
            # Expected shape: (262, 150) - normalized

            # Save as .npy, one file per (recording, augmentation) pair
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir, base_name)
            np.save(out_path, features)

            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: report the failing file and move on to the next one.
        # Variants saved before the failure stay on disk and in `metadata`.
        print(f"Error processing {file_path}: {e}")

# Stored under its own name so the source metadata in `df` stays available
# (re-running this cell or any other extraction cell keeps working).
df_cnn_lstm = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir, 'metadata.csv')

df_cnn_lstm.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

Extracting features: 100%|██████████████████████████████████████████████████████| 11318/11318 [6:31:35<00:00,  2.08s/it]


Features Extraction Completed!


### Checking the number of features

In [3]:
import numpy as np

# Spot-check one saved mel-spectrogram array: its first axis is reported as the
# feature count and its second axis as the number of time frames.
sample_feature_path = '../data/features/audio_16k/augmented_mel_spectrograms/audio_features/crema-d_1001_angry_1_16k_noise_0.005.npy'
feature_check = np.load(sample_feature_path)

dims = feature_check.shape
print(f"Shape: {dims}")
print(f"Number of features: {dims[0]}")
print(f"Time frames: {dims[1]}")

Shape: (128, 150)
Number of features: 128
Time frames: 150


## Summary

In [None]:
# Reload every table from the CSVs on disk instead of relying on kernel variables:
# the extraction cells take hours, may not have been run in this session, and the
# in-memory `df` may no longer hold the source metadata by the time this runs.
source_metadata = pd.read_csv("../data/processed/metadata.csv")
df_svm = pd.read_csv(
    os.path.join(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir, "metadata.csv")
)
df_cnn = pd.read_csv(
    os.path.join(augmented_mel_spectrograms_features_metadata_dir, "metadata.csv")
)
df_cnn_lstm = pd.read_csv(
    os.path.join(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir, "metadata.csv")
)

# Augmentation factor = saved samples per source recording; NaN when there are
# no source recordings, rather than a ZeroDivisionError.
n_source = len(source_metadata)
cnn_factor = len(df_cnn) / n_source if n_source else float("nan")
cnn_lstm_factor = len(df_cnn_lstm) / n_source if n_source else float("nan")

print("\n" + "="*60)
print("FEATURE EXTRACTION SUMMARY")
print("="*60)
print("\n1. SVM Features:")
print(f"   - Samples: {len(df_svm)}")
print("   - Shape: (938,)")
print("   - Augmentation: None")
# NOTE: the variable is spelled with a capital "L" (`spectraL`) where it is defined.
print(f"   - Location: {mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir}")

print("\n2. CNN Features:")
print(f"   - Samples: {len(df_cnn)}")
print("   - Shape: (128, 150)")
print(f"   - Augmentation: {cnn_factor:.1f}x")
print(f"   - Location: {augmented_mel_spectrograms_features_dir}")

print("\n3. CNN-LSTM Features:")
print(f"   - Samples: {len(df_cnn_lstm)}")
print("   - Shape: (262, 150)")
print(f"   - Augmentation: {cnn_lstm_factor:.1f}x")
print(f"   - Location: {augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir}")

print("\n" + "="*60)
print("ALL FEATURE EXTRACTION COMPLETED!")
print("="*60)