# Features Extraction

## Import Libraries

In [16]:
import os
import numpy as np
import importlib
import sys
import os
import pandas as pd
from tqdm import tqdm
import librosa

## Import Utils

In [17]:
sys.path.append("..")
import utils
from utils import augment_audio
from utils import extract_mfcc_features
from utils import extract_prosodic_features
from utils import extract_spectral_features
from utils import extract_mel_spectrogram
from utils import pad_features
from utils import normalize_features
from utils import compute_feature_statistics


importlib.reload(utils)

from utils import augment_audio
from utils import extract_mfcc_features
from utils import extract_prosodic_features
from utils import extract_spectral_features
from utils import extract_mel_spectrogram
from utils import pad_features
from utils import normalize_features
from utils import compute_feature_statistics

## Import metadata csv

In [None]:
df = pd.read_csv("../data/processed/metadata.csv")
df

## (MFCCs + Deltas -> Spectral -> Prosodic -> Compute Statistics ) Feature Extraction

### Make sure dir exist

In [None]:
mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir = "../data/features/audio_16k/mfcc+deltas_spectral_prosodic_computed_statistics"
mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir = "../data/features/audio_16k/mfcc+deltas_spectral_prosodic_computed_statistics/audio_features"
os.makedirs(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir, exist_ok=True)
os.makedirs(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir, exist_ok=True)

### Feature Extraction

In [None]:
metadata = []

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    try:
        file_path = row.file_path
        sample_rate = row.sample_rate
        
        audio, sr = librosa.load(file_path, sr=sample_rate)

        # Extract time-series features
        mfcc = extract_mfcc_features(audio, sr) # (120, time)
        spectral = extract_spectral_features(audio, sr) # (12, time)
        prosodic = extract_prosodic_features(audio, sr) # (2, time) 

        # Align all features to same length
        min_len = min(mfcc.shape[1], spectral.shape[1], prosodic.shape[1])
        mfcc = mfcc[:, :min_len]
        spectral = spectral[:, :min_len]
        prosodic = prosodic[:, :min_len]

        # Stack all features
        all_features = np.vstack([mfcc, spectral, prosodic])  # (134, time_frames)

        # Compute statistics
        feature_vector = compute_feature_statistics(all_features)

        # Save as .npy
        base_name = os.path.basename(file_path).replace(".wav", ".npy")
        out_path = os.path.join(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_dir, base_name)
        np.save(out_path, feature_vector)

        metadata.append([out_path, file_path, row.emotion, row.dataset, row.actor, row.index, row.sample_rate]) 
        
    except Exception as e: 
        print(f"Error processing {file_path}: {e}") 
        continue

df = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(mfcc_plus_deltas_spectraL_prosodic_computed_statistics_features_metadata_dir, 'metadata.csv')

df.to_csv(csv_path, index=False)
df.head()

print("Features Extraction Completed!")

## (Mel-Spectrograms -> Pad -> Normalize) Feature Extraction

### Make sure dir exist

In [None]:
augmented_mel_spectrograms_features_metadata_dir = "../data/features/audio_16k/augmented_mel_spectrograms"
augmented_mel_spectrograms_features_dir = "../data/features/audio_16k/augmented_mel_spectrograms/audio_features"
os.makedirs(augmented_mel_spectrograms_features_metadata_dir, exist_ok=True)
os.makedirs(augmented_mel_spectrograms_features_dir, exist_ok=True)

### Feature Extraction

In [None]:
metadata = []
max_len = 150 

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    try:
        file_path = row.file_path
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        for aug_idx, (aug_audio, aug_name) in enumerate(augmented_audios):

            # Extract mel-spectrogram
            mel_spec = extract_mel_spectrogram(aug_audio, sample_rate) # 120 features

            # Pad to fixed length
            mel_spec = pad_features(mel_spec, max_len=max_len)
            
            # Normalize
            mel_spec = normalize_features(mel_spec)

            # Save as .npy
            original_name = os.path.basename(file_path).replace(".wav", "")
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(augmented_mel_spectrograms_features_dir, base_name)
            np.save(out_path, mel_spec)
    
            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])     

    except Exception as e: 
        print(f"Error processing {file_path}: {e}") 
        continue

df = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(augmented_mel_spectrograms_features_metadata_dir, 'metadata.csv')

df.to_csv(csv_path, index=False)
df.head()

print("Features Extraction Completed!")

## (Mel-Spectrograms -> Pad -> Normalize) Feature Extraction for Gathered Dataset

### Import Gathered dataset

In [6]:
gathered_df = pd.read_csv("../data/processed/audio_16k/gathered/metadata.csv")
gathered_df

Unnamed: 0,file_path,emotion,dataset,actor,index,sample_rate
0,../data/processed/audio_16k/gathered/audio_fil...,happy,gathered,e,30,16000
1,../data/processed/audio_16k/gathered/audio_fil...,sad,gathered,e,38,16000
2,../data/processed/audio_16k/gathered/audio_fil...,fear,gathered,e,28,16000
3,../data/processed/audio_16k/gathered/audio_fil...,fear,gathered,e,29,16000
4,../data/processed/audio_16k/gathered/audio_fil...,disgust,gathered,e,26,16000
5,../data/processed/audio_16k/gathered/audio_fil...,neutral,gathered,e,34,16000
6,../data/processed/audio_16k/gathered/audio_fil...,neutral,gathered,e,33,16000
7,../data/processed/audio_16k/gathered/audio_fil...,sad,gathered,e,40,16000
8,../data/processed/audio_16k/gathered/audio_fil...,angry,gathered,e,23,16000
9,../data/processed/audio_16k/gathered/audio_fil...,happy,gathered,e,31,16000


### Make sure dir exist

In [7]:
gathered_augmented_mel_spectrograms_features_metadata_dir = "../data/features/audio_16k/gathered/augmented_mel_spectrograms"
gathered_augmented_mel_spectrograms_features_dir = "../data/features/audio_16k/gathered/augmented_mel_spectrograms/audio_features"
os.makedirs(gathered_augmented_mel_spectrograms_features_metadata_dir, exist_ok=True)
os.makedirs(gathered_augmented_mel_spectrograms_features_dir, exist_ok=True)

### Feature Extraction

In [8]:
metadata = []
max_len = 150 

for row in tqdm(gathered_df.itertuples(), total=len(gathered_df), desc="Extracting features"):
    try:
        file_path = row.file_path
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        for aug_idx, (aug_audio, aug_name) in enumerate(augmented_audios):

            # Extract mel-spectrogram
            mel_spec = extract_mel_spectrogram(aug_audio, sample_rate) # 120 features

            # Pad to fixed length
            mel_spec = pad_features(mel_spec, max_len=max_len)
            
            # Normalize
            mel_spec = normalize_features(mel_spec)

            # Save as .npy
            original_name = os.path.basename(file_path).replace(".wav", "")
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(gathered_augmented_mel_spectrograms_features_dir, base_name)
            np.save(out_path, mel_spec)
    
            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])     

    except Exception as e: 
        print(f"Error processing {file_path}: {e}") 
        continue

df = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(gathered_augmented_mel_spectrograms_features_metadata_dir, 'metadata.csv')

df.to_csv(csv_path, index=False)
df.head()

print("Features Extraction Completed!")

Extracting features: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 22.13it/s]

Features Extraction Completed!





## (Mel-Spectrograms -> Pad -> Normalize) Feature Extraction for Cross-Lingual CNN

### Import Cross Lingual Dataset

In [23]:
four_db_df = pd.read_csv("../data/processed/metadata.csv")
subesco_df = pd.read_csv("../data/processed/audio_16k/subesco/metadata.csv")
emovo_df = pd.read_csv("../data/processed/audio_16k/emovo/metadata.csv")

four_db_df = four_db_df[four_db_df["dataset"].str.lower() == "crema-d"]

four_db_bal = four_db_df.groupby("emotion").sample(n=83, replace=True)
subesco_bal = subesco_df.groupby("emotion").sample(n=83, replace=True)
emovo_bal = emovo_df.groupby("emotion").sample(n=83, replace=True)
print("emovo", emovo_df)

combined_balanced = pd.concat([four_db_bal, subesco_bal, emovo_bal], ignore_index=True)

combined_balanced

emovo                                              file_path  emotion dataset actor  \
0    ../data/processed/audio_16k/emovo/audio_files/...  disgust   emovo    m2   
1    ../data/processed/audio_16k/emovo/audio_files/...      sad   emovo    f1   
2    ../data/processed/audio_16k/emovo/audio_files/...    happy   emovo    f1   
3    ../data/processed/audio_16k/emovo/audio_files/...  disgust   emovo    m1   
4    ../data/processed/audio_16k/emovo/audio_files/...  neutral   emovo    m3   
..                                                 ...      ...     ...   ...   
499  ../data/processed/audio_16k/emovo/audio_files/...     fear   emovo    f2   
500  ../data/processed/audio_16k/emovo/audio_files/...    angry   emovo    m3   
501  ../data/processed/audio_16k/emovo/audio_files/...    happy   emovo    f3   
502  ../data/processed/audio_16k/emovo/audio_files/...    angry   emovo    f1   
503  ../data/processed/audio_16k/emovo/audio_files/...     fear   emovo    f3   

     index  sample_ra

Unnamed: 0,file_path,emotion,dataset,actor,index,sample_rate
0,../data/processed/audio_16k/crema-d_1045_angry...,angry,crema-d,1045,3618,16000
1,../data/processed/audio_16k/crema-d_1084_angry...,angry,crema-d,1084,6827,16000
2,../data/processed/audio_16k/crema-d_1027_angry...,angry,crema-d,1027,2154,16000
3,../data/processed/audio_16k/crema-d_1082_angry...,angry,crema-d,1082,6681,16000
4,../data/processed/audio_16k/crema-d_1078_angry...,angry,crema-d,1078,6365,16000
...,...,...,...,...,...,...
1489,../data/processed/audio_16k/emovo/audio_files/...,sad,emovo,f3,543,16000
1490,../data/processed/audio_16k/emovo/audio_files/...,sad,emovo,f3,544,16000
1491,../data/processed/audio_16k/emovo/audio_files/...,sad,emovo,f3,540,16000
1492,../data/processed/audio_16k/emovo/audio_files/...,sad,emovo,m2,569,16000


### Make sure dir exist

In [24]:
cross_augmented_mel_spectrograms_features_metadata_dir = "../data/features/audio_16k/cross_lingual/augmented_mel_spectrograms"
cross_augmented_mel_spectrograms_features_dir = "../data/features/audio_16k/cross_lingual/augmented_mel_spectrograms/audio_features"
os.makedirs(cross_augmented_mel_spectrograms_features_metadata_dir, exist_ok=True)
os.makedirs(cross_augmented_mel_spectrograms_features_dir, exist_ok=True)

### Feature Extraction

In [25]:
metadata = []
max_len = 150 

for row in tqdm(combined_balanced.itertuples(), total=len(combined_balanced), desc="Extracting features"):
    try:
        file_path = row.file_path
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        for aug_idx, (aug_audio, aug_name) in enumerate(augmented_audios):

            # Extract mel-spectrogram
            mel_spec = extract_mel_spectrogram(aug_audio, sample_rate) # 120 features

            # Pad to fixed length
            mel_spec = pad_features(mel_spec, max_len=max_len)
            
            # Normalize
            mel_spec = normalize_features(mel_spec)

            # Save as .npy
            original_name = os.path.basename(file_path).replace(".wav", "")
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(cross_augmented_mel_spectrograms_features_dir, base_name)
            np.save(out_path, mel_spec)
    
            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])     

    except Exception as e: 
        print(f"Error processing {file_path}: {e}") 
        continue

df = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(cross_augmented_mel_spectrograms_features_metadata_dir, 'metadata.csv')

df.to_csv(csv_path, index=False)
df.head()

print("Features Extraction Completed!")

Extracting features: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1494/1494 [02:05<00:00, 11.93it/s]

Features Extraction Completed!





## (Data Augmentation -> MFCCs + Deltas -> Mel-Spectrograms -> Spectral -> Prosidic -> Pad -> Normalize) Feature Extraction

### Make sure dir exist

In [None]:
augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir = "../data/features/audio_16k/augmented_mfcc+deltas_mel-spectrograms_spectral_prosodic"
augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir = "../data/features/audio_16k/augmented_mfcc+deltas_mel-spectrograms_spectral_prosodic/audio_features"
os.makedirs(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir, exist_ok=True)
os.makedirs(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir, exist_ok=True)

### Feature Extraction

In [None]:
metadata = []
max_len=150

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    try:
        file_path = row.file_path
        sample_rate = row.sample_rate

        augmented_audios = augment_audio(file_path, sample_rate)

        for aug_idx, (aug_audio, aug_name) in enumerate(augmented_audios):
            
            mfccs = extract_mfcc_features(aug_audio, sample_rate) # 120 features
            mel_spectrogram = extract_mel_spectrogram(aug_audio, sample_rate) # 128 features
            spectral = extract_spectral_features(aug_audio, sample_rate) # 12 features
            prosodic = extract_prosodic_features(aug_audio, sample_rate) # 2 features
            
            # Align to same time dimension
            min_len = min(
                mfccs.shape[1], 
                prosodic.shape[1], 
                spectral.shape[1],
                mel_spectrogram.shape[1]
            )

            mfccs = mfccs[:, :min_len]
            mel_spectrogram = mel_spectrogram[:, :min_len]
            spectral = spectral[:, :min_len]
            prosodic = prosodic[:, :min_len]
    
            # Stack all features together
            features = np.vstack([mfccs, mel_spectrogram, spectral, prosodic])
            # Shape: (261, time)

            # Pad to fixed length
            features = pad_features(features, max_len=max_len)
            # Shape: (261, 150)
    
            # Normalize
            features = normalize_features(features)
            # Shape: (261, 150) - normalized
    
            # Save as .npy
            original_name = os.path.basename(file_path).replace(".wav", "")
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir, base_name)
            np.save(out_path, features)
    
            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])    

    except Exception as e: 
        print(f"Error processing {file_path}: {e}") 
        continue

df = pd.DataFrame(metadata, columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"])
csv_path = os.path.join(augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_metadata_dir, 'metadata.csv')

df.to_csv(csv_path, index=False)
df.head()                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
print("Features Extraction Completed!")

### Checking features number

In [None]:
import numpy as np

feature_check = np.load('../data/features/audio_16k/augmented_mfcc+deltas_mel-spectrograms_spectral_prosodic/audio_features/savee_JK_sad_334_16k_original.npy')
print(f"Shape: {feature_check.shape}")
print(f"Number of features: {feature_check.shape[0]}")
print(f"Time frames: {feature_check.shape[1]}")

## Summary

In [None]:
print("\n" + "="*60)
print("FEATURE EXTRACTION SUMMARY")
print("="*60)
print(f"\n1. SVM Features:")
print(f"   - Samples: {len(df_svm)}")
print(f"   - Shape: (938,)")
print(f"   - Augmentation: None")
print(f"   - Location: {mfcc_plus_deltas_spectral_prosodic_computed_statistics_features_dir}")

print(f"\n2. CNN Features:")
print(f"   - Samples: {len(df_cnn)}")
print(f"   - Shape: (128, 150)")
print(f"   - Augmentation: {len(df_cnn) / len(df):.1f}x")
print(f"   - Location: {augmented_mel_spectrograms_features_dir}")

print(f"\n3. CNN-LSTM Features:")
print(f"   - Samples: {len(df_cnn_lstm)}")
print(f"   - Shape: (262, 150)")
print(f"   - Augmentation: {len(df_cnn_lstm) / len(df):.1f}x")
print(f"   - Location: {augmented_mfcc_plus_deltas_mel_spectrogram_spectral_prosodic_features_dir}")

print("\n" + "="*60)
print("ALL FEATURE EXTRACTION COMPLETED!")
print("="*60)