# Feature Extraction

## Import Libraries

In [16]:
import os
import numpy as np
import importlib
import sys
import os
import pandas as pd
from tqdm import tqdm
import librosa

## Import Utils

In [17]:
# Make the parent directory importable so the local `utils` module resolves.
# The membership check keeps re-runs of this cell from appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")

import utils

# Reload so that edits made to utils.py after the kernel first imported it
# are picked up without restarting the kernel.
importlib.reload(utils)

# Bind the helper names once, after the reload, so they refer to the
# reloaded module's objects (importing them before the reload as well is
# redundant because these bindings replace the earlier ones anyway).
from utils import (
    augment_audio,
    extract_mfcc_features,
    extract_prosodic_features,
    normalize_features,
    pad_features,
)

## Load metadata CSV

In [18]:
# Read the metadata table of the processed audio clips (one row per clip)
# and show it as the cell output.
processed_metadata_csv = "../data/processed/metadata.csv"
df = pd.read_csv(processed_metadata_csv)
df

Unnamed: 0,file_path,emotion,dataset,actor,index,sample_rate
0,../data/processed/audio_16k/savee_OAF_angry_13...,angry,savee,OAF,132,16000
1,../data/processed/audio_16k/savee_YAF_fear_152...,fear,savee,YAF,1521,16000
2,../data/processed/audio_16k/savee_OAF_neutral_...,neutral,savee,OAF,592,16000
3,../data/processed/audio_16k/savee_OAF_disgust_...,disgust,savee,OAF,1177,16000
4,../data/processed/audio_16k/savee_OAF_disgust_...,disgust,savee,OAF,709,16000
...,...,...,...,...,...,...
11313,../data/processed/audio_16k/crema-d_1065_happy...,happy,crema-d,1065,5247,16000
11314,../data/processed/audio_16k/crema-d_1031_neutr...,neutral,crema-d,1031,2474,16000
11315,../data/processed/audio_16k/crema-d_1064_happy...,happy,crema-d,1064,5203,16000
11316,../data/processed/audio_16k/crema-d_1070_angry...,angry,crema-d,1070,5662,16000


## (MFCCs + Deltas -> Normalize -> Pad) Feature Extraction

### Make sure the output directories exist

In [4]:
# Output locations for the MFCC + deltas feature set:
#   - the metadata directory receives metadata.csv
#   - the nested audio_features directory receives one .npy file per clip
mfcc_plus_deltas_features_metadata_dir = "../data/features/audio_16k/mfcc+deltas"
mfcc_plus_deltas_features_dir = "../data/features/audio_16k/mfcc+deltas/audio_features"

# Create both directories; exist_ok=True makes this safe to re-run.
for output_dir in (mfcc_plus_deltas_features_metadata_dir, mfcc_plus_deltas_features_dir):
    os.makedirs(output_dir, exist_ok=True)

### Feature Extraction

In [5]:
# MFCC + deltas pipeline: for every clip listed in `df`, load the audio,
# extract features, normalize, pad, save the matrix as .npy, and finally
# write a metadata CSV describing the saved feature files.
#
# The source table `df` is deliberately NOT overwritten: the later extraction
# cells read the audio paths from `df.file_path`, so rebinding `df` to the
# feature metadata (whose file_path points at .npy files) would break them
# on a top-to-bottom run.
metadata = []

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound outside the try block so the error message below always names
    # the clip that is actually being processed.
    file_path = row.file_path
    try:
        # Load at the sample rate recorded in the metadata table.
        audio, sr = librosa.load(file_path, sr=row.sample_rate)

        # Extract MFCCs + deltas
        features = extract_mfcc_features(
            audio,
            sr,
            n_mfcc=40,
            hop_length=512,
            n_fft=1024
        )  # Expected shape: (120, time_steps) - TODO confirm against utils

        # Stack all features together. np.vstack accepts a sequence of arrays
        # as well as a single 2-D array; which of the two the helper returns
        # is not visible from this notebook.
        features = np.vstack(features)

        # Normalize (the time axis still has its original length here)
        features = normalize_features(features)

        # Pad to fixed length - presumably yields (120, 150); TODO confirm
        features = pad_features(features, max_len=150)

        # Optional channel dimension for a CNN (currently disabled):
        # features = features[np.newaxis, ...]
        # Shape would become (1, 120, 150)

        # Save as .npy, named after the audio file with its extension swapped.
        base_name = os.path.splitext(os.path.basename(file_path))[0] + ".npy"
        out_path = os.path.join(mfcc_plus_deltas_features_dir, base_name)
        np.save(out_path, features)

        # `row.index` is the CSV column named "index"; the DataFrame's own
        # index is exposed by itertuples() as `row.Index`.
        metadata.append([out_path, row.emotion, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: report the failing clip and move on to the next one.
        print(f"Error processing {file_path}: {e}")

mfcc_plus_deltas_metadata_df = pd.DataFrame(
    metadata,
    columns=["file_path", "emotion", "dataset", "actor", "index", "sample_rate"],
)
csv_path = os.path.join(mfcc_plus_deltas_features_metadata_dir, 'metadata.csv')
mfcc_plus_deltas_metadata_df.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

# Last expression of the cell, so the preview is actually rendered.
mfcc_plus_deltas_metadata_df.head()

Extracting features: 100%|███████████████████████████████████████████████████████| 11318/11318 [01:41<00:00, 111.84it/s]


Features Extraction Completed!


## (MFCCs + Deltas -> Prosodic -> Normalize -> Pad) Feature Extraction

### Make sure the output directories exist

In [19]:
# Output locations for the MFCC + deltas + prosodic feature set:
#   - the metadata directory receives metadata.csv
#   - the nested audio_features directory receives one .npy file per clip
mfcc_plus_deltas_prosodic_features_metadata_dir = "../data/features/audio_16k/mfcc+deltas_prosodic"
mfcc_plus_deltas_prosodic_features_dir = "../data/features/audio_16k/mfcc+deltas_prosodic/audio_features"

# Create both directories; exist_ok=True makes this safe to re-run.
for output_dir in (
    mfcc_plus_deltas_prosodic_features_metadata_dir,
    mfcc_plus_deltas_prosodic_features_dir,
):
    os.makedirs(output_dir, exist_ok=True)

### Feature Extraction

In [20]:
# MFCC + deltas + prosodic pipeline: for every clip listed in `df`, load the
# audio, extract both feature groups, stack, normalize, pad, save the matrix
# as .npy, and finally write a metadata CSV describing the saved files.
#
# The source table `df` is deliberately NOT overwritten: extraction cells
# read the audio paths from `df.file_path`, so rebinding `df` to the feature
# metadata (whose file_path points at .npy files) would make any later
# extraction fail on a top-to-bottom run.
#
# NOTE: long-running - the recorded run took about 1h20m for 11318 clips.
metadata = []

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound outside the try block so the error message below always names
    # the clip that is actually being processed.
    file_path = row.file_path
    try:
        # Load at the sample rate recorded in the metadata table.
        audio, sr = librosa.load(file_path, sr=row.sample_rate)

        mfccs = extract_mfcc_features(
            audio,
            sr,
            n_mfcc=40,
            hop_length=512,
            n_fft=1024
        )  # Expected shape: (120, time_steps) - TODO confirm against utils

        prosodic = extract_prosodic_features(audio, sr)

        # Stack all features together along the feature axis. The resulting
        # row count is 120 plus however many rows the prosodic helper returns
        # (not visible from this notebook).
        features = np.vstack([mfccs, prosodic])

        # Normalize (the time axis still has its original length here)
        features = normalize_features(features)

        # Pad to fixed length - presumably fixes the time axis at 150; TODO confirm
        features = pad_features(features, max_len=150)

        # Optional channel dimension for a CNN (currently disabled):
        # features = features[np.newaxis, ...]

        # Save as .npy, named after the audio file with its extension swapped.
        base_name = os.path.splitext(os.path.basename(file_path))[0] + ".npy"
        out_path = os.path.join(mfcc_plus_deltas_prosodic_features_dir, base_name)
        np.save(out_path, features)

        # Keep both the feature path and the originating audio path.
        # `row.index` is the CSV column named "index"; the DataFrame's own
        # index is exposed by itertuples() as `row.Index`.
        metadata.append([out_path, file_path, row.emotion, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: report the failing clip and move on to the next one.
        print(f"Error processing {file_path}: {e}")

mfcc_plus_deltas_prosodic_metadata_df = pd.DataFrame(
    metadata,
    columns=["file_path", "audio_file_path", "emotion", "dataset", "actor", "index", "sample_rate"],
)
csv_path = os.path.join(mfcc_plus_deltas_prosodic_features_metadata_dir, 'metadata.csv')
mfcc_plus_deltas_prosodic_metadata_df.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

# Last expression of the cell, so the preview is actually rendered.
mfcc_plus_deltas_prosodic_metadata_df.head()

Extracting features: 100%|██████████████████████████████████████████████████████| 11318/11318 [1:19:30<00:00,  2.37it/s]

Features Extraction Completed!





## (Data Augmentation -> MFCCs + Deltas -> Prosodic -> Normalize -> Pad) Feature Extraction

### Make sure the output directories exist

In [4]:
# Output locations for the augmented MFCC + deltas + prosodic feature set:
#   - the metadata directory receives metadata.csv
#   - the nested audio_features directory receives the .npy feature files
augmented_mfcc_plus_deltas_prosodic_features_metadata_dir = "../data/features/audio_16k/augmented_mfcc+deltas_prosodic"
augmented_mfcc_plus_deltas_prosodic_features_dir = "../data/features/audio_16k/augmented_mfcc+deltas_prosodic/audio_features"

# Create both directories; exist_ok=True makes this safe to re-run.
for output_dir in (
    augmented_mfcc_plus_deltas_prosodic_features_metadata_dir,
    augmented_mfcc_plus_deltas_prosodic_features_dir,
):
    os.makedirs(output_dir, exist_ok=True)

### Feature Extraction

In [5]:
# Augmented pipeline: for every clip listed in `df`, generate augmented
# variants, and for each variant extract MFCC + deltas and prosodic features,
# stack, normalize, pad and save the matrix as .npy. A metadata CSV with one
# row per saved variant is written at the end.
#
# The source table `df` is deliberately NOT overwritten, so this cell can be
# re-run (and other extraction cells can run afterwards) while `df.file_path`
# still points at the audio files.
#
# NOTE: long-running - the recorded run took about 5h25m for 11318 clips.
metadata = []

for row in tqdm(df.itertuples(), total=len(df), desc="Extracting features"):
    # Bound outside the try block so the error message below always names
    # the clip that is actually being processed.
    file_path = row.file_path
    sample_rate = row.sample_rate
    try:
        # Yields (audio, augmentation_name) pairs, as unpacked in the loop below.
        augmented_audios = augment_audio(file_path, sample_rate)

        # The stem of the source file is the same for all of its variants.
        original_name = os.path.splitext(os.path.basename(file_path))[0]

        for aug_audio, aug_name in augmented_audios:
            mfccs = extract_mfcc_features(
                aug_audio,
                sample_rate,
                n_mfcc=40,
                hop_length=512,
                n_fft=1024
            )  # Expected shape: (120, time_steps) - TODO confirm against utils

            prosodic = extract_prosodic_features(aug_audio, sample_rate)

            # Stack all features together along the feature axis. The row
            # count is 120 plus however many rows the prosodic helper returns
            # (not visible from this notebook).
            features = np.vstack([mfccs, prosodic])

            # Normalize (the time axis still has its original length here)
            features = normalize_features(features)

            # Pad to fixed length - presumably fixes the time axis at 150; TODO confirm
            features = pad_features(features, max_len=150)

            # Optional channel dimension for a CNN (currently disabled):
            # features = features[np.newaxis, ...]

            # Save as .npy; the augmentation name keeps variants of the same
            # clip from overwriting each other.
            base_name = f"{original_name}_{aug_name}.npy"
            out_path = os.path.join(augmented_mfcc_plus_deltas_prosodic_features_dir, base_name)
            np.save(out_path, features)

            # `row.index` is the CSV column named "index"; the DataFrame's
            # own index is exposed by itertuples() as `row.Index`.
            metadata.append([out_path, file_path, row.emotion, aug_name, row.dataset, row.actor, row.index, row.sample_rate])

    except Exception as e:
        # Best effort: a failure skips the remaining variants of this clip,
        # reports it, and moves on to the next clip.
        print(f"Error processing {file_path}: {e}")

augmented_mfcc_plus_deltas_prosodic_metadata_df = pd.DataFrame(
    metadata,
    columns=["file_path", "audio_file_path", "emotion", "augmentation", "dataset", "actor", "index", "sample_rate"],
)
csv_path = os.path.join(augmented_mfcc_plus_deltas_prosodic_features_metadata_dir, 'metadata.csv')
augmented_mfcc_plus_deltas_prosodic_metadata_df.to_csv(csv_path, index=False)

print("Features Extraction Completed!")

# Last expression of the cell, so the preview is actually rendered.
augmented_mfcc_plus_deltas_prosodic_metadata_df.head()

Extracting features: 100%|██████████████████████████████████████████████████████| 11318/11318 [5:24:33<00:00,  1.72s/it]

Features Extraction Completed!



