# Speech Emotion Recognition - Exploratory Data Analysis

This notebook explores the CREMA-D dataset and analyzes audio features for emotion recognition.

## Table of Contents
1. Dataset Overview
2. Audio Waveform Analysis
3. Mel Spectrogram Visualization
4. Audio Feature Statistics
5. Interactive Audio Playback
6. Conclusion



In [None]:
import sys
from pathlib import Path
# Add `<working dir>/../src` to the import path so project modules can be imported.
# NOTE(review): this assumes the kernel's working directory is the notebook's own
# folder and that `src` sits next to it — confirm before running from elsewhere.
sys.path.append(str(Path.cwd().parent / 'src'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import glob
from IPython.display import Audio
import warnings
# Silences every Python warning for the rest of the session (no category filter is
# given), so numerical or deprecation warnings raised by later cells will not show.
warnings.filterwarnings('ignore')

# Plot defaults: seaborn white grid and a 12x6 inch default figure size.
# Cells that pass their own `figsize` override the default.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)



## 1. Dataset Overview



In [None]:
from dataset import CremaDDataset
from config import EMOTION_NAMES, AUDIO_WAV_DIR

# Build the dataset wrapper and obtain its metadata table.
# `df` and `dataset` are reused by the cells that follow.
dataset = CremaDDataset()
df = dataset.create_metadata_csv()

# Print a short textual summary in one call, one entry per line.
overview_lines = (
    f"Total samples: {len(df)}",
    f"\nDataset shape: {df.shape}",
    f"\nColumns: {df.columns.tolist()}",
    f"\nFirst few rows:",
)
print(*overview_lines, sep="\n")

# Last expression of the cell: rendered as a rich table by the notebook.
df.head()


In [None]:
# Per-emotion sample counts as reported by the dataset object.
emotion_dist = dataset.get_emotion_distribution()
print("\nEmotion Distribution:")
print(emotion_dist)

# Bar chart of the counts, drawn through the explicit Axes interface.
emotion_labels = emotion_dist['emotion']
emotion_counts = emotion_dist['count']

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(emotion_labels, emotion_counts, color='skyblue', edgecolor='navy')
ax.set_xlabel('Emotion', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('CREMA-D Dataset - Emotion Distribution', fontsize=14, fontweight='bold')

# Write each count just above its bar (bars sit at x = 0, 1, 2, ...).
for bar_index, n_samples in enumerate(emotion_counts):
    ax.text(bar_index, n_samples + 20, str(n_samples), ha='center', va='bottom', fontsize=11)

plt.tight_layout()
plt.show()



## 2. Audio Waveform Analysis



In [None]:
# Pick the first file listed in `df` for each emotion.
# `sample_files` maps emotion -> file path and is reused by later cells.
sample_files = {}
for emotion in EMOTION_NAMES:
    matches = df.loc[df['emotion'] == emotion, 'file_path']
    if not matches.empty:
        sample_files[emotion] = matches.iloc[0]

# Size the grid from the number of emotions actually found instead of assuming
# exactly six: more than six no longer overruns the axes array, fewer no longer
# leaves empty frames. Six emotions still give the original 3x2, 15x12 layout.
n_cols = 2
n_rows = max(1, (len(sample_files) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, (emotion, file_path) in zip(axes, sample_files.items()):
    # Load audio, resampled to 22.05 kHz
    y, sr = librosa.load(file_path, sr=22050)

    # Sample k occurs at k / sr seconds, so the last sample sits at
    # (len(y) - 1) / sr rather than len(y) / sr.
    times = np.arange(len(y)) / sr

    ax.plot(times, y, color='blue', alpha=0.7)
    ax.set_title(f'{emotion} - {Path(file_path).name}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Amplitude')
    ax.grid(True, alpha=0.3)

# Hide any grid cells left without a waveform (odd number of emotions, or none).
for unused_ax in axes[len(sample_files):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()



## 3. Mel Spectrogram Visualization



In [None]:
# Plot one Mel spectrogram per emotion, using the files in `sample_files`.
hop_length = 512  # must match between melspectrogram() and specshow() for a correct time axis

# Size the grid from the number of sample files instead of assuming exactly six:
# more than six no longer overruns the axes array, fewer no longer leaves empty
# frames. Six emotions still give the original 3x2, 15x14 layout.
n_cols = 2
n_rows = max(1, (len(sample_files) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 14 * n_rows / 3), squeeze=False)
axes = axes.flatten()

for ax, (emotion, file_path) in zip(axes, sample_files.items()):
    # Load audio, resampled to 22.05 kHz
    y, sr = librosa.load(file_path, sr=22050)

    # Power Mel spectrogram, converted to dB relative to its own maximum
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=128, n_fft=2048, hop_length=hop_length
    )
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # Plot
    img = librosa.display.specshow(
        mel_spec_db, sr=sr, hop_length=hop_length,
        x_axis='time', y_axis='mel', ax=ax, cmap='viridis',
    )
    ax.set_title(f'{emotion} - Mel Spectrogram', fontsize=12, fontweight='bold')
    fig.colorbar(img, ax=ax, format='%+2.0f dB')

# Hide any grid cells left without a spectrogram (odd number of emotions, or none).
for unused_ax in axes[len(sample_files):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()



## 4. Audio Feature Statistics



In [None]:
# Average duration, energy and zero-crossing rate per emotion.
# Only the FIRST `MAX_FILES_PER_EMOTION` files listed for each emotion are
# analysed (this is not a random sample), to keep the cell fast.
MAX_FILES_PER_EMOTION = 50

stats = []
skipped_files = []  # (file_path, reason) for every file that could not be analysed

for emotion in EMOTION_NAMES:
    emotion_files = df[df['emotion'] == emotion]['file_path'].tolist()

    durations = []
    energies = []
    zero_crossing_rates = []

    for file_path in emotion_files[:MAX_FILES_PER_EMOTION]:
        try:
            y, sr = librosa.load(file_path, sr=22050)
            if len(y) == 0:
                # An empty signal would divide by zero in the energy below.
                raise ValueError('empty audio signal')

            duration = len(y) / sr                                     # seconds
            energy = np.sum(y**2) / len(y)                             # mean squared amplitude
            zcr = np.mean(librosa.feature.zero_crossing_rate(y))       # mean over frames
        except Exception as exc:
            # Best effort: keep going, but record the failure instead of hiding it.
            # `Exception` (not a bare except) lets KeyboardInterrupt stop the cell.
            skipped_files.append((file_path, repr(exc)))
        else:
            # Append only once all three features succeeded, so the lists stay
            # the same length and `samples` matches every average.
            durations.append(duration)
            energies.append(energy)
            zero_crossing_rates.append(zcr)

    # An emotion with no usable files gets NaN averages explicitly rather than
    # relying on np.mean([]) and its (suppressed) RuntimeWarning.
    has_data = len(durations) > 0
    stats.append({
        'emotion': emotion,
        'avg_duration': np.mean(durations) if has_data else np.nan,
        'avg_energy': np.mean(energies) if has_data else np.nan,
        'avg_zcr': np.mean(zero_crossing_rates) if has_data else np.nan,
        'samples': len(durations)
    })

stats_df = pd.DataFrame(stats)
print("Audio Feature Statistics by Emotion:")
print(stats_df)

# Printed rather than warned: warnings are globally silenced in this notebook.
if skipped_files:
    print(f"\nSkipped {len(skipped_files)} file(s) that could not be analysed:")
    for skipped_path, reason in skipped_files:
        print(f"  {skipped_path}: {reason}")



In [None]:
# One bar chart per averaged feature, side by side.
# Each entry: (stats_df column, panel title, y-axis label, fill colour, edge colour)
feature_panels = [
    ('avg_duration', 'Average Audio Duration by Emotion', 'Duration (seconds)', 'lightcoral', 'darkred'),
    ('avg_energy', 'Average Energy by Emotion', 'Energy', 'lightgreen', 'darkgreen'),
    ('avg_zcr', 'Average Zero Crossing Rate by Emotion', 'ZCR', 'lightblue', 'darkblue'),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for panel_ax, (column, title, y_label, fill, outline) in zip(axes, feature_panels):
    panel_ax.bar(stats_df['emotion'], stats_df[column], color=fill, edgecolor=outline)
    panel_ax.set_title(title, fontweight='bold')
    panel_ax.set_xlabel('Emotion')
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()



## 5. Interactive Audio Playback

Listen to sample audio files for each emotion:



In [None]:
# Show a short text header followed by an inline audio player for each sample file.
rule = '=' * 60

for emotion, file_path in sample_files.items():
    header = "\n".join([
        f"\n{rule}",
        f"Emotion: {emotion}",
        f"File: {Path(file_path).name}",
        f"{rule}",
    ])
    print(header)
    display(Audio(file_path))



## 6. Conclusion

This exploratory analysis reveals:

1. **Dataset Balance**: The dataset is relatively balanced across emotions, with NEU having slightly fewer samples
2. **Audio Characteristics**: Different emotions show distinct patterns in energy, duration, and spectral features
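3. **Spectrograms**: In the one sample plotted per emotion, the Mel spectrograms show visibly different patterns for different emotions
4. **Feature Variability**: Average energy and zero crossing rate differ across emotions in the files analyzed (up to 50 per emotion); these differences were not tested for statistical significance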

These insights will guide the model development and feature engineering process.

