# 01 — Data Exploration

Explore DEAP and IEMOCAP pre-processed features:
class distributions, feature statistics, sample visualisations.

In [None]:
import sys, os
# Run the rest of the notebook from the project checkout so relative paths
# resolve against it (the /content prefix suggests a Colab runtime — confirm).
os.chdir('/content/amers')
# Put the current directory first on the import path so the `src.*`
# imports in the later cells are resolved from this checkout.
sys.path.insert(0, '.')

from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder holding the AMERS data (presumably a mounted Google Drive
# directory — confirm). Each dataset keeps its pre-processed features under
# <root>/data/<dataset>/processed.
DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')
DEAP_DIR = DRIVE_BASE.joinpath('data', 'deap', 'processed')
IEMOCAP_DIR = DRIVE_BASE.joinpath('data', 'iemocap', 'processed')

In [None]:
# Load every pre-processed DEAP feature file in one go.
from src.data.deap_loader import DEAPLoader
from src.data.label_mapper import LabelMapper

loader = DEAPLoader(
    processed_dir=str(DEAP_DIR),
    label_mapper=LabelMapper(),
)
# flatten=True is passed through to the loader; the later cells index the
# result as a 2-D array (eeg_features[:, :32]).
eeg_features, eeg_labels = loader.load_all(flatten=True)

# Quick sanity check: array shape plus the number of samples per label id.
print(f'DEAP: {eeg_features.shape}, labels: {np.bincount(eeg_labels)}')

In [None]:
# Class distribution and per-class feature summary - DEAP
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# Display names indexed by integer label id; `names` is also reused by the
# IEMOCAP bar chart in a later cell, so keep it defined here.
names = ['Happy', 'Sad', 'Angry', 'Neutral']
n_classes = len(names)

# Left panel: samples per class. minlength pads the counts so that a class
# with no samples still gets a (zero-height) bar and lines up with `names`.
counts = np.bincount(eeg_labels, minlength=n_classes)
axes[0].bar(names, counts, color=['#2ecc71', '#3498db', '#e74c3c', '#95a5a6'])
axes[0].set_title('DEAP — Class Distribution')
axes[0].set_ylabel('Samples')

# Right panel: for each class, the distribution of the per-sample mean taken
# across the feature axis.
per_class_means = [
    eeg_features[eeg_labels == c].mean(axis=1) for c in range(n_classes)
]
axes[1].boxplot(per_class_means)
# boxplot's `labels=` keyword is deprecated since Matplotlib 3.9 (renamed to
# `tick_labels`); labelling the ticks through the axis works on both old and
# new releases. Boxes are drawn at positions 1..n_classes by default.
axes[1].set_xticks(range(1, n_classes + 1))
axes[1].set_xticklabels(names)
axes[1].set_title('DEAP — Feature Mean per Class')

plt.tight_layout()
plt.show()

In [None]:
# Load the pre-processed IEMOCAP features and plot how many samples fall
# into each class.
from src.data.iemocap_loader import IEMOCAPLoader

sp_loader = IEMOCAPLoader(
    processed_dir=str(IEMOCAP_DIR),
    label_mapper=LabelMapper(),
)
sp_features, sp_labels = sp_loader.load_all()

# Sanity check: array shape plus the number of samples per label id.
print(f'IEMOCAP: {sp_features.shape}, labels: {np.bincount(sp_labels)}')

# Class distribution - IEMOCAP. Counts are padded to four entries so they
# line up with `names` (defined in the DEAP distribution cell).
counts_sp = np.bincount(sp_labels, minlength=4)

plt.figure(figsize=(6, 4))
plt.bar(
    x=names,
    height=counts_sp,
    color=['#2ecc71', '#3498db', '#e74c3c', '#95a5a6'],
)
plt.ylabel('Samples')
plt.title('IEMOCAP — Class Distribution')
plt.show()

In [None]:
# Pairwise correlation between the first 32 DEAP feature columns, shown as a
# heatmap with a diverging palette centred on zero.
leading_features = eeg_features[:, :32]
# corrcoef treats rows as variables, hence the transpose: one row per feature.
feature_corr = np.corrcoef(leading_features.T)

plt.figure(figsize=(10, 8))
sns.heatmap(feature_corr, cmap='RdBu_r', center=0)
plt.title('DEAP Feature Correlation (first 32 dims)')
plt.show()