In [12]:
import os
import glob
import pandas as pd
import cv2
import soundfile as sf

# üÜï YOUR EXACT MACBOOK PATH
RAVDESS_ROOT = r"/Users/anzheladavityan/Desktop/RAVDESS"

video_dirs = [
    os.path.join(RAVDESS_ROOT, 'song'),
    os.path.join(RAVDESS_ROOT, 'speech')
]
audio_dirs = [
    os.path.join(RAVDESS_ROOT, 'Audio_Song_Actors_01-24'),
    os.path.join(RAVDESS_ROOT, 'Audio_Speech_Actors_01-24')
]

def parse_ravdess_filename(filename):
    """Parse RAVDESS filename to extract all identifiers."""
    base = os.path.splitext(filename)[0]
    parts = base.split('-')
    if len(parts) != 7:
        return None
    return {
        'Modality': parts[0],
        'Vocal_channel': parts[1],
        'Emotion': parts[2],
        'Emotional_intensity': parts[3],
        'Statement': parts[4],
        'Repetition': parts[5],
        'Actor': parts[6]
    }

# Scan files and count ACTUAL frames
print("üîç Scanning files...")
mp4_files = []
wav_files = []
video_frame_count = 0  # üÜï ACTUAL frame counter

for base_dir in video_dirs:
    pattern = os.path.join(base_dir, 'Actor_*', '*.mp4')
    found = glob.glob(pattern)
    mp4_files.extend(found)
    print(f"  Videos {os.path.basename(base_dir)}: {len(found)} MP4s")

for base_dir in audio_dirs:
    pattern = os.path.join(base_dir, 'Actor_*', '*.wav')
    found = glob.glob(pattern)
    wav_files.extend(found)
    print(f"  Audio  {os.path.basename(base_dir)}: {len(found)} WAVs")

print(f"\nüìä ACTUAL COUNTS:")
print(f"üìπ Video files: {len(mp4_files)}")
print(f"üîä Audio files: {len(wav_files)}")

output_csv = os.path.join(RAVDESS_ROOT, 'rv_data_manifest.csv')
if os.path.exists(output_csv):
    os.remove(output_csv)

total_rows = 0
first_write = True
global_row_id = 1

print("\nüîÑ Processing...")

# üé• PROCESS VIDEOS: Count ACTUAL frames
print("Processing videos (frame-level)...")
for full_path in mp4_files:
    filename = os.path.basename(full_path)
    parsed = parse_ravdess_filename(filename)
    if not parsed:
        continue
    
    # Get ACTUAL frame count
    cap = cv2.VideoCapture(full_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_frame_count += total_frames  # üÜï ACTUAL COUNT
    fps = cap.get(cv2.CAP_PROP_FPS)
    duration = total_frames / fps if fps > 0 else 0
    cap.release()
    
    # 1 row per frame
    video_rows = []
    for frame_idx in range(total_frames):
        row = {
            'ID': global_row_id,
            'file_path': full_path,
            'filename': filename,
            'frame_idx': frame_idx,
            'file_type': 'video',
            'Modality': parsed['Modality'],
            'Vocal_channel': parsed['Vocal_channel'],
            'Emotion': parsed['Emotion'],
            'Emotional_intensity': parsed['Emotional_intensity'],
            'Statement': parsed['Statement'],
            'Repetition': parsed['Repetition'],
            'Actor': parsed['Actor'],
            'total_frames': total_frames,
            'fps': round(fps, 2),
            'duration_seconds': round(duration, 2)
        }
        video_rows.append(row)
        global_row_id += 1
    
    video_df = pd.DataFrame(video_rows)
    video_df.to_csv(output_csv, mode='a', header=first_write, 
                   chunksize=10000, index=False)
    total_rows += len(video_rows)
    first_write = False

print(f"‚úÖ Videos done: {video_frame_count:,} ACTUAL frame rows")

# üîä PROCESS AUDIO: 1 row per file
print("Processing audio files (1 row each)...")
for full_path in wav_files:
    filename = os.path.basename(full_path)
    parsed = parse_ravdess_filename(filename)
    if not parsed:
        continue
    
    audio_info = sf.info(full_path)
    row = {
        'ID': global_row_id,
        'file_path': full_path,
        'filename': filename,
        'frame_idx': -1,
        'file_type': 'audio',
        'Modality': parsed['Modality'],
        'Vocal_channel': parsed['Vocal_channel'],
        'Emotion': parsed['Emotion'],
        'Emotional_intensity': parsed['Emotional_intensity'],
        'Statement': parsed['Statement'],
        'Repetition': parsed['Repetition'],
        'Actor': parsed['Actor'],
        'total_samples': int(audio_info.samplerate * audio_info.duration),
        'sample_rate': audio_info.samplerate,
        'duration_seconds': round(audio_info.duration, 2)
    }
    global_row_id += 1
    
    audio_df = pd.DataFrame([row])
    audio_df.to_csv(output_csv, mode='a', header=False, 
                   chunksize=10000, index=False)
    total_rows += 1

print(f"\n‚úÖ COMPLETE!")
print(f"üìä EXACT COUNTS:")
print(f"üìπ Video files: {len(mp4_files)} ‚Üí {video_frame_count:,} ACTUAL frame rows")
print(f"üîä Audio files: {len(wav_files)} ‚Üí {len(wav_files):,} metadata rows") 
print(f"üìÑ Total rows: {total_rows:,}")
print(f"üÜî ID range: 1 ‚Üí {global_row_id-1}")
print(f"üìÅ Saved: {output_csv}")


üîç Scanning files...
  Videos song: 2024 MP4s
  Videos speech: 2880 MP4s
  Audio  Audio_Song_Actors_01-24: 1012 WAVs
  Audio  Audio_Speech_Actors_01-24: 1440 WAVs

üìä ACTUAL COUNTS:
üìπ Video files: 4904
üîä Audio files: 2452

üîÑ Processing...
Processing videos (frame-level)...
‚úÖ Videos done: 601,379 ACTUAL frame rows
Processing audio files (1 row each)...

‚úÖ COMPLETE!
üìä EXACT COUNTS:
üìπ Video files: 4904 ‚Üí 601,379 ACTUAL frame rows
üîä Audio files: 2452 ‚Üí 2,452 metadata rows
üìÑ Total rows: 603,831
üÜî ID range: 1 ‚Üí 603831
üìÅ Saved: /Users/anzheladavityan/Desktop/RAVDESS/rv_data_manifest.csv


In [13]:
df.head()

Unnamed: 0,ID,file_path,filename,frame_idx,file_type,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetition,Actor,total_frames,fps,duration_seconds
0,1,/Users/anzheladavityan/Desktop/RAVDESS/song/Ac...,01-02-03-02-02-01-16.mp4,0,video,1,2,3,2,2,1,16,131,29.97,4.37
1,2,/Users/anzheladavityan/Desktop/RAVDESS/song/Ac...,01-02-03-02-02-01-16.mp4,1,video,1,2,3,2,2,1,16,131,29.97,4.37
2,3,/Users/anzheladavityan/Desktop/RAVDESS/song/Ac...,01-02-03-02-02-01-16.mp4,2,video,1,2,3,2,2,1,16,131,29.97,4.37
3,4,/Users/anzheladavityan/Desktop/RAVDESS/song/Ac...,01-02-03-02-02-01-16.mp4,3,video,1,2,3,2,2,1,16,131,29.97,4.37
4,5,/Users/anzheladavityan/Desktop/RAVDESS/song/Ac...,01-02-03-02-02-01-16.mp4,4,video,1,2,3,2,2,1,16,131,29.97,4.37


In [15]:
import pandas as pd
import numpy as np

# Load CSV & check columns first
df = pd.read_csv('/Users/anzheladavityan/Desktop/RAVDESS/rv_data_manifest.csv')
print("üìã COLUMNS FOUND:", list(df.columns))

print("üîç RAVDESS SANITY CHECK")
print("=" * 60)

# 1. BASIC STRUCTURE
print(f"\nüìä BASIC STATS")
print(f"   Total rows: {len(df):,}")
print(f"   Video rows: {len(df[df['file_type']=='video']):,}")
print(f"   Audio rows: {len(df[df['file_type']=='audio']):,}")
print(f"   Unique files: {df['filename'].nunique():,}")

# 2. RAVDESS EXPECTATIONS
print(f"\n‚úÖ EXPECTED vs ACTUAL")
print(f"   Actors: {df['Actor'].nunique()}/24 | {sorted(df['Actor'].unique())}")
print(f"   Emotions: {df['Emotion'].nunique()}/8 | {sorted(df['Emotion'].unique())}")

# 3. VIDEO SANITY 
video_df = df[df['file_type']=='video']
print(f"\nüìπ VIDEO CHECK")
print(f"   Files: {video_df['filename'].nunique():,}")

# Safe column access
if 'total_frames' in df.columns:
    print(f"   Frame range: {video_df['total_frames'].min()}-{video_df['total_frames'].max()}")
    print(f"   Avg frames: {video_df['total_frames'].mean():.0f}")
if 'fps' in df.columns:
    print(f"   FPS range: {video_df['fps'].min():.1f}-{video_df['fps'].max():.1f}")
if 'duration_seconds' in df.columns:
    print(f"   Duration: {video_df['duration_seconds'].mean():.1f}s avg")

# 4. AUDIO SANITY  
audio_df = df[df['file_type']=='audio']
print(f"\nüîä AUDIO CHECK")
print(f"   Files: {audio_df['filename'].nunique():,}")
print(f"   frame_idx all -1: {(audio_df['frame_idx'] == -1).all() if len(audio_df)>0 else 'No audio'}")

# Safe audio columns
if 'sample_rate' in df.columns and len(audio_df)>0:
    print(f"   Sample rates: {sorted(audio_df['sample_rate'].unique())}")
if 'duration_seconds' in df.columns and len(audio_df)>0:
    print(f"   Duration: {audio_df['duration_seconds'].mean():.1f}s avg")

# 5. ID INTEGRITY
print(f"\nüÜî ID SANITY")
print(f"   Range: {df['ID'].min()} ‚Üí {df['ID'].max()}")
print(f"   Unique: {df['ID'].is_unique}")
print(f"   Sequential: {df['ID'].is_monotonic_increasing}")

# 6. RAVDESS CODE VALIDATION
print(f"\nüî¢ RAVDESS CODE CHECK")
valid_modality = df['Modality'].isin([1,2,3]).all()
valid_vocal = df['Vocal_channel'].isin([1,2]).all()
valid_emotion = df['Emotion'].between(1,8).all()
valid_actor = df['Actor'].between(1,24).all()

print(f"   Modality ‚úì: {valid_modality}")
print(f"   Vocal ch ‚úì: {valid_vocal}") 
print(f"   Emotion ‚úì: {valid_emotion}")
print(f"   Actor ‚úì: {valid_actor}")

# 7. FINAL VERDICT
print(f"\n" + "="*60)
print("üéØ SANITY CHECK RESULTS")

status = [
    ("Actors (24)", df['Actor'].nunique() == 24),
    ("Emotions (8+)", df['Emotion'].nunique() >= 8),
    ("Valid codes", valid_emotion and valid_modality and valid_vocal),
    ("Sequential IDs", df['ID'].is_monotonic_increasing),
    ("Unique IDs", df['ID'].is_unique),
    ("Video frames >50", ('total_frames' in df.columns) and (video_df['total_frames'] > 50).all()),
    ("Audio frame_idx=-1", len(audio_df)==0 or (audio_df['frame_idx'] == -1).all()),
    ("Reasonable files", 1000 < df['filename'].nunique() < 8000)
]

passed = sum(1 for name, ok in status if ok)
print(f"‚úÖ {passed}/8 CHECKS PASSED")

for name, ok in status:
    mark = "‚úÖ PASS" if ok else "‚ùå FAIL"
    print(f"   {mark:<20} {name}")

if passed >= 7:
    print("\nüéâ PERFECT! Dataset is production-ready!")
elif passed >= 5:
    print("\n‚ö†Ô∏è  GOOD - Minor issues, still usable") 
else:
    print("\n‚ùå FAILED - Regenerate dataset")


üìã COLUMNS FOUND: ['ID', 'file_path', 'filename', 'frame_idx', 'file_type', 'Modality', 'Vocal_channel', 'Emotion', 'Emotional_intensity', 'Statement', 'Repetition', 'Actor', 'total_frames', 'fps', 'duration_seconds']
üîç RAVDESS SANITY CHECK

üìä BASIC STATS
   Total rows: 603,831
   Video rows: 601,379
   Audio rows: 2,452
   Unique files: 7,356

‚úÖ EXPECTED vs ACTUAL
   Actors: 24/24 | [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24)]
   Emotions: 8/8 | [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8)]

üìπ VIDEO CHECK
   Files: 4,904
   Frame range: 88-191
   Avg frames: 125
   FPS range: 30.0-30.0
   Duration: 4.2s avg

üîä AUDIO CHECK
   Files: 2