In [1]:
import os
import cv2

def verify_dataset_completeness(
    dataset_dir='synthetic_dataset',
    expressions=('angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise'),
    target_count=5000,
    check_samples=True,
):
    """Check that every expression folder holds at least ``target_count`` images.

    For each name in ``expressions`` the sub-directory
    ``dataset_dir/<expression>`` is listed and the files whose extension is
    ``.jpg``, ``.jpeg`` or ``.png`` (matched case-insensitively) are counted.
    A per-expression status line and a final summary are printed.

    Args:
        dataset_dir: Root directory containing one sub-directory per
            expression. Defaults to ``'synthetic_dataset'``.
        expressions: Iterable of sub-directory names to verify. Defaults to
            the seven expression labels used by this script.
        target_count: Minimum number of images required per expression.
            Defaults to 5000.
        check_samples: When true (the default), the alphabetically first
            image of each non-empty folder is read with ``cv2.imread`` and
            its shape, or a "Corrupt" notice, is printed. The result of this
            spot check is informational only; it does not affect the
            return value.

    Returns:
        ``True`` if every expression directory exists and contains at least
        ``target_count`` images, otherwise ``False``.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    print("🔍 VERIFYING DATASET COMPLETENESS")
    print("=" * 50)

    all_complete = True

    for expression in expressions:
        expr_dir = os.path.join(dataset_dir, expression)

        # isdir (not exists): a regular file with this name would otherwise
        # make os.listdir raise NotADirectoryError.
        if not os.path.isdir(expr_dir):
            print(f"❌ Directory missing: {expr_dir}")
            all_complete = False
            continue

        # Lower-case the name so '.JPG' / '.PNG' are counted too, and sort
        # because os.listdir returns entries in arbitrary order: sorting
        # keeps the sampled file the same from run to run.
        image_files = sorted(
            f for f in os.listdir(expr_dir)
            if f.lower().endswith(image_extensions)
        )
        count = len(image_files)

        status = "✅" if count >= target_count else "❌"
        print(f"   {status} {expression}: {count}/{target_count} images")

        if count < target_count:
            all_complete = False

        # Spot-check a single image rather than decoding the whole folder.
        if check_samples and image_files:
            sample_image = os.path.join(expr_dir, image_files[0])
            img = cv2.imread(sample_image)
            # This code treats a None result as an unreadable/corrupt file.
            if img is not None:
                print(f"      Sample: {img.shape} - ✓ Readable")
            else:
                print("      Sample: ❌ Corrupt")

    if all_complete:
        print(f"\n🎉 DATASET COMPLETE! All expressions have ≥{target_count} images")
    else:
        print("\n⚠️  Dataset incomplete. Some expressions need more images.")

    return all_complete

def main():
    """Script entry point: run the dataset completeness verification once.

    Takes no arguments and returns nothing; all reporting is done by
    ``verify_dataset_completeness`` via ``print``.
    """
    verify_dataset_completeness()


# Run the check only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

🔍 VERIFYING DATASET COMPLETENESS
==================================================
   ✅ angry: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ disgust: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ fear: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ happy: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ neutral: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ sad: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable
   ✅ surprise: 5000/5000 images
      Sample: (48, 48, 3) - ✓ Readable

🎉 DATASET COMPLETE! All expressions have ≥5000 images
