In [29]:
# Cell 1: Setup and Imports (Same as before)
import numpy as np
import os
import glob
from tqdm import tqdm
import gc

print("🚀 CPH Dataset Processing - FIXED VERSION")
print("=" * 50)

# Check available data
print("📁 Checking dataset files...")
base_path = '/kaggle/input'

# Find your dataset directory
dataset_path = None
for root, dirs, files in os.walk(base_path):
    if any(f.endswith('.yuv') for f in files):
        dataset_path = root
        break

print(f"📁 Dataset path: {dataset_path}")
yuv_count = len([f for f in os.listdir(dataset_path) if f.endswith('.yuv')])
dat_count = len([f for f in os.listdir(dataset_path) if f.endswith('.dat')])
print(f"✅ Found {yuv_count} YUV files and {dat_count} DAT files")

🚀 CPH Dataset Processing - FIXED VERSION
📁 Checking dataset files...
📁 Dataset path: /kaggle/input/cph-intra-dataset
✅ Found 12 YUV files and 96 DAT files


In [30]:
# Cell 2: Updated CPH Processor - NO LIMITS VERSION
class FullCPHProcessor:
    def __init__(self):
        self.QP_VALUES = [22, 27, 32, 37]
        self.RESOLUTIONS = ['768x512', '1536x1024', '2880x1920', '4928x3264']
        self.input_path = dataset_path
        self.output_path = '/kaggle/working'
        
        # NO FRAME LIMITS - Process all available frames
        self.frame_limits = None
        
    def get_total_frames_in_yuv(self, filepath, width, height):
        """Calculate total number of frames available in YUV file"""
        try:
            file_size = os.path.getsize(filepath)
            frame_size = int(width * height * 1.5)  # YUV420 format
            total_frames = file_size // frame_size
            print(f"  📊 YUV file size: {file_size:,} bytes")
            print(f"  📊 Frame size: {frame_size:,} bytes")
            print(f"  📊 Total frames available: {total_frames}")
            return total_frames
        except Exception as e:
            print(f"  ❌ Error calculating frames: {e}")
            return 0
        
    def read_yuv_frames(self, filepath, width, height, max_frames=None):
        """Read YUV frames - ALL frames if max_frames is None"""
        frame_size = int(width * height * 1.5)
        y_size = width * height
        
        # If max_frames not specified, get all available frames
        if max_frames is None:
            max_frames = self.get_total_frames_in_yuv(filepath, width, height)
        
        frames = []
        with open(filepath, 'rb') as f:
            for i in tqdm(range(max_frames), desc=f"Reading ALL {max_frames} frames"):
                f.seek(i * frame_size)
                y_data = f.read(y_size)
                if len(y_data) != y_size:
                    print(f"  ⚠️ Reached end of file at frame {i}")
                    break
                frames.append(np.frombuffer(y_data, dtype=np.uint8).reshape(height, width))
        
        print(f"  ✅ Successfully read {len(frames)} frames")
        return np.array(frames)
    
    def create_hierarchical_labels_from_depth(self, cu_depth_data, frame_idx, row, col, width, height):
        """Create hierarchical labels from CU depth information"""
        # Calculate CTU position
        ctu_cols = width // 64
        ctu_rows = height // 64
        ctus_per_frame = ctu_cols * ctu_rows
        
        # Base CTU index
        ctu_idx = frame_idx * ctus_per_frame + row * ctu_cols + col
        
        # Initialize 16-element label array for this CTU
        labels = np.zeros(16, dtype=np.uint8)
        
        # Safety check for CU depth data bounds
        if ctu_idx >= len(cu_depth_data):
            return labels
            
        # 64x64 level decision (1 decision)
        depth_64 = cu_depth_data[ctu_idx] if ctu_idx < len(cu_depth_data) else 0
        labels[0] = 1 if depth_64 > 0 else 0
        
        # 32x32 level decisions (4 decisions)
        for quad in range(4):
            sub_idx = ctu_idx + quad * max(1, len(cu_depth_data) // (ctus_per_frame * 4))
            sub_idx = min(sub_idx, len(cu_depth_data) - 1)
            depth_32 = cu_depth_data[sub_idx]
            labels[1 + quad] = 1 if depth_32 > 1 else 0
        
        # 16x16 level decisions (12 decisions, positions 5-16)
        for sub in range(12):
            sub_idx = ctu_idx + sub * max(1, len(cu_depth_data) // (ctus_per_frame * 16))
            sub_idx = min(sub_idx, len(cu_depth_data) - 1)
            depth_16 = cu_depth_data[sub_idx]
            labels[4 + sub] = 1 if depth_16 > 2 else 0
            
        return labels
    
    def extract_patches_fixed(self, frames, cu_depths, width, height):
        """Extract patches with proper hierarchical labels - ALL frames"""
        ctu_cols = width // 64
        ctu_rows = height // 64
        ctus_per_frame = ctu_cols * ctu_rows
        
        all_patches = []
        all_labels = []
        
        # Process ALL available frames
        max_frames = min(len(frames), max(1, len(cu_depths) // ctus_per_frame))
        print(f"  📊 Processing {max_frames} frames ({len(frames)} YUV frames, {len(cu_depths)} CU depth values)")
        
        for frame_idx in tqdm(range(max_frames), desc="Extracting ALL patches"):
            frame = frames[frame_idx]
            for row in range(ctu_rows):
                for col in range(ctu_cols):
                    # Extract 64x64 patch
                    patch = frame[row*64:(row+1)*64, col*64:(col+1)*64]
                    
                    # Create hierarchical labels for this patch
                    hierarchical_labels = self.create_hierarchical_labels_from_depth(
                        cu_depths, frame_idx, row, col, width, height)
                    
                    all_patches.append(patch)
                    all_labels.append(hierarchical_labels)
        
        print(f"  ✅ Total patches extracted: {len(all_patches)}")
        return np.array(all_patches), np.array(all_labels)

# Initialize the FULL processor
processor = FullCPHProcessor()

In [31]:
# Cell 3: Updated Processing Function - FULL DATASET VERSION
def process_complete_dataset(dataset_type):
    """Process COMPLETE dataset - ALL available frames"""
    print(f"\n🔄 Processing COMPLETE {dataset_type} Dataset - ALL FRAMES")
    print("=" * 60)
    
    all_samples = []
    total_expected_samples = 0
    
    for resolution in processor.RESOLUTIONS:
        width, height = map(int, resolution.split('x'))
        
        # Find YUV file
        yuv_file = None
        for root, dirs, files in os.walk(processor.input_path):
            for file in files:
                if f'Intra{dataset_type}_{resolution}.yuv' in file:
                    yuv_file = os.path.join(root, file)
                    break
        
        if not yuv_file:
            print(f"❌ YUV file not found for {resolution}")
            continue
            
        print(f"\n--- {resolution} (ALL FRAMES) ---")
        
        # Calculate expected samples for this resolution
        total_frames = processor.get_total_frames_in_yuv(yuv_file, width, height)
        ctus_per_frame = (width // 64) * (height // 64)
        samples_per_qp = total_frames * ctus_per_frame
        total_expected_samples += samples_per_qp * len(processor.QP_VALUES)
        
        print(f"  📊 Expected samples for this resolution: {samples_per_qp * len(processor.QP_VALUES):,}")
        
        for qp in processor.QP_VALUES:
            # Find CU depth file
            cudepth_files = glob.glob(f'{processor.input_path}/*AI_Intra{dataset_type}_{resolution}_qp{qp}*CUDepth.dat')
            if not cudepth_files:
                print(f"❌ CU depth file not found for QP{qp}")
                continue
                
            try:
                print(f"🔄 Processing QP{qp} - ALL FRAMES...")
                
                # Read ALL frames (no limit)
                frames = processor.read_yuv_frames(yuv_file, width, height, max_frames=None)
                cu_depths = np.frombuffer(open(cudepth_files[0], 'rb').read(), dtype=np.uint8)
                
                print(f"  📊 Loaded: {len(frames)} frames, {len(cu_depths)} CU depth values")
                
                # Extract ALL patches
                patches, labels = processor.extract_patches_fixed(frames, cu_depths, width, height)
                
                print(f"  📊 Extracted: {len(patches)} patches")
                
                # Create samples with correct structure
                for patch, patch_labels in zip(patches, labels):
                    sample = np.zeros(4992, dtype=np.uint8)
                    
                    # Image data (4096 bytes)
                    sample[:4096] = patch.flatten()
                    
                    # QP value stored in padding area
                    sample[4096] = qp
                    
                    # Labels section: 832 bytes starting at position 4160
                    for qp_idx in range(52):
                        start_pos = 4160 + qp_idx * 16
                        end_pos = start_pos + 16
                        
                        if qp_idx == qp:
                            sample[start_pos:end_pos] = patch_labels
                        else:
                            sample[start_pos:end_pos] = np.zeros(16, dtype=np.uint8)
                    
                    all_samples.append(sample)
                
                print(f"  ✅ QP{qp}: {len(patches)} samples added (COMPLETE)")
                
                # Clean memory
                del frames, cu_depths, patches, labels
                gc.collect()
                
            except Exception as e:
                print(f"  ❌ Error processing QP{qp}: {e}")
                import traceback
                traceback.print_exc()
    
    if all_samples:
        all_samples = np.array(all_samples)
        np.random.shuffle(all_samples)
        
        # Save final dataset
        filename = f'AI_{dataset_type}_COMPLETE_{len(all_samples)}.dat'
        filepath = os.path.join(processor.output_path, filename)
        all_samples.tofile(filepath)
        
        size_mb = os.path.getsize(filepath) / (1024*1024)
        size_gb = size_mb / 1024
        
        print(f"\n✅ COMPLETE DATASET SAVED: {filename}")
        print(f"📊 {len(all_samples):,} samples (vs {total_expected_samples:,} expected)")
        print(f"📊 {size_mb:.1f} MB ({size_gb:.2f} GB)")
        print(f"🔍 Sample size: {all_samples.shape[1]} bytes (expected: 4992)")
        
        # Verify sample structure
        sample = all_samples[0]
        print(f"🔍 Sample verification:")
        print(f"   Image data range: {sample[:4096].min()}-{sample[:4096].max()}")
        print(f"   QP value: {sample[4096]}")
        print(f"   Labels range: {sample[4160:].min()}-{sample[4160:].max()}")
        
        return filepath
    
    return None


In [32]:
# Cell 4: Process COMPLETE datasets
print("🚀 PROCESSING COMPLETE DATASETS - ALL FRAMES")
print("⚠️ WARNING: This will take much longer and generate much larger files!")
print("=" * 60)

# Process each dataset completely
datasets_to_process = ['Test', 'Valid']  # Start with Test first to check

for dataset_type in datasets_to_process:
    print(f"\n🎯 Processing COMPLETE {dataset_type} Dataset")
    result_file = process_complete_dataset(dataset_type)
    
    if result_file:
        print(f"✅ COMPLETE {dataset_type} dataset saved: {os.path.basename(result_file)}")
    else:
        print(f"❌ COMPLETE {dataset_type} dataset failed")
    
    print(f"🔋 Memory cleanup...")
    gc.collect()

🚀 PROCESSING COMPLETE DATASETS - ALL FRAMES

🎯 Processing COMPLETE Test Dataset

🔄 Processing COMPLETE Test Dataset - ALL FRAMES

--- 768x512 (ALL FRAMES) ---
  📊 YUV file size: 29,491,200 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 50
  📊 Expected samples for this resolution: 19,200
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 29,491,200 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 4847.45it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 76800 CU depth values
  📊 Processing 50 frames (50 YUV frames, 76800 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 300.12it/s]

  ✅ Total patches extracted: 4800





  📊 Extracted: 4800 patches
  ✅ QP22: 4800 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 29,491,200 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 6220.60it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 76800 CU depth values
  📊 Processing 50 frames (50 YUV frames, 76800 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 296.65it/s]

  ✅ Total patches extracted: 4800
  📊 Extracted: 4800 patches





  ✅ QP27: 4800 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 29,491,200 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 6183.55it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 76800 CU depth values
  📊 Processing 50 frames (50 YUV frames, 76800 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 293.39it/s]

  ✅ Total patches extracted: 4800
  📊 Extracted: 4800 patches





  ✅ QP32: 4800 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 29,491,200 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 5780.78it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 76800 CU depth values
  📊 Processing 50 frames (50 YUV frames, 76800 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 294.18it/s]

  ✅ Total patches extracted: 4800
  📊 Extracted: 4800 patches





  ✅ QP37: 4800 samples added (COMPLETE)

--- 1536x1024 (ALL FRAMES) ---
  📊 YUV file size: 117,964,800 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 50
  📊 Expected samples for this resolution: 76,800
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 117,964,800 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 1918.83it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 307200 CU depth values
  📊 Processing 50 frames (50 YUV frames, 307200 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 66.14it/s]


  ✅ Total patches extracted: 19200
  📊 Extracted: 19200 patches
  ✅ QP22: 19200 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 117,964,800 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 2392.83it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 307200 CU depth values
  📊 Processing 50 frames (50 YUV frames, 307200 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 72.75it/s]


  ✅ Total patches extracted: 19200
  📊 Extracted: 19200 patches
  ✅ QP27: 19200 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 117,964,800 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 2390.71it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 307200 CU depth values
  📊 Processing 50 frames (50 YUV frames, 307200 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 74.83it/s]


  ✅ Total patches extracted: 19200
  📊 Extracted: 19200 patches
  ✅ QP32: 19200 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 117,964,800 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 2428.78it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 307200 CU depth values
  📊 Processing 50 frames (50 YUV frames, 307200 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:00<00:00, 74.86it/s]


  ✅ Total patches extracted: 19200
  📊 Extracted: 19200 patches
  ✅ QP37: 19200 samples added (COMPLETE)

--- 2880x1920 (ALL FRAMES) ---
  📊 YUV file size: 414,720,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 50
  📊 Expected samples for this resolution: 270,000
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 414,720,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 526.30it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 1080000 CU depth values
  📊 Processing 50 frames (50 YUV frames, 1080000 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:02<00:00, 21.20it/s]


  ✅ Total patches extracted: 67500
  📊 Extracted: 67500 patches
  ✅ QP22: 67500 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 414,720,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 732.20it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 1080000 CU depth values
  📊 Processing 50 frames (50 YUV frames, 1080000 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:02<00:00, 20.95it/s]


  ✅ Total patches extracted: 67500
  📊 Extracted: 67500 patches
  ✅ QP27: 67500 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 414,720,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 710.72it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 1080000 CU depth values
  📊 Processing 50 frames (50 YUV frames, 1080000 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:02<00:00, 21.30it/s]


  ✅ Total patches extracted: 67500
  📊 Extracted: 67500 patches
  ✅ QP32: 67500 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 414,720,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 744.72it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 1080000 CU depth values
  📊 Processing 50 frames (50 YUV frames, 1080000 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:02<00:00, 21.14it/s]


  ✅ Total patches extracted: 67500
  📊 Extracted: 67500 patches
  ✅ QP37: 67500 samples added (COMPLETE)

--- 4928x3264 (ALL FRAMES) ---
  📊 YUV file size: 1,206,374,400 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 50
  📊 Expected samples for this resolution: 785,400
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 1,206,374,400 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:04<00:00, 11.98it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 3141600 CU depth values
  📊 Processing 50 frames (50 YUV frames, 3141600 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:06<00:00,  7.25it/s]


  ✅ Total patches extracted: 196350
  📊 Extracted: 196350 patches
  ✅ QP22: 196350 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 1,206,374,400 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 76.41it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 3141600 CU depth values
  📊 Processing 50 frames (50 YUV frames, 3141600 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:06<00:00,  7.19it/s]


  ✅ Total patches extracted: 196350
  📊 Extracted: 196350 patches
  ✅ QP27: 196350 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 1,206,374,400 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 88.05it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 3141600 CU depth values
  📊 Processing 50 frames (50 YUV frames, 3141600 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:06<00:00,  7.21it/s]


  ✅ Total patches extracted: 196350
  📊 Extracted: 196350 patches
  ✅ QP32: 196350 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 1,206,374,400 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 50


Reading ALL 50 frames: 100%|██████████| 50/50 [00:00<00:00, 84.88it/s]


  ✅ Successfully read 50 frames
  📊 Loaded: 50 frames, 3141600 CU depth values
  📊 Processing 50 frames (50 YUV frames, 3141600 CU depth values)


Extracting ALL patches: 100%|██████████| 50/50 [00:06<00:00,  7.28it/s]


  ✅ Total patches extracted: 196350
  📊 Extracted: 196350 patches
  ✅ QP37: 196350 samples added (COMPLETE)

✅ COMPLETE DATASET SAVED: AI_Test_COMPLETE_1151400.dat
📊 1,151,400 samples (vs 1,151,400 expected)
📊 5481.5 MB (5.35 GB)
🔍 Sample size: 4992 bytes (expected: 4992)
🔍 Sample verification:
   Image data range: 31-238
   QP value: 22
   Labels range: 0-1
✅ COMPLETE Test dataset saved: AI_Test_COMPLETE_1151400.dat
🔋 Memory cleanup...

🎯 Processing COMPLETE Valid Dataset

🔄 Processing COMPLETE Valid Dataset - ALL FRAMES

--- 768x512 (ALL FRAMES) ---
  📊 YUV file size: 14,745,600 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 25
  📊 Expected samples for this resolution: 9,600
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 14,745,600 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 137.61it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 38400 CU depth values
  📊 Processing 25 frames (25 YUV frames, 38400 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 281.54it/s]


  ✅ Total patches extracted: 2400
  📊 Extracted: 2400 patches
  ✅ QP22: 2400 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 14,745,600 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 4814.18it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 38400 CU depth values
  📊 Processing 25 frames (25 YUV frames, 38400 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 264.14it/s]

  ✅ Total patches extracted: 2400
  📊 Extracted: 2400 patches





  ✅ QP27: 2400 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 14,745,600 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 5322.45it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 38400 CU depth values
  📊 Processing 25 frames (25 YUV frames, 38400 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 286.65it/s]

  ✅ Total patches extracted: 2400
  📊 Extracted: 2400 patches





  ✅ QP32: 2400 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 14,745,600 bytes
  📊 Frame size: 589,824 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 6254.55it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 38400 CU depth values
  📊 Processing 25 frames (25 YUV frames, 38400 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 288.62it/s]

  ✅ Total patches extracted: 2400
  📊 Extracted: 2400 patches





  ✅ QP37: 2400 samples added (COMPLETE)

--- 1536x1024 (ALL FRAMES) ---
  📊 YUV file size: 58,982,400 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 25
  📊 Expected samples for this resolution: 38,400
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 58,982,400 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 49.00it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 153600 CU depth values
  📊 Processing 25 frames (25 YUV frames, 153600 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 72.29it/s]


  ✅ Total patches extracted: 9600
  📊 Extracted: 9600 patches
  ✅ QP22: 9600 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 58,982,400 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 1734.13it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 153600 CU depth values
  📊 Processing 25 frames (25 YUV frames, 153600 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 71.84it/s]


  ✅ Total patches extracted: 9600
  📊 Extracted: 9600 patches
  ✅ QP27: 9600 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 58,982,400 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 2102.49it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 153600 CU depth values
  📊 Processing 25 frames (25 YUV frames, 153600 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 68.89it/s]


  ✅ Total patches extracted: 9600
  📊 Extracted: 9600 patches
  ✅ QP32: 9600 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 58,982,400 bytes
  📊 Frame size: 2,359,296 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 2448.29it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 153600 CU depth values
  📊 Processing 25 frames (25 YUV frames, 153600 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:00<00:00, 71.64it/s]


  ✅ Total patches extracted: 9600
  📊 Extracted: 9600 patches
  ✅ QP37: 9600 samples added (COMPLETE)

--- 2880x1920 (ALL FRAMES) ---
  📊 YUV file size: 207,360,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 25
  📊 Expected samples for this resolution: 135,000
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 207,360,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 28.50it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 540000 CU depth values
  📊 Processing 25 frames (25 YUV frames, 540000 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:01<00:00, 20.22it/s]


  ✅ Total patches extracted: 33750
  📊 Extracted: 33750 patches
  ✅ QP22: 33750 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 207,360,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 515.72it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 540000 CU depth values
  📊 Processing 25 frames (25 YUV frames, 540000 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:01<00:00, 18.67it/s]


  ✅ Total patches extracted: 33750
  📊 Extracted: 33750 patches
  ✅ QP27: 33750 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 207,360,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 631.46it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 540000 CU depth values
  📊 Processing 25 frames (25 YUV frames, 540000 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:01<00:00, 19.60it/s]


  ✅ Total patches extracted: 33750
  📊 Extracted: 33750 patches
  ✅ QP32: 33750 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 207,360,000 bytes
  📊 Frame size: 8,294,400 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 732.10it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 540000 CU depth values
  📊 Processing 25 frames (25 YUV frames, 540000 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:01<00:00, 21.24it/s]


  ✅ Total patches extracted: 33750
  📊 Extracted: 33750 patches
  ✅ QP37: 33750 samples added (COMPLETE)

--- 4928x3264 (ALL FRAMES) ---
  📊 YUV file size: 603,187,200 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 25
  📊 Expected samples for this resolution: 392,700
🔄 Processing QP22 - ALL FRAMES...
  📊 YUV file size: 603,187,200 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:02<00:00, 10.74it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 1570800 CU depth values
  📊 Processing 25 frames (25 YUV frames, 1570800 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:03<00:00,  7.33it/s]


  ✅ Total patches extracted: 98175
  📊 Extracted: 98175 patches
  ✅ QP22: 98175 samples added (COMPLETE)
🔄 Processing QP27 - ALL FRAMES...
  📊 YUV file size: 603,187,200 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 182.07it/s]

  ✅ Successfully read 25 frames





  📊 Loaded: 25 frames, 1570800 CU depth values
  📊 Processing 25 frames (25 YUV frames, 1570800 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:03<00:00,  7.30it/s]


  ✅ Total patches extracted: 98175
  📊 Extracted: 98175 patches
  ✅ QP27: 98175 samples added (COMPLETE)
🔄 Processing QP32 - ALL FRAMES...
  📊 YUV file size: 603,187,200 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 243.31it/s]


  ✅ Successfully read 25 frames
  📊 Loaded: 25 frames, 1570800 CU depth values
  📊 Processing 25 frames (25 YUV frames, 1570800 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:03<00:00,  7.29it/s]


  ✅ Total patches extracted: 98175
  📊 Extracted: 98175 patches
  ✅ QP32: 98175 samples added (COMPLETE)
🔄 Processing QP37 - ALL FRAMES...
  📊 YUV file size: 603,187,200 bytes
  📊 Frame size: 24,127,488 bytes
  📊 Total frames available: 25


Reading ALL 25 frames: 100%|██████████| 25/25 [00:00<00:00, 271.03it/s]

  ✅ Successfully read 25 frames





  📊 Loaded: 25 frames, 1570800 CU depth values
  📊 Processing 25 frames (25 YUV frames, 1570800 CU depth values)


Extracting ALL patches: 100%|██████████| 25/25 [00:03<00:00,  7.31it/s]


  ✅ Total patches extracted: 98175
  📊 Extracted: 98175 patches
  ✅ QP37: 98175 samples added (COMPLETE)

✅ COMPLETE DATASET SAVED: AI_Valid_COMPLETE_575700.dat
📊 575,700 samples (vs 575,700 expected)
📊 2740.8 MB (2.68 GB)
🔍 Sample size: 4992 bytes (expected: 4992)
🔍 Sample verification:
   Image data range: 119-215
   QP value: 27
   Labels range: 0-1
✅ COMPLETE Valid dataset saved: AI_Valid_COMPLETE_575700.dat
🔋 Memory cleanup...


In [None]:
# Cell 4: Process COMPLETE datasets
print("🚀 PROCESSING COMPLETE DATASETS - ALL FRAMES")
print("⚠️ WARNING: This will take much longer and generate much larger files!")
print("=" * 60)

# Process each dataset completely
datasets_to_process = ['Train']  # Start with Test first to check

for dataset_type in datasets_to_process:
    print(f"\n🎯 Processing COMPLETE {dataset_type} Dataset")
    result_file = process_complete_dataset(dataset_type)
    
    if result_file:
        print(f"✅ COMPLETE {dataset_type} dataset saved: {os.path.basename(result_file)}")
    else:
        print(f"❌ COMPLETE {dataset_type} dataset failed")
    
    print(f"🔋 Memory cleanup...")
    gc.collect()

In [None]:
# Cell 7: Verification and Results
print("\n📊 Final Results (FIXED VERSION):")
print("=" * 60)

total_samples = 0
files = ['Test', 'Valid', 'Train']

for dataset in files:
    for file in os.listdir('/kaggle/working'):
        if f'AI_{dataset}_' in file and file.endswith('.dat'):
            filepath = f'/kaggle/working/{file}'
            size_mb = os.path.getsize(filepath) / (1024*1024)
            num_samples = os.path.getsize(filepath) // 4992  # Note: 4992 bytes per sample now
            
            print(f"{dataset:>5}: {file}")
            print(f"        {size_mb:>7.1f} MB | {num_samples:>8,} samples | 4992 bytes/sample")
            total_samples += num_samples
            break

print("=" * 60)
print(f"TOTAL: {total_samples:,} samples")
print(f"✅ All samples now have correct 4992-byte structure")
print(f"✅ Hierarchical labels properly implemented")
print(f"💾 Ready for download from Output tab!")

# Verify a sample from each dataset
print(f"\n🔍 Sample Verification:")
for dataset in files:
    for file in os.listdir('/kaggle/working'):
        if f'AI_{dataset}_' in file and file.endswith('.dat'):
            filepath = f'/kaggle/working/{file}'
            
            # Read first sample
            with open(filepath, 'rb') as f:
                sample = np.frombuffer(f.read(4992), dtype=np.uint8)
                
            print(f"{dataset}: Image[{sample[:4096].min()}-{sample[:4096].max()}], "
                  f"QP={sample[4096]}, Labels[{sample[4160:].min()}-{sample[4160:].max()}]")
            break


In [26]:
# import os
# import shutil

# working_dir = '/kaggle/working'

# # List all files and directories in /kaggle/working
# for filename in os.listdir(working_dir):
#     file_path = os.path.join(working_dir, filename)
#     try:
#         if os.path.isfile(file_path) or os.path.islink(file_path):
#             os.remove(file_path)  # Remove file or link
#         elif os.path.isdir(file_path):
#             shutil.rmtree(file_path)  # Remove directory and all its contents
#         print(f'Deleted: {file_path}')
#     except Exception as e:
#         print(f'Failed to delete {file_path}. Reason: {e}')
