In [1]:
from videogamegen.data.chunkstore import ChunkStore
import h5py
import lzf
import numpy as np

In [2]:
# Change into the project directory; the 'data/...' paths used by later cells are relative.
# NOTE(review): this is a relative cd, so re-running the cell without restarting the
# kernel would look for VideoGen/ inside VideoGen/ — confirm before re-executing.
%cd VideoGen/

/teamspace/studios/this_studio/VideoGen


In [None]:
# Read-only handle on the chunk store that the inspection cells below query.
# NOTE(review): this handle is never explicitly closed in the notebook.
chunkstore_dir = 'data/chunkstore/0.0'
store = ChunkStore(chunkstore_dir, mode='r')

In [6]:
# Read entry 0 through the store's indexing interface; its shape is displayed in the next cell.
frame = store[0]

In [7]:
# Bare expression so the notebook displays the shape of the entry read from the store.
frame.shape

(256, 256, 1)

In [5]:
# Print the store's metadata record for entry 0. In the recorded output it lists
# idx, byte_offset, byte_size, shape and dtype.
print(store.get_metadata(0))

{'idx': 0, 'byte_offset': 0, 'byte_size': 5297, 'shape': (256, 256, 1), 'dtype': 'uint8'}


In [14]:
# Uncompressed size of entry 0 in bytes: element count (product of the shape)
# times bytes per element of the stored dtype. The metadata record is fetched
# once and reused instead of being looked up twice.
frame_meta = store.get_metadata(0)
original_byte_size = np.prod(frame_meta['shape']) * np.dtype(frame_meta['dtype']).itemsize

In [15]:
# Show the uncompressed byte size computed in the previous cell. The earlier
# `original_size` name is only assigned by a later cell, so it is undefined here
# on a fresh kernel.
print(original_byte_size)

65536


In [3]:
# Round-trip check: compress a small batch of frames with LZF, decompress it,
# rebuild the array, and confirm it equals what was read from the HDF5 file.
with h5py.File('data/longplay_h5_files/0_0.h5', 'r') as f:
    dset = f['video_frames']
    print(f"dset.shape: {dset.shape}")
    print(f"dset.chunks: {dset.chunks}")

    # Despite the name, `frame` holds the first five frames (slice 0:5).
    frame = dset[0:5]
    print(f"Original frame shape: {frame.shape}")
    print(f"Original frame dtype: {frame.dtype}")
    print(f"Original frame size in bytes: {frame.nbytes}")

    # Compress
    compressed = lzf.compress(frame.tobytes())
    # lzf.compress signals "could not make the data smaller" by returning None;
    # fail here with a clear message instead of a TypeError from len(None).
    assert compressed is not None, "lzf.compress returned None (data did not shrink)"
    print(f"Compressed size: {len(compressed)}")

    # Try to decompress
    original_size = frame.nbytes
    decompressed = lzf.decompress(compressed, original_size)
    # lzf.decompress likewise returns None when the output does not fit in the
    # size it was given.
    assert decompressed is not None, "lzf.decompress returned None (output exceeds original_size)"
    print(f"Decompressed size: {len(decompressed)}")

    # Try to reconstruct the array
    reconstructed = np.frombuffer(decompressed, dtype=frame.dtype).reshape(frame.shape)
    print(f"Reconstructed frame shape: {reconstructed.shape}")

    # Verify data is the same
    data_matches = np.array_equal(frame, reconstructed)
    print(f"Data matches: {data_matches}")
    # Make a silent mismatch impossible to miss on Run All.
    assert data_matches, "LZF round trip did not reproduce the original frames"

dset.shape: (122413, 256, 256, 1)
dset.chunks: (1, 256, 256, 1)
Original frame shape: (5, 256, 256, 1)
Original frame dtype: uint8
Original frame size in bytes: 327680
Compressed size: 26475
Decompressed size: 327680
Reconstructed frame shape: (5, 256, 256, 1)
Data matches: True


In [5]:
# Dump the raw on-disk bytes of entry 0, bypassing any decoding done by the store.
# The temporary handle gets its own name so the notebook-level `store` is not
# rebound to an object whose `with` block has already exited.
with ChunkStore('data/chunkstore/0.0', 'r') as chunk_store:
    metadata = chunk_store.chunks_metadata[0]
    # Slice exactly this entry's byte range out of the store's memory map.
    raw_bytes = chunk_store.mmap[metadata.byte_offset:metadata.byte_offset + metadata.byte_size].tobytes()
    print("Raw stored data:")
    print(f"Size: {len(raw_bytes)}")
    # NOTE(review): in the recorded output these leading bytes are all zero, while
    # compressing frame 0 in inspect_write_process yields bytes starting
    # 1, 255, 255, 224 — confirm whether the store's write path persisted the data.
    print(f"First 20 bytes: {list(raw_bytes[:20])}")

Raw stored data:
Size: 5297
First 20 bytes: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [6]:
def inspect_write_process(h5_path='data/longplay_h5_files/0_0.h5',
                          dataset_name='video_frames',
                          frame_index=0):
    """Trace one frame through each step of writing data to the ChunkStore.

    Reads a single frame from an HDF5 dataset, converts it to bytes, compresses
    it with LZF, then writes the compressed bytes to disk two ways (a numpy
    memmap and a plain binary file write). Each file is read back and compared
    with the in-memory compressed bytes. Every step prints sizes and the
    leading bytes so the stages can be compared by eye.

    Args:
        h5_path: HDF5 file to read the frame from.
        dataset_name: Name of the dataset inside the HDF5 file.
        frame_index: Index of the frame to inspect.

    Returns:
        None. All results are printed.

    Side effects:
        Creates or overwrites ``test_mmap.bin`` and ``test_direct.bin`` in the
        current working directory.
    """
    # Read the frame and release the HDF5 file immediately; indexing the
    # dataset yields an in-memory array, so later steps do not need the file.
    with h5py.File(h5_path, 'r') as h5_file:
        frame = h5_file[dataset_name][frame_index]

    # 1. Check original frame
    print("1. Original Frame:")
    print(f"Shape: {frame.shape}")
    print(f"Type: {frame.dtype}")
    print(f"First few values: {frame.flatten()[:10]}")

    # 2. Convert to bytes
    frame_bytes = frame.tobytes()
    print("\n2. Frame as bytes:")
    print(f"Size: {len(frame_bytes)}")
    print(f"First 20 bytes: {list(frame_bytes[:20])}")

    # 3. Compress
    compressed = lzf.compress(frame_bytes)
    print("\n3. After compression:")
    if compressed is None:
        # lzf.compress returns None when it cannot make the data smaller;
        # there is nothing to write, so stop instead of failing on len(None).
        print("lzf.compress returned None (data did not shrink); skipping write checks")
        return
    print(f"Size: {len(compressed)}")
    print(f"First 20 bytes: {list(compressed[:20])}")

    # 4. Write to mmap file directly
    print("\n4. Writing to mmap file:")
    mmap_path = "test_mmap.bin"

    print("\nMethod 1: Direct numpy memmap:")
    # 'w+' creates (or truncates) the file at exactly the compressed length.
    mapped = np.memmap(mmap_path, dtype='uint8', mode='w+', shape=(len(compressed),))
    np.copyto(mapped, np.frombuffer(compressed, dtype='uint8'))
    mapped.flush()
    del mapped  # Drop the last reference so the mapping is closed before reading back

    # Read back and verify
    print("\n5. Reading back:")
    with open(mmap_path, 'rb') as mmap_file:
        read_data = mmap_file.read()
    print(f"Read size: {len(read_data)}")
    print(f"First 20 bytes: {list(read_data[:20])}")
    print(f"Data matches original compressed: {read_data == compressed}")

    # Try direct file write for comparison
    print("\nMethod 2: Direct file write:")
    direct_path = "test_direct.bin"
    with open(direct_path, 'wb') as direct_file:
        direct_file.write(compressed)

    with open(direct_path, 'rb') as direct_file:
        direct_read = direct_file.read()
    print(f"Direct write size: {len(direct_read)}")
    print(f"First 20 bytes: {list(direct_read[:20])}")
    print(f"Data matches original compressed: {direct_read == compressed}")

In [7]:
# Run the step-by-step write inspection defined above. It writes the scratch files
# test_mmap.bin and test_direct.bin into the current working directory.
inspect_write_process()

1. Original Frame:
Shape: (256, 256, 1)
Type: uint8
First few values: [255 255 255 255 255 255 255 255 255 255]

2. Frame as bytes:
Size: 65536
First 20 bytes: [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]

3. After compression:
Size: 5297
First 20 bytes: [1, 255, 255, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255]

4. Writing to mmap file:

Method 1: Direct numpy memmap:

5. Reading back:
Read size: 5297
First 20 bytes: [1, 255, 255, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255]
Data matches original compressed: True

Method 2: Direct file write:
Direct write size: 5297
First 20 bytes: [1, 255, 255, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255, 0, 224, 255]
Data matches original compressed: True
