<a href="https://colab.research.google.com/github/SapirBashan/median-quick-select/blob/main/simulator_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# version 2 of the simulator - text for test
- channel number -> which channel the block belongs to
- block size -> size of the block that needs to be copied
- channel offset -> where in the channel the data needs to be copied to
- data offset -> where in the shuffled data the block is copied from

`block = data[data_offset : data_offset + block_size]`

`data_output[channel_number][channel_offset : channel_offset + block_size] = block`


In [28]:
import numpy as np
import random

class Header:
    """Descriptor for one block of the shuffled data stream.

    Attributes:
        padding_1, padding_2: Filler fields, 0 unless given.
        channel_number: Channel the block belongs to.
        block_size: Length of the block (used in this file as a slice length).
        channel_offset: Start of the block inside its flattened channel.
        data_offset: Start of the block inside the shuffled stream.
    """

    def __init__(self, channel_number, block_size, channel_offset, data_offset, padding_1=0, padding_2=0):
        """Store the six header fields; the padding fields are stored first."""
        self.padding_1, self.padding_2 = padding_1, padding_2
        self.channel_number, self.block_size = channel_number, block_size
        self.channel_offset, self.data_offset = channel_offset, data_offset

    def __str__(self):
        """Return a one-line dump of every field, prefixed by the channel number."""
        field_names = (
            "padding_1",
            "padding_2",
            "channel_number",
            "block_size",
            "channel_offset",
            "data_offset",
        )
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"Header {self.channel_number}: {rendered}"

def create_test_data(channels=4, rows=5, cols=5, seed=None):
    """Build a small data cube, cut it into a shuffled block stream, and rebuild it.

    Every channel is flattened and split into consecutive blocks of random
    length. The blocks of all channels are shuffled and concatenated into one
    stream, with one ``Header`` per block recording where the block came from
    and where it sits in the stream. The cube is then reassembled from the
    stream and the headers alone.

    Args:
        channels: Number of channels in the cube (default 4).
        rows: Rows per channel (default 5).
        cols: Columns per channel (default 5).
        seed: Optional seed. ``None`` (default) draws from the module-level
            ``random`` generator; any other value uses a private
            ``random.Random(seed)`` so the plan is repeatable.

    Returns:
        Tuple ``(data_cube, shuffled_data, headers, assembled)``:
        ``data_cube`` is the original ``(channels, rows, cols)`` uint32 array,
        ``shuffled_data`` the 1-D uint32 stream, ``headers`` the list of
        ``Header`` objects in stream order, and ``assembled`` the cube rebuilt
        from ``shuffled_data`` and ``headers``.
    """
    min_block, max_block = 3, 10  # bounds on a block's length, in elements
    channel_size = rows * cols
    rng = random if seed is None else random.Random(seed)

    # Element [c, r, col] holds (c + 1) * 100 + (r + 1) * 10 + (col + 1).
    # These values are distinct as long as rows and cols are at most 9.
    data_cube = np.zeros((channels, rows, cols), dtype=np.uint32)
    data_cube += (np.arange(1, channels + 1, dtype=np.uint32) * 100)[:, None, None]
    data_cube += (np.arange(1, rows + 1, dtype=np.uint32) * 10)[None, :, None]
    data_cube += np.arange(1, cols + 1, dtype=np.uint32)[None, None, :]

    def generate_random_shuffle_plan(data_cube):
        """Split each channel into consecutive blocks and return them shuffled.

        Each plan entry is ``(channel number, channel offset, block size)``.
        """
        n_channels, n_rows, n_cols = data_cube.shape
        total = n_rows * n_cols
        plan = []
        for channel in range(n_channels):
            offset = 0
            while offset < total:
                remaining = total - offset
                if remaining <= min_block:
                    block_size = remaining  # take all remaining elements
                else:
                    block_size = rng.randint(min_block, min(max_block, remaining))
                plan.append((channel, offset, block_size))
                offset += block_size
        rng.shuffle(plan)  # shuffle the blocks of all channels together
        return plan

    shuffle_plan = generate_random_shuffle_plan(data_cube)

    # One flat (channels, channel_size) layout replaces a flatten() copy per block.
    data_flat = data_cube.reshape(channels, channel_size)

    # The plan covers every element exactly once, so the stream has the same
    # number of elements as the cube and can be allocated up front.
    shuffled_data = np.empty(data_cube.size, dtype=np.uint32)
    headers = []
    data_offset = 0
    for channel_number, channel_offset, block_size in shuffle_plan:
        shuffled_data[data_offset:data_offset + block_size] = \
            data_flat[channel_number, channel_offset:channel_offset + block_size]
        headers.append(Header(
            channel_number=channel_number,
            block_size=block_size,
            channel_offset=channel_offset,
            data_offset=data_offset,
        ))
        data_offset += block_size

    # Reassemble into a flat per-channel buffer, then give it the cube's shape.
    assembled_flat = np.zeros((channels, channel_size), dtype=data_cube.dtype)
    for header in headers:
        block = shuffled_data[header.data_offset:header.data_offset + header.block_size]
        assembled_flat[
            header.channel_number,
            header.channel_offset:header.channel_offset + header.block_size,
        ] = block
    assembled = assembled_flat.reshape(channels, rows, cols)

    return data_cube, shuffled_data, headers, assembled

def write_test_files():
    """Run the small test case, dump it to four text files, and print the verdict.

    Files written to the current directory:
        original_data_cube.txt   - the original cube, one text block per channel
        shuffled_data.txt        - the shuffled stream, five values per line
        headers.txt              - one header per line, in stream order
        assembled_data_cube.txt  - the cube rebuilt from stream and headers
    """

    def dump_cube(path, cube):
        """Write ``cube`` to ``path`` as labelled rows grouped by channel."""
        with open(path, 'w') as out:
            for c, plane in enumerate(cube):
                out.write(f"Channel {c}:\n")
                for r, line in enumerate(plane):
                    row = ' '.join(f"{value:4d}" for value in line)
                    out.write(f"  Row {r}: {row}\n")
                out.write('\n')

    original, shuffled_data, headers, assembled = create_test_data()

    # 1. Original cube
    dump_cube('original_data_cube.txt', original)

    # 2. Shuffled stream, five values per line
    with open('shuffled_data.txt', 'w') as out:
        out.write("Shuffled Data Stream:\n")
        for start in range(0, len(shuffled_data), 5):
            values = ' '.join(f"{v:4d}" for v in shuffled_data[start:start + 5])
            out.write(f"  {values}\n")

    # 3. Headers, in the order their blocks appear in the stream (not sorted)
    with open('headers.txt', 'w') as out:
        out.writelines(str(h) + '\n' for h in headers)

    # 4. Reassembled cube
    dump_cube('assembled_data_cube.txt', assembled)

    # Report whether the rebuilt cube equals the original
    print("✅ Files created.")
    verdict = (
        "✅ Reconstruction MATCHES original!"
        if np.array_equal(original, assembled)
        else "❌ Reconstruction failed — mismatch detected."
    )
    print(verdict)

# Run the small text-file test when this cell executes as the main module.
if __name__ == "__main__":
    write_test_files()


✅ Files created.
✅ Reconstruction MATCHES original!


# Main Changes:
Max block size is now 64KB (16,384 uint32 values).

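Binary file writing added (`shuffled_data.bin` and `headers.bin`); the files are written but not read back, and the reconstruction check runs on the in-memory arrays.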

Cube size is 96 channels × 1024 × 1024.

Still performs reconstruction check.



In [29]:
import numpy as np
import random
import struct

# Upper bound on one block's payload, in bytes.
MAX_BLOCK_SIZE = 64 * 1024  # 64 KB
# The same bound as a count of 4-byte uint32 values; used as the maximum
# block length when the shuffle plan is generated.
MAX_UINT32_VALUES_PER_BLOCK = MAX_BLOCK_SIZE // 4  # 16384

class Header:
    """Descriptor for one block of the shuffled data stream.

    Same fields as the ``Header`` class in the earlier cell; defining it again
    here replaces that definition when both cells are run.

    Attributes:
        padding_1, padding_2: Filler fields, 0 unless given.
        channel_number: Channel the block belongs to.
        block_size: Length of the block (used in this file as a slice length).
        channel_offset: Start of the block inside its flattened channel.
        data_offset: Start of the block inside the shuffled stream.
    """

    def __init__(self, channel_number, block_size, channel_offset, data_offset, padding_1=0, padding_2=0):
        """Store the six header fields; the padding fields are stored first."""
        self.padding_1, self.padding_2 = padding_1, padding_2
        self.channel_number, self.block_size = channel_number, block_size
        self.channel_offset, self.data_offset = channel_offset, data_offset

    def __str__(self):
        """Return a one-line dump of every field, prefixed by the channel number."""
        parts = [
            f"padding_1={self.padding_1}",
            f"padding_2={self.padding_2}",
            f"channel_number={self.channel_number}",
            f"block_size={self.block_size}",
            f"channel_offset={self.channel_offset}",
            f"data_offset={self.data_offset}",
        ]
        return f"Header {self.channel_number}: " + ", ".join(parts)

def create_large_test_data(channels=96, rows=1024, cols=1024, seed=None, max_block_values=None):
    """Build a large data cube and cut it into a shuffled block stream.

    Every channel is flattened and split into consecutive blocks of random
    length. The blocks of all channels are shuffled and concatenated into one
    stream, with one ``Header`` per block recording its source channel, its
    offset inside that channel, its length, and its offset in the stream.
    All offsets and lengths are counts of uint32 elements.

    Args:
        channels: Number of channels (default 96).
        rows: Rows per channel (default 1024).
        cols: Columns per channel (default 1024).
        seed: Optional seed. ``None`` (default) draws from the module-level
            ``random`` generator; any other value uses a private
            ``random.Random(seed)`` so the plan is repeatable.
        max_block_values: Largest allowed block length in elements. ``None``
            (default) means ``MAX_UINT32_VALUES_PER_BLOCK``.

    Returns:
        Tuple ``(data_cube, shuffled_data, headers)``: the original
        ``(channels, rows, cols)`` uint32 cube, the 1-D uint32 stream, and the
        list of ``Header`` objects in stream order.

    Raises:
        ValueError: If ``max_block_values`` is smaller than the minimum block
            length of 3.
    """
    min_block = 3  # blocks are at least this long, except a channel's tail
    if max_block_values is None:
        max_block_values = MAX_UINT32_VALUES_PER_BLOCK
    if max_block_values < min_block:
        raise ValueError(f"max_block_values must be at least {min_block}, got {max_block_values}")

    rng = random if seed is None else random.Random(seed)
    channel_size = rows * cols
    data_cube = np.zeros((channels, rows, cols), dtype=np.uint32)

    print("🔢 Filling data cube...")
    # Channel c holds (c + 1) * 1_000_000 plus the element's flat index.
    ramp = np.arange(channel_size, dtype=np.uint32).reshape(rows, cols)
    for c in range(channels):
        data_cube[c] = (c + 1) * 1_000_000 + ramp

    print("🔀 Generating shuffle plan...")

    def generate_random_shuffle_plan(data_cube):
        """Split each channel into consecutive blocks and return them shuffled.

        Each plan entry is ``(channel number, channel offset, block size)``.
        """
        n_channels, n_rows, n_cols = data_cube.shape
        total = n_rows * n_cols  # only the length is needed, so nothing is copied
        plan = []
        for channel in range(n_channels):
            offset = 0
            while offset < total:
                remaining = total - offset
                if remaining <= min_block:
                    block_size = remaining
                else:
                    block_size = rng.randint(min_block, min(max_block_values, remaining))
                plan.append((channel, offset, block_size))
                offset += block_size
        rng.shuffle(plan)
        return plan

    shuffle_plan = generate_random_shuffle_plan(data_cube)

    # One flat (channels, channel_size) layout replaces a full-channel
    # flatten() copy for every block.
    data_flat = data_cube.reshape(channels, channel_size)

    # The plan covers every element exactly once, so the stream is allocated
    # up front instead of being grown as a Python list of scalars.
    shuffled_data = np.empty(data_cube.size, dtype=np.uint32)
    headers = []
    data_offset = 0
    for channel_number, channel_offset, block_size in shuffle_plan:
        shuffled_data[data_offset:data_offset + block_size] = \
            data_flat[channel_number, channel_offset:channel_offset + block_size]
        headers.append(Header(
            channel_number=channel_number,
            block_size=block_size,
            channel_offset=channel_offset,
            data_offset=data_offset,
        ))
        data_offset += block_size

    return data_cube, shuffled_data, headers

def write_binary_and_test():
    """Generate the large test case, write it as binary files, and verify it.

    Files written to the current directory:
        shuffled_data.bin - the shuffled stream as raw uint32 values
        headers.bin       - one 6-field record per block, packed with the
                            struct format '6I' in the order padding_1,
                            padding_2, channel_number, block_size,
                            channel_offset, data_offset

    Both files use the machine's native byte order. After writing, the cube
    is rebuilt from the in-memory stream and headers (the files are not read
    back) and compared with the original; the outcome is printed.
    """
    original, shuffled_data, headers = create_large_test_data()

    print("💾 Writing shuffled binary data...")
    with open('shuffled_data.bin', 'wb') as f:
        shuffled_data.tofile(f)

    print("💾 Writing headers to binary file...")
    header_record = struct.Struct('6I')  # 6 unsigned 32-bit integers
    with open('headers.bin', 'wb') as f:
        for h in headers:
            f.write(header_record.pack(
                h.padding_1,
                h.padding_2,
                h.channel_number,
                h.block_size,
                h.channel_offset,
                h.data_offset,
            ))

    print("🔁 Reconstructing...")
    channels, rows, cols = original.shape
    # Blocks are written straight into one flat per-channel buffer, which
    # avoids a flatten() copy of every channel and the copy-back afterwards.
    assembled_flat = np.zeros((channels, rows * cols), dtype=original.dtype)
    for header in headers:
        block = shuffled_data[header.data_offset: header.data_offset + header.block_size]
        assembled_flat[
            header.channel_number,
            header.channel_offset: header.channel_offset + header.block_size,
        ] = block
    assembled = assembled_flat.reshape(original.shape)

    print("✅ Files created.")
    if np.array_equal(original, assembled):
        print("✅ Reconstruction MATCHES original!")
    else:
        print("❌ Reconstruction failed — mismatch detected.")

# Run the large binary-file test when this cell executes as the main module.
if __name__ == "__main__":
    write_binary_and_test()


🔢 Filling data cube...
🔀 Generating shuffle plan...
💾 Writing shuffled binary data...
💾 Writing headers to binary file...
🔁 Reconstructing...
✅ Files created.
✅ Reconstruction MATCHES original!
