<a href="https://colab.research.google.com/github/SapirBashan/median-quick-select/blob/main/simulator_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Shuffle channel implementation




In [None]:
import numpy as np
import struct
import os

# Define header structure
class Header:
    """Fixed-size binary descriptor for one channel block.

    Serialized layout (little-endian, no alignment padding):
    ``padding_1``, ``padding_2`` and ``channel_number`` as uint16, followed by
    ``block_size``, ``channel_offset`` and ``data_offset`` as uint32.

    Args:
        channel_number: Index of the channel this header describes.
        block_size: Number of uint32 values in the block.
        padding_1: First padding field (stored as-is).
        padding_2: Second padding field (stored as-is).
    """

    # Single source of truth for the on-disk layout so that pack(), size()
    # and unpack() can never drift apart.
    _FORMAT = '<HHHIII'

    def __init__(self, channel_number, block_size=16384, padding_1=0, padding_2=0):
        self.padding_1 = padding_1
        self.padding_2 = padding_2
        self.channel_number = channel_number
        self.block_size = block_size  # 64 kB = 16k values of uint32
        self.channel_offset = channel_number * block_size * 4  # 4 bytes per uint32
        self.data_offset = channel_number * block_size

    def pack(self):
        """Return the header serialized to ``Header.size()`` bytes."""
        return struct.pack(
            self._FORMAT,
            self.padding_1,
            self.padding_2,
            self.channel_number,
            self.block_size,
            self.channel_offset,
            self.data_offset,
        )

    @staticmethod
    def size():
        """Return the size in bytes of one serialized header."""
        return struct.calcsize(Header._FORMAT)

    @staticmethod
    def unpack(data):
        """Build a Header from bytes produced by ``pack``.

        Raises:
            struct.error: If ``data`` is not exactly ``Header.size()`` bytes.
        """
        (padding_1, padding_2, channel_number,
         block_size, channel_offset, data_offset) = struct.unpack(Header._FORMAT, data)
        header = Header(channel_number, block_size, padding_1, padding_2)
        # The offsets are stored explicitly, so restore them verbatim instead
        # of keeping the values the constructor derives.
        header.channel_offset = channel_offset
        header.data_offset = data_offset
        return header

def generate_data_cube(num_channels=96, vector_count=1024, vector_length=1024):
    """Generate a uint32 data cube filled with sequential values for testing.

    The values 1, 2, 3, ... are laid out in C order (channel, then vector,
    then element), so every position is uniquely identifiable when a
    reconstruction is validated.

    Args:
        num_channels: Size of the first axis.
        vector_count: Size of the second axis.
        vector_length: Size of the third axis.

    Returns:
        ``np.ndarray`` of shape ``(num_channels, vector_count, vector_length)``
        and dtype ``uint32``.

    Raises:
        ValueError: If any dimension is negative.
    """
    shape = (num_channels, vector_count, vector_length)
    if min(shape) < 0:
        raise ValueError("negative dimensions are not allowed")

    total_elements = num_channels * vector_count * vector_length
    # One vectorised fill instead of a Python loop per element: the default
    # cube has ~100M entries, which is impractically slow element by element.
    return np.arange(1, total_elements + 1, dtype=np.uint32).reshape(shape)

def data_cube_to_shuffled_data(data_cube):
    """Convert a data cube to a channel-shuffled flat array plus headers.

    Each channel is flattened and written as one contiguous block; the blocks
    are placed in a random channel order. ``headers[c]`` describes channel
    ``c``: its ``data_offset`` is the element index in the shuffled array at
    which that channel's block starts.

    Args:
        data_cube: 3-D array of shape (channels, vector_count, vector_length).

    Returns:
        Tuple ``(shuffled_data, headers)`` where ``shuffled_data`` is a 1-D
        uint32 array and ``headers`` is a list of ``Header`` indexed by channel.
    """
    num_channels, vector_count, vector_length = data_cube.shape
    elements_per_channel = vector_count * vector_length

    # Give every header the real block size. With the Header default the
    # stored block_size/channel_offset would not describe the blocks that are
    # actually written for any cube other than one with 16384-element channels.
    headers = [
        Header(channel_number=channel, block_size=elements_per_channel)
        for channel in range(num_channels)
    ]

    shuffled_data = np.zeros(num_channels * elements_per_channel, dtype=np.uint32)

    # Random order in which the channel blocks appear in the output
    block_indices = np.random.permutation(num_channels)

    pos = 0
    for block_idx in block_indices:
        shuffled_data[pos:pos + elements_per_channel] = data_cube[block_idx].ravel()

        # Record where this channel's block landed in the shuffled data
        headers[block_idx].data_offset = pos
        pos += elements_per_channel

    return shuffled_data, headers

def save_data_files(data_cube, shuffled_data, headers, dimensions=None):
    """Write the cube, the shuffled stream, the headers and the dimensions to disk.

    Files are created in the current working directory:
    ``data_cube.bin`` and ``data.bin`` (raw array bytes), ``headers.bin``
    (each header's ``pack()`` output back to back) and ``dimensions.txt``
    (three space-separated values).

    Args:
        data_cube: Array whose raw bytes go to ``data_cube.bin``.
        shuffled_data: Array whose raw bytes go to ``data.bin``.
        headers: Iterable of objects exposing ``pack()``.
        dimensions: Optional 3-item sequence; defaults to ``data_cube.shape``.
    """
    dims = data_cube.shape if dimensions is None else dimensions

    # Raw binary dumps of both arrays
    for path, array in (('data_cube.bin', data_cube), ('data.bin', shuffled_data)):
        with open(path, 'wb') as out:
            out.write(array.tobytes())

    # Packed headers, one after another
    with open('headers.bin', 'wb') as out:
        out.writelines(header.pack() for header in headers)

    # Dimensions metadata so the loader does not have to guess the shape
    with open('dimensions.txt', 'w') as out:
        out.write(' '.join(str(dims[axis]) for axis in range(3)))

def load_data_files():
    """Load the data cube, shuffled data and headers from the working directory.

    Reads ``dimensions.txt`` (if present), ``data_cube.bin``, ``data.bin`` and
    ``headers.bin``. When ``dimensions.txt`` is missing, the dimensions default
    to 96 x 1024 x 1024. If the cube cannot be reshaped to those dimensions, the
    two known demo sizes (36 elements -> 4x3x3, 27 elements -> 3x3x3) are tried.

    Returns:
        Tuple ``(data_cube, shuffled_data, headers, (num_channels,
        vector_count, vector_length))``.

    Raises:
        ValueError: If ``data_cube.bin`` is not a whole number of uint32 values,
            or its element count matches neither the expected dimensions nor
            one of the known demo sizes.
    """
    # Load dimensions
    have_dimensions_file = os.path.exists('dimensions.txt')
    if have_dimensions_file:
        with open('dimensions.txt', 'r') as f:
            dims = f.read().strip().split()
        num_channels = int(dims[0])
        vector_count = int(dims[1])
        vector_length = int(dims[2])
    else:
        # Default dimensions
        num_channels = 96
        vector_count = 1024
        vector_length = 1024

    # Sanity-check the cube file size against the expected dimensions
    data_cube_size = os.path.getsize('data_cube.bin')
    expected_size = num_channels * vector_count * vector_length * 4  # 4 bytes per uint32

    if data_cube_size != expected_size:
        print(f"Warning: data_cube.bin size {data_cube_size} does not match expected size {expected_size}")
        if have_dimensions_file:
            print(f"Using dimensions from dimensions.txt: {num_channels}×{vector_count}×{vector_length}")
        else:
            print("Unable to determine correct dimensions, using default")

    with open('data_cube.bin', 'rb') as f:
        data_cube_bytes = f.read(data_cube_size)

    # Interpret the bytes once; every reshape below is a view on this buffer.
    flat_cube = np.frombuffer(data_cube_bytes, dtype=np.uint32)

    try:
        data_cube = flat_cube.reshape(num_channels, vector_count, vector_length)
    except ValueError as e:
        print(f"Error reshaping data cube: {e}")
        total_elements = flat_cube.size
        print(f"Total elements: {total_elements}")
        # Fall back to the shapes used by the small demo runs
        known_shapes = {36: (4, 3, 3), 27: (3, 3, 3)}
        if total_elements not in known_shapes:
            raise ValueError(f"Cannot determine dimensions for {total_elements} elements")
        num_channels, vector_count, vector_length = known_shapes[total_elements]
        data_cube = flat_cube.reshape(num_channels, vector_count, vector_length)

    # Load shuffled data
    with open('data.bin', 'rb') as f:
        shuffled_data = np.frombuffer(f.read(), dtype=np.uint32)

    # Load headers; any trailing partial header is ignored by the floor division
    header_size = Header.size()
    num_headers = os.path.getsize('headers.bin') // header_size

    with open('headers.bin', 'rb') as f:
        headers = [Header.unpack(f.read(header_size)) for _ in range(num_headers)]

    return data_cube, shuffled_data, headers, (num_channels, vector_count, vector_length)

def reconstruct_data_cube(shuffled_data, headers, vector_count=1024, vector_length=1024):
    """Rebuild the channel-ordered cube from the shuffled stream.

    For every header, ``vector_count * vector_length`` values starting at
    ``header.data_offset`` are read from ``shuffled_data`` and stored as
    channel ``header.channel_number`` of the result.

    Args:
        shuffled_data: 1-D array holding the channel blocks back to back.
        headers: Objects exposing ``channel_number`` and ``data_offset``;
            the number of headers sets the number of channels.
        vector_count: Rows per channel.
        vector_length: Columns per channel.

    Returns:
        uint32 array of shape ``(len(headers), vector_count, vector_length)``.
    """
    channel_shape = (vector_count, vector_length)
    elements_per_channel = vector_count * vector_length
    cube = np.zeros((len(headers),) + channel_shape, dtype=np.uint32)

    for hdr in headers:
        start = hdr.data_offset
        flat_channel = shuffled_data[start:start + elements_per_channel]
        cube[hdr.channel_number] = flat_channel.reshape(channel_shape)

    return cube

def verify_reconstruction(original_cube, reconstructed_cube):
    """Compare two cubes and report the outcome on stdout.

    On a mismatch, the total number of differing positions is printed,
    followed by up to ten of them with both values.

    Returns:
        True when the arrays are equal, False otherwise.
    """
    if np.array_equal(original_cube, reconstructed_cube):
        print("✅ Reconstruction successful - All values match!")
        return True

    print("❌ Reconstruction FAILED")
    diff_idx = np.where(original_cube != reconstructed_cube)
    print(f"Found {len(diff_idx[0])} mismatches")

    # Only the first ten differing positions are listed
    first_ten = zip(diff_idx[0][:10], diff_idx[1][:10], diff_idx[2][:10])
    for where in first_ten:
        print(f"Mismatch at {where}: Original={original_cube[where]}, Reconstructed={reconstructed_cube[where]}")
    return False

def main():
    """Run the small shuffle/save/load/reconstruct demo, then offer the full run.

    A 4 x 3 x 3 cube is generated, shuffled, written to disk, read back and
    reconstructed. If verification succeeds, the user is asked on stdin whether
    the full 96 x 1024 x 1024 files should be generated as well.
    """
    # Demo-sized cube; use 96 x 1024 x 1024 for the actual workload
    small_dims = (4, 3, 3)

    print("Generating data cube...")
    data_cube = generate_data_cube(*small_dims)
    print(f"Data cube shape: {data_cube.shape}")

    # Show the first two channels so the layout is visible
    print("\nSample of data cube (first 2 channels):")
    for c, channel in enumerate(data_cube[:2]):
        print(f"\nChannel {c}:")
        print(channel)

    print("\nShuffling data and creating headers...")
    shuffled_data, headers = data_cube_to_shuffled_data(data_cube)

    print(f"Shuffled data length: {len(shuffled_data)}")
    print(f"Number of headers: {len(headers)}")

    print("\nSaving files...")
    save_data_files(data_cube, shuffled_data, headers, small_dims)

    print("\nLoading files...")
    loaded_cube, loaded_data, loaded_headers, dimensions = load_data_files()

    print(f"Loaded data cube shape: {loaded_cube.shape}")

    print("\nReconstructing data cube from loaded files...")
    _, loaded_vector_count, loaded_vector_length = dimensions
    reconstructed_cube = reconstruct_data_cube(
        loaded_data, loaded_headers, loaded_vector_count, loaded_vector_length
    )

    print("\nVerifying reconstruction...")
    if not verify_reconstruction(loaded_cube, reconstructed_cube):
        return

    print("\nDo you want to run the full-scale test (96×1024×1024)? (y/n)")
    if input().strip().lower() != 'y':
        print("Skipping full-scale test.")
        return

    print("\nFull-scale test")
    print("Generating full 96×1024×1024 data cube...")
    data_cube_full = generate_data_cube()
    shuffled_data_full, headers_full = data_cube_to_shuffled_data(data_cube_full)
    save_data_files(data_cube_full, shuffled_data_full, headers_full)
    print("Full-scale files generated successfully.")

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Generating data cube...
Data cube shape: (4, 3, 3)

Sample of data cube (first 2 channels):

Channel 0:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Channel 1:
[[10 11 12]
 [13 14 15]
 [16 17 18]]

Shuffling data and creating headers...
Shuffled data length: 36
Number of headers: 4

Saving files...

Loading files...
Loaded data cube shape: (4, 3, 3)

Reconstructing data cube from loaded files...

Verifying reconstruction...
✅ Reconstruction successful - All values match!

Do you want to run the full-scale test (96×1024×1024)? (y/n)
y

Full-scale test
Generating full 96×1024×1024 data cube...
Full-scale files generated successfully.


# Shuffle channel implementation test with small text data

In [None]:
import numpy as np
import struct
import os

# Define header structure as in the original code
class Header:
    """In-memory descriptor of one channel block (text-dump variant).

    ``data_offset`` starts as ``channel_number * block_size`` (in elements) and
    ``channel_offset`` as the same position in bytes (4 bytes per uint32).
    """

    def __init__(self, channel_number, block_size=16, padding_1=0, padding_2=0):
        element_offset = channel_number * block_size
        self.padding_1 = padding_1
        self.padding_2 = padding_2
        self.channel_number = channel_number
        self.block_size = block_size
        self.channel_offset = element_offset * 4  # 4 bytes per uint32
        self.data_offset = element_offset

    def __str__(self):
        """Return a one-line ``name=value`` listing of all fields."""
        field_names = (
            "padding_1",
            "padding_2",
            "channel_number",
            "block_size",
            "channel_offset",
            "data_offset",
        )
        listing = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"Header {self.channel_number}: {listing}"

# Create a small test data cube: 3 channels x 4 rows x 4 columns
def create_test_data():
    """Build a 3 x 4 x 4 test cube, shuffle it by channel and reconstruct it.

    Cell values encode their position as ``(channel+1)(row+1)(col+1)``, e.g.
    channel 0 / row 0 / col 0 holds 111. Channel blocks are placed in the
    shuffled array in the fixed order 2, 0, 1 and each header's ``data_offset``
    is set to the start of its block.

    Returns:
        Tuple ``(data_cube, shuffled_data, headers, reconstructed)``.
    """
    channels, rows, cols = 3, 4, 4
    block_len = rows * cols

    # Position-encoding values built by broadcasting: hundreds = channel,
    # tens = row, units = column (all 1-based).
    hundreds = (np.arange(channels) + 1) * 100
    tens = (np.arange(rows) + 1) * 10
    units = np.arange(cols) + 1
    data_cube = (
        hundreds[:, None, None] + tens[None, :, None] + units[None, None, :]
    ).astype(np.uint32)

    headers = [Header(channel_number=c, block_size=block_len) for c in range(channels)]

    # Fixed shuffle pattern for the test: channel 2, then 0, then 1
    shuffle_order = [2, 0, 1]
    shuffled_data = np.zeros(channels * block_len, dtype=np.uint32)
    for slot, c in enumerate(shuffle_order):
        start = slot * block_len
        shuffled_data[start:start + block_len] = data_cube[c].ravel()
        # Remember where this channel's block sits in the shuffled data
        headers[c].data_offset = start

    # Undo the shuffle using only the headers
    reconstructed = np.zeros_like(data_cube)
    for header in headers:
        begin = header.data_offset
        block = shuffled_data[begin:begin + block_len]
        reconstructed[header.channel_number] = block.reshape(rows, cols)

    return data_cube, shuffled_data, headers, reconstructed

def write_test_files():
    """Dump the test data to text files and report whether the round trip matched.

    Writes ``headers.txt``, ``original_data_cube.txt``, ``data.txt`` and
    ``recon_data_cube.txt`` to the current directory, prints a summary, and
    prints whether the reconstructed cube equals the original.
    """
    data_cube, shuffled_data, headers, reconstructed = create_test_data()

    def dump_cube(path, cube):
        # One "Channel c:" section per channel, one "Row r:" line per row
        with open(path, 'w') as out:
            for c, channel in enumerate(cube):
                out.write(f"Channel {c}:\n")
                for r, row in enumerate(channel):
                    cells = ' '.join(f"{value:4d}" for value in row)
                    out.write(f"  Row {r}: {cells}\n")
                out.write('\n')

    # Headers, one per line
    with open('headers.txt', 'w') as out:
        for header in headers:
            out.write(str(header) + '\n')

    dump_cube('original_data_cube.txt', data_cube)

    # Shuffled stream, wrapped to the cube's row width
    row_width = data_cube.shape[2]
    with open('data.txt', 'w') as out:
        out.write("Shuffled data array:\n")
        for start in range(0, len(shuffled_data), row_width):
            values = ' '.join(f"{val:4d}" for val in shuffled_data[start:start + row_width])
            out.write(f"  {values}\n")

    dump_cube('recon_data_cube.txt', reconstructed)

    # Summary
    for line in (
        "Test files created:",
        "1. headers.txt - Contains header information",
        "2. original_data_cube.txt - Original 3×4×4 data cube",
        "3. data.txt - Shuffled data array",
        "4. recon_data_cube.txt - Reconstructed data cube from shuffled data",
    ):
        print(line)

    # Compare the original cube with its reconstruction
    if not np.array_equal(data_cube, reconstructed):
        print("❌ Reconstruction FAILED")
    else:
        print("✅ Reconstruction successful - All values match!")

# Generate the text test files only when this code is executed directly, not when imported.
if __name__ == "__main__":
    write_test_files()

Test files created:
1. headers.txt - Contains header information
2. original_data_cube.txt - Original 3×4×4 data cube
3. data.txt - Shuffled data array
4. recon_data_cube.txt - Reconstructed data cube from shuffled data
✅ Reconstruction successful - All values match!


# version 2 of the simulator - text for test
- channel number -> what channel
- block size -> size of the block that needs to be copied
- channel offset -> where in the channel the data needs to be copied to
- data offset -> where in the data do i copy from

`block = data[data_offset : data_offset + block_size]`

`data_output[channel_number][channel_offset : channel_offset + block_size] = block`


In [3]:
import numpy as np

class Header:
    """Descriptor of one block in the shuffled stream (version 2).

    Attributes are stored exactly as given: the block holds ``block_size``
    values, is read from the stream at ``data_offset`` and belongs at
    ``channel_offset`` inside channel ``channel_number``.
    """

    def __init__(self, channel_number, block_size, channel_offset, data_offset, padding_1=0, padding_2=0):
        self.padding_1, self.padding_2 = padding_1, padding_2
        self.channel_number = channel_number
        self.block_size = block_size
        self.channel_offset = channel_offset
        self.data_offset = data_offset

    def __str__(self):
        """Return a one-line ``name=value`` listing of all fields."""
        field_names = (
            "padding_1",
            "padding_2",
            "channel_number",
            "block_size",
            "channel_offset",
            "data_offset",
        )
        listing = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"Header {self.channel_number}: {listing}"

def create_test_data():
    """Build a 4 x 5 x 5 cube, cut it into shuffled blocks and reassemble it.

    Every channel is split into two pieces according to a fixed plan; the
    pieces are concatenated into one stream and a header is recorded per
    piece. The cube is then reassembled from the stream using only the headers.

    Returns:
        Tuple ``(data_cube, shuffled_data, headers, assembled)``.
    """
    channels = 4
    rows, cols = 5, 5
    channel_size = rows * cols

    # Unique values encoding (channel+1)(row+1)(col+1)
    hundreds = (np.arange(channels) + 1) * 100
    tens = (np.arange(rows) + 1) * 10
    units = np.arange(cols) + 1
    data_cube = (
        hundreds[:, None, None] + tens[None, :, None] + units[None, None, :]
    ).astype(np.uint32)

    # (channel_number, channel_offset, block_size) in stream order;
    # each channel appears twice and its two pieces cover all 25 values.
    shuffle_plan = [
        (2, 0, 12),
        (0, 0, 5),
        (1, 0, 8),
        (3, 0, 13),
        (2, 12, 13),
        (1, 8, 17),
        (0, 5, 20),
        (3, 13, 12),
    ]

    pieces = []
    headers = []
    stream_pos = 0
    for ch, start, length in shuffle_plan:
        pieces.append(data_cube[ch].ravel()[start:start + length])
        headers.append(
            Header(
                channel_number=ch,
                block_size=length,
                channel_offset=start,
                data_offset=stream_pos,
            )
        )
        stream_pos += length

    shuffled_data = np.concatenate(pieces)

    # Reassemble: write each block into a flat per-channel view of the result
    assembled = np.zeros_like(data_cube)
    flat_channels = assembled.reshape(channels, channel_size)
    for h in headers:
        block = shuffled_data[h.data_offset:h.data_offset + h.block_size]
        flat_channels[h.channel_number, h.channel_offset:h.channel_offset + h.block_size] = block

    return data_cube, shuffled_data, headers, assembled

def write_test_files():
    """Dump the version-2 test data to text files and report the round-trip result.

    Writes ``original_data_cube.txt``, ``shuffled_data.txt``, ``headers.txt``
    and ``assembled_data_cube.txt`` to the current directory, then prints
    whether the assembled cube equals the original.
    """
    original, shuffled_data, headers, assembled = create_test_data()

    def dump_cube(path, cube):
        # One "Channel c:" section per channel, one "Row r:" line per row
        with open(path, 'w') as out:
            for c, channel in enumerate(cube):
                out.write(f"Channel {c}:\n")
                for r, values in enumerate(channel):
                    row = ' '.join(f"{v:4d}" for v in values)
                    out.write(f"  Row {r}: {row}\n")
                out.write('\n')

    # 1. Original cube
    dump_cube('original_data_cube.txt', original)

    # 2. Shuffled stream, five values per line
    with open('shuffled_data.txt', 'w') as out:
        out.write("Shuffled Data Stream:\n")
        for start in range(0, len(shuffled_data), 5):
            values = ' '.join(f"{v:4d}" for v in shuffled_data[start:start + 5])
            out.write(f"  {values}\n")

    # 3. Headers, in the order the blocks appear in the stream
    with open('headers.txt', 'w') as out:
        for h in headers:
            out.write(str(h) + '\n')

    # 4. Assembled cube
    dump_cube('assembled_data_cube.txt', assembled)

    # Report
    print("✅ Files created.")
    if not np.array_equal(original, assembled):
        print("❌ Reconstruction failed — mismatch detected.")
    else:
        print("✅ Reconstruction MATCHES original!")

# Generate the version-2 text test files only when this code is executed directly, not when imported.
if __name__ == "__main__":
    write_test_files()


✅ Files created.
✅ Reconstruction MATCHES original!
