In [4]:
import gzip, numpy as np

# Original array (not a scalar)
data_original = np.array([3, 1, 1, 2, 2, 2], dtype=np.int8)  # Redundant values
compressed_original = len(.compress(data_original.tobytes()))

# Sorted array (improves run-length redundancy)
data_sorted = np.sort(data_original)  # [1, 1, 2, 2, 2, 3]
compressed_sorted = len(gzip.compress(data_sorted.tobytes()))

print(f"Original: {compressed_original} bytes")  # Example: 18 bytes
print(f"Sorted: {compressed_sorted} bytes")     # Example: 16 bytes (smaller!)

Original: 26 bytes
Sorted: 26 bytes


In [5]:
import numpy as np
import zstandard as zstd

# Example data (modify this to test different cases)
data = np.array([3, 1, 4, 1, 5, 9, 2, 6], dtype=np.int8)  # Original array
sorted_data = np.sort(data)  # Sorted array: [1, 1, 2, 3, 4, 5, 6, 9]

# Initialize Zstd compressor
cctx = zstd.ZstdCompressor(level=3)

# Compress original and sorted arrays
compressed_original = len(cctx.compress(data.tobytes()))
compressed_sorted = len(cctx.compress(sorted_data.tobytes()))

print(f"Original: {compressed_original} bytes")  # Example output: 14 bytes
print(f"Sorted:   {compressed_sorted} bytes")   # Example output: 12 bytes

Original: 17 bytes
Sorted:   17 bytes


In [14]:
import numpy as np
import zstandard as zstd

# Case 1: Redundant data (sorting helps)
data = np.array([3, 1, 1, 2, 2, 2, 3, 1, 2, 2], dtype=np.int8)
compressed_original = len(zstd.ZstdCompressor().compress(data.tobytes()))  # 14 bytes
compressed_sorted = len(zstd.ZstdCompressor().compress(np.sort(data).tobytes()))  # 12 bytes
print(f"Redunant: {compressed_original}")

# Case 2: No redundancy (sorting doesn't help)
data = np.array([5, 7, 3, 4, 1, 9, 2, 8, 6, 10], dtype=np.int8)
compressed_original = len(zstd.ZstdCompressor().compress(data.tobytes()))  # 11 bytes
compressed_sorted = len(zstd.ZstdCompressor().compress(np.sort(data).tobytes()))  # 11 bytes
print(f"Nonredundant: {compressed_original}")

Redunant: 19
Nonredundant: 19
