In [None]:
import os, random, struct, zlib
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
import shutil


# Detect whether we are running inside Google Colab. Only a failed import
# means "not Colab"; anything else (e.g. KeyboardInterrupt) must propagate
# instead of being silently swallowed by a bare ``except``.
try:
    from google.colab import files
except ImportError:
    COLAB = False
else:
    COLAB = True

# --- Custom Display Function ---
def display_dataframe_to_user(title, df):
    """Print a short Markdown-style preview of a DataFrame to stdout.

    The output consists of a ``## title`` heading, the frame's shape, a
    pipe-delimited table of at most the first five rows, and a dashed
    separator line.

    Args:
        title: Heading text printed above the table.
        df: The pandas DataFrame to preview.
    """
    print(f"\n## {title}")
    print(f"Shape: {df.shape}")
    display_df = df.head(5) if len(df) > 5 else df
    # Column labels are not guaranteed to be strings (default labels are
    # integers); str.join would raise TypeError on them, so convert first.
    headers = [str(col) for col in display_df.columns]
    print("| " + " | ".join(headers) + " |")
    print("|" + "---|" * len(headers))
    for _, row in display_df.iterrows():
        # Floats get a fixed six-decimal rendering; everything else uses str().
        row_values = [f"{v:.6f}" if isinstance(v, float) else str(v) for v in row.values]
        print("| " + " | ".join(row_values) + " |")
    print("-" * 20)

# --- Seed ---
# Seed both generators used by this script: `random` drives the bit-error
# models (randrange / random) and `np.random` drives the per-trial sample
# selection (np.random.choice) in the simulation loop.
random.seed(12345)
np.random.seed(12345)

# --- CRC Helper Functions ---
def reflect_bits(value, width):
    """Return the low ``width`` bits of ``value`` in reversed bit order.

    Bit 0 of the input becomes bit ``width - 1`` of the result and so on;
    any input bits at or above position ``width`` are ignored.
    """
    mirrored = 0
    remaining = value
    for _ in range(width):
        # Pop the lowest remaining bit and push it onto the result.
        mirrored = (mirrored << 1) | (remaining & 1)
        remaining >>= 1
    return mirrored

def crc_bitwise(data: bytes, width: int, poly: int, init: int = 0, xorout: int = 0, reflect_in: bool = False, reflect_out: bool = False):
    """Compute a CRC over ``data`` one bit at a time.

    The register is updated bit-serially, so any ``width >= 1`` works.
    (Aligning each input byte with the top of the register via
    ``byte << (width - 8)`` only works for widths of at least 8.)

    Args:
        data: Message bytes.
        width: Register width in bits; must be at least 1.
        poly: Generator polynomial in normal (MSB-first) form, without the
            implicit top bit.
        init: Initial register value.
        xorout: Value XORed into the final register.
        reflect_in: If true, each input byte is consumed LSB-first.
        reflect_out: If true, the final register is bit-reversed before
            ``xorout`` is applied.

    Returns:
        The CRC as a non-negative int below ``2 ** width``.

    Raises:
        ValueError: If ``width`` is less than 1.
    """
    if width < 1:
        raise ValueError("width must be at least 1 bit")
    mask = (1 << width) - 1
    top_shift = width - 1
    poly &= mask
    reg = init & mask
    # Reflected input is equivalent to feeding each byte's bits LSB-first.
    bit_order = range(8) if reflect_in else range(7, -1, -1)
    for b in data:
        for k in bit_order:
            # Feedback is the register's top bit XOR the incoming message bit.
            feedback = ((reg >> top_shift) ^ (b >> k)) & 1
            reg = (reg << 1) & mask
            if feedback:
                reg ^= poly
    if reflect_out:
        # Reverse exactly `width` bits; reg is already confined to the mask.
        reg = int(format(reg, f"0{width}b")[::-1], 2)
    return (reg ^ xorout) & mask

# Parameter names stored for every preset, in the order used by the rows below.
_PRESET_FIELDS = ("width", "poly", "init", "xorout", "refin", "refout")

# Named CRC parameter sets. Each value is a dict with the keys listed in
# _PRESET_FIELDS; insertion order of the presets is significant because the
# computation and simulation loops iterate over crc_presets.keys().
crc_presets = {
    preset_name: dict(zip(_PRESET_FIELDS, preset_values))
    for preset_name, preset_values in (
        ("CRC-8-ATM", (8, 0x07, 0x00, 0x00, False, False)),
        ("CRC-8-Dallas", (8, 0x31, 0x00, 0x00, True, True)),
        ("CRC-16-CCITT", (16, 0x1021, 0xFFFF, 0x0000, False, False)),
        ("CRC-16-MODBUS", (16, 0x8005, 0xFFFF, 0x0000, True, True)),
        ("CRC-32-IEEE", (32, 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, True, True)),
        ("CRC-32C", (32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, True, True)),
        ("CRC-64-ECMA", (64, 0x42F0E1EBA9EA3693, 0x0000000000000000, 0x0000000000000000, False, False)),
    )
}

def compute_crc_for_preset(name, data_bytes):
    """Return the CRC of ``data_bytes`` for the preset called ``name``.

    The preset is looked up in ``crc_presets`` first, so an unknown name
    raises ``KeyError``. "CRC-32-IEEE" is delegated to ``zlib.crc32``; every
    other preset is evaluated with ``crc_bitwise`` using its stored parameters.
    """
    params = crc_presets[name]
    if name != "CRC-32-IEEE":
        return crc_bitwise(
            data_bytes,
            width=params["width"],
            poly=params["poly"],
            init=params["init"],
            xorout=params["xorout"],
            reflect_in=params["refin"],
            reflect_out=params["refout"],
        )
    return zlib.crc32(data_bytes) & 0xFFFFFFFF

# --- Data Preparation ---
# Load the four bundled scikit-learn datasets and generate one synthetic
# classification problem (fixed random_state so it is reproducible).
iris = datasets.load_iris()
digits = datasets.load_digits()
wine = datasets.load_wine()
bc = datasets.load_breast_cancer()
Xsyn, ysyn = make_classification(n_samples=300, n_features=12, random_state=42)

# Map dataset name -> {"data": feature matrix, "target": labels}. Insertion
# order is preserved and determines the order of all later result rows.
dataset_map = {
    label: {"data": bunch["data"], "target": bunch["target"]}
    for label, bunch in (
        ("iris", iris),
        ("digits", digits),
        ("wine", wine),
        ("breast_cancer", bc),
    )
}
dataset_map["synthetic"] = {"data": Xsyn, "target": ysyn}

def sample_to_bytes(sample: np.ndarray):
    """Serialize a 1-D sample to bytes, eight little-endian bytes per element.

    Floating-point arrays (dtype kind ``'f'``) are packed as IEEE-754
    doubles (``<d``); every other dtype is converted with ``int()`` and
    packed as signed 64-bit integers (``<q``).
    """
    if sample.dtype.kind == "f":
        code, cast = "d", float
    else:
        code, cast = "q", int
    values = [cast(x) for x in sample]
    # "<" disables alignment padding, so one pack call with a repeat count
    # yields the same bytes as packing each element separately.
    return struct.pack(f"<{len(values)}{code}", *values)

# --- Full CRC Computation ---
# crc_values_store[dataset][crc] -> object-dtype array holding one CRC per sample.
crc_values_store = {}
# One summary record per (dataset, crc) pair, in iteration order.
summary_rows = []
crc_test_list = list(crc_presets.keys())

for ds_name, ds in dataset_map.items():
    data = ds["data"]
    n = data.shape[0]
    per_crc = {}
    crc_values_store[ds_name] = per_crc

    for crc_name in crc_test_list:
        # CRC of every sample (row) of the dataset under this preset.
        vals = [
            int(compute_crc_for_preset(crc_name, sample_to_bytes(data[i])))
            for i in range(n)
        ]

        # dtype=object keeps the values as Python ints of arbitrary size.
        vals_array = np.array(vals, dtype=object)
        per_crc[crc_name] = vals_array

        distinct_vals = {int(v) for v in vals_array}
        summary_rows.append(
            {
                "dataset": ds_name,
                "crc": crc_name,
                "n_samples": n,
                "unique_crc_values": len(distinct_vals),
            }
        )

summary_df = pd.DataFrame(summary_rows)
display_dataframe_to_user("CRC summary (Full Run)", summary_df)

# --- Full Histograms ---
# Output directory for the per-dataset histogram PNGs (created if missing).
fig_dir_full = "/mnt/data/crc_figs_full"
os.makedirs(fig_dir_full, exist_ok=True)

# One figure per dataset, with one histogram panel per CRC preset laid out
# side by side in a single row.
for ds_name, crc_map in crc_values_store.items():
    plt.figure(figsize=(18,3))
    for i,(crc_name, vals) in enumerate(crc_map.items(), start=1):
        plt.subplot(1, len(crc_map), i)
        # The stored values are Python ints in an object array; convert to a
        # float array for plotting. Wide CRCs (e.g. 64-bit) lose low-order
        # precision here, which only affects bin placement marginally.
        vals_for_hist = np.array([float(v) for v in vals])
        plt.hist(vals_for_hist, bins=25)
        plt.title(crc_name, fontsize=8)
        # Scientific notation on x keeps large CRC values readable as ticks.
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
        plt.xticks(rotation=45, ha='right', fontsize=7)
        plt.yticks(fontsize=7)
    plt.suptitle(f"CRC Value Distribution for {ds_name} Dataset", fontsize=10)
    # Leave room at the top of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig(os.path.join(fig_dir_full, f"hist_{ds_name}.png"))
    # Close the figure so figures do not accumulate across datasets.
    plt.close()

# --- Error Models ---
def flip_random_bits(byte_arr: bytearray, n_bits=1):
    """Flip ``n_bits`` distinct, randomly chosen bits of ``byte_arr`` in place.

    Positions are drawn without replacement. Drawing with replacement could
    pick the same bit twice, cancelling the flip and producing an error
    pattern with fewer than ``n_bits`` changed bits (possibly none).

    Args:
        byte_arr: Buffer to corrupt; modified in place.
        n_bits: Number of bits to flip. Values larger than the buffer's bit
            length are clamped to it (every bit is flipped); values below 1
            flip nothing.

    Returns:
        The same ``byte_arr`` object, for call chaining.
    """
    L = len(byte_arr) * 8
    if L == 0:
        return byte_arr
    count = max(0, min(n_bits, L))
    for pos in random.sample(range(L), count):
        byte_idx, bit_idx = divmod(pos, 8)
        byte_arr[byte_idx] ^= (1 << bit_idx)
    return byte_arr

def burst_error(byte_arr: bytearray, length_bits=8):
    """Invert a contiguous run of ``length_bits`` bits of ``byte_arr`` in place.

    The run starts at a random bit offset chosen so that it fits inside the
    buffer when possible; if the run is longer than the buffer, it is
    truncated at the buffer's end. Bit positions count from the least
    significant bit of byte 0. Returns the same ``byte_arr`` object.
    """
    total_bits = 8 * len(byte_arr)
    if total_bits == 0:
        return byte_arr
    first = random.randrange(0, max(1, total_bits - length_bits + 1))
    # Clip the window so it never runs past the last bit of the buffer.
    stop = min(first + length_bits, total_bits)
    for position in range(first, stop):
        index, offset = divmod(position, 8)
        byte_arr[index] ^= 1 << offset
    return byte_arr

def channel_bit_flip(byte_arr: bytearray, p=0.01):
    """Flip each bit of ``byte_arr`` independently with probability ``p``.

    One ``random.random()`` draw is made per bit, visiting bytes in order
    and, within each byte, bits from least to most significant. The buffer
    is modified in place and also returned.
    """
    for index in range(len(byte_arr)):
        for offset in range(8):
            if random.random() < p:
                byte_arr[index] ^= 1 << offset
    return byte_arr

# --- Full Simulation ---
# Simulation parameters: trials per configuration and the parameter sweep
# for each error model.
trials = 120
bitflip_list = [1, 2, 3, 5]
burst_lengths = [4, 8, 16]
p_list = [0.001, 0.01]

sim_rows = []
crc_sim_list = list(crc_presets.keys())
out_dir_full = "/mnt/data/crc_results_full"
os.makedirs(out_dir_full, exist_ok=True)


def _count_detections(data, sample_indices, crc_name, corrupt, param, n_trials):
    """Count trials in which corrupting a random sample changes its CRC.

    Each trial picks one row of ``data`` at random, serializes it, applies
    ``corrupt(buffer, param)`` to a mutable copy, and counts the trial as
    detected when the CRC of the corrupted bytes differs from the original.
    Trials whose sample serializes to zero bytes are skipped (not detected).
    """
    detected = 0
    for _ in range(n_trials):
        idx = int(np.random.choice(sample_indices))
        orig = sample_to_bytes(data[idx])
        if not orig:
            continue
        orig_crc = compute_crc_for_preset(crc_name, orig)
        mutated = bytearray(orig)
        corrupt(mutated, param)
        if compute_crc_for_preset(crc_name, bytes(mutated)) != orig_crc:
            detected += 1
    return detected


# (model label, parameter sweep, corruption function taking (buffer, param)).
# The order here fixes both the row order and the RNG consumption order:
# random k-bit flips, then bursts, then the per-bit channel model.
_error_models = [
    ("random_k_bits", bitflip_list, lambda buf, k: flip_random_bits(buf, n_bits=k)),
    ("burst", burst_lengths, lambda buf, k: burst_error(buf, length_bits=k)),
    ("channel_p", p_list, lambda buf, p: channel_bit_flip(buf, p=p)),
]

for ds_name, ds in dataset_map.items():
    data = ds["data"]
    n = data.shape[0]
    sample_indices = np.arange(n)

    for crc_name in crc_sim_list:
        for model_name, param_values, corrupt in _error_models:
            for param in param_values:
                detected = _count_detections(data, sample_indices, crc_name, corrupt, param, trials)
                sim_rows.append({"dataset": ds_name, "crc": crc_name, "model": model_name, "param": param, "trials": trials, "detected": detected, "detection_rate": detected/trials})

sim_df = pd.DataFrame(sim_rows)
display_dataframe_to_user("Simulation results (Full Run)", sim_df.head(20))

# --- Save Results to Excel using openpyxl ---
# Write both result tables into one workbook, one sheet per table.
excel_path = os.path.join(out_dir_full, "crc_results_full.xlsx")
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    for sheet_label, frame in (("Simulation", sim_df), ("Summary", summary_df)):
        frame.to_excel(writer, sheet_name=sheet_label, index=False)

# --- Zip Figures ---
# make_archive appends the ".zip" suffix to the base name itself.
figs_zip_base = "/mnt/data/crc_figs_full_zip"
figs_zip_path = figs_zip_base + ".zip"
shutil.make_archive(figs_zip_base, 'zip', fig_dir_full)

print("\nFull run complete.")
print("Excel file saved at:", excel_path)
print("Figures ZIP saved at:", figs_zip_path)

# --- Download in Colab if available ---
if COLAB:
    for artifact in (excel_path, figs_zip_path):
        files.download(artifact)




## CRC summary (Full Run)
Shape: (35, 4)
| dataset | crc | n_samples | unique_crc_values |
|---|---|---|---|
| iris | CRC-8-ATM | 150 | 114 |
| iris | CRC-8-Dallas | 150 | 108 |
| iris | CRC-16-CCITT | 150 | 149 |
| iris | CRC-16-MODBUS | 150 | 149 |
| iris | CRC-32-IEEE | 150 | 149 |
--------------------

## Simulation results (Full Run)
Shape: (20, 7)
| dataset | crc | model | param | trials | detected | detection_rate |
|---|---|---|---|---|---|---|
| iris | CRC-8-ATM | random_k_bits | 1.000000 | 120 | 120 | 1.000000 |
| iris | CRC-8-ATM | random_k_bits | 2.000000 | 120 | 120 | 1.000000 |
| iris | CRC-8-ATM | random_k_bits | 3.000000 | 120 | 120 | 1.000000 |
| iris | CRC-8-ATM | random_k_bits | 5.000000 | 120 | 120 | 1.000000 |
| iris | CRC-8-ATM | burst | 4.000000 | 120 | 120 | 1.000000 |
--------------------

Full run complete.
Excel file saved at: /mnt/data/crc_results_full/crc_results_full.xlsx
Figures ZIP saved at: /mnt/data/crc_figs_full_zip.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>