
# Boosted ECCT: Boosting Notebook
**Purpose:** Load ECCT baseline models generated by the Training Notebook, collect uncorrected (UC) vectors, perform boosted post-stage fine-tuning, run block-wise schedules, evaluate BER/FER, and (optionally) export Uncor.txt.

**Instructions:** Switch the Colab runtime to GPU if possible (otherwise, set 'device' in CONFIG to 'cpu'). Edit the CONFIG cell below before running any cells to select the code (LDPC or POLAR), blocklength, and other hyperparameters. Then run the notebook from top to bottom. Note that this notebook is configured to boost one transformer model at a time for a specific code, such as LDPC(121, 60) or POLAR(64, 32).



In [None]:
# CONFIG --- edit these values before running ---

from datetime import datetime

# Timestamp of this run; it is embedded in the local work directory name.
RUN_TIMESTAMP = datetime.now().strftime("%d%b%Y_%H%M")

# Every user-tunable setting lives in this one dictionary.
CONFIG = dict(
    # Which code to boost
    code_type='LDPC',
    code_n=121,
    code_k=60,

    # Repositories cloned by the setup cell
    ecct_repo='https://github.com/yoniLc/ECCT.git',
    boost_repo='https://github.com/ghy1228/LDPC_Error_Floor.git',

    # Training / collection / evaluation hyperparameters
    epochs_base=50,
    epochs_post=40,
    target_uc=3000,
    uc_snr_db=4.5,
    eval_ebno_list=[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0],
    batch_size=128,
    uc_batch_size=512,
    post_batch_size=64,
    seed=42,
    device='cuda',
)

# 'workdir' is derived from the code parameters above plus the run timestamp,
# so each experiment gets its own directory and UC samples are never mixed.
CONFIG['workdir'] = "/content/{code_type}_n{code_n}_k{code_k}_".format(**CONFIG) + RUN_TIMESTAMP

print("CONFIG:", CONFIG)

## Setup: Drive Mount, Clone Repos, and Locate Baseline Model

In [None]:
import os, sys, subprocess, glob, shutil
import torch, numpy as np, matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from pathlib import Path
from google.colab import drive

# Mount Google Drive; baseline models are searched for under this folder.
drive.mount('/content/drive')
drive_base_folder = "/content/drive/MyDrive/Colab_Models"

# Create temporary local workdir and clone repos
workdir = CONFIG['workdir']
os.makedirs(workdir, exist_ok=True)
os.chdir(workdir)

for repo_name, repo_url in [('ECCT', CONFIG['ecct_repo']), ('LDPC_Error_Floor', CONFIG['boost_repo'])]:
    if os.path.isdir(repo_name):
        continue  # already cloned into this workdir
    print(f'Cloning {repo_name}...')
    clone_result = subprocess.run(['git', 'clone', repo_url, repo_name], check=False, stdout=subprocess.DEVNULL)
    # The clone stays best-effort (no exception), but a failure is reported
    # here instead of surfacing later as a confusing import error.
    if clone_result.returncode != 0:
        print(f'WARNING: git clone of {repo_name} from {repo_url} failed '
              f'(exit code {clone_result.returncode}); later cells that need it will fail.')

print('Installing Python packages...')
pip_result = subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-q', 'einops', 'tqdm', 'matplotlib', 'scikit-learn'],
    check=False,
)
if pip_result.returncode != 0:
    print(f'WARNING: pip install exited with code {pip_result.returncode}; some packages may be missing.')

# Add ECCT directory to Python path for imports
sys.path.insert(0, os.path.join(workdir, 'ECCT'))

# Locate the Baseline Model on Drive
model_prefix = f"{CONFIG['code_type']}_n{CONFIG['code_n']}_k{CONFIG['code_k']}"
search_pattern = os.path.join(drive_base_folder, f"{model_prefix}*_ECCT_Boosted_Results")

# Find all matching Drive root folders (e.g., LDPC_n121_k60_...) and sort by
# modification time (newest first)
all_model_dirs = sorted(
    glob.glob(search_pattern),
    key=os.path.getmtime,
    reverse=True
)

if not all_model_dirs:
    raise RuntimeError(
        f"Could not find any baseline model folder on Drive with prefix: {model_prefix}"
    )

# The baseline lives in a sub-folder whose name starts with the code type
# and which contains the 'best_model' file.
code_sub_folder_prefix = f"{CONFIG['code_type'].upper()}__Code"

# The results folder written by a previous run of THIS notebook matches the
# same "<prefix>*_ECCT_Boosted_Results" pattern but holds no baseline
# sub-folder. Blindly taking the newest match would therefore fail after the
# first boosting run. Walk the candidates newest-first and keep the first one
# that actually contains '<CODE>__Code*/best_model'.
drive_root_folder = None
baseline_model_dir = None
baseline_model_path = None
code_sub_folder_search = []
for candidate_root in all_model_dirs:
    candidate_sub_dirs = sorted(
        filter(os.path.isdir, glob.glob(os.path.join(candidate_root, f"{code_sub_folder_prefix}*"))),
        key=os.path.getmtime,
        reverse=True
    )
    for candidate_sub_dir in candidate_sub_dirs:
        candidate_model_path = os.path.join(candidate_sub_dir, "best_model")
        if os.path.exists(candidate_model_path):
            drive_root_folder = candidate_root
            code_sub_folder_search = candidate_sub_dirs
            baseline_model_dir = candidate_sub_dir
            baseline_model_path = candidate_model_path
            break
    if baseline_model_path is not None:
        break

if baseline_model_path is None:
    raise RuntimeError(
        f"Could not find a '{code_sub_folder_prefix}*' sub-folder containing 'best_model' "
        f"in any of the {len(all_model_dirs)} folder(s) matching {search_pattern}"
    )

print("\n--- Baseline Model Loaded ---")
print("Drive Root Folder:", drive_root_folder)
print("Baseline Model Path:", baseline_model_path)

# Define the model_dir to use for saving local and Drive copies
local_model_dir = os.path.join(workdir, 'ECCT', os.path.basename(baseline_model_dir))
os.makedirs(local_model_dir, exist_ok=True)

# Copy baseline model to local workdir for processing
local_baseline_model_path = os.path.join(local_model_dir, 'best_model')
shutil.copy(baseline_model_path, local_baseline_model_path)
print("Baseline model copied locally for processing.")

## Core Imports and Code Object Setup

In [None]:
# Work from inside the cloned ECCT repo so its modules and files resolve.
os.chdir(os.path.join(CONFIG['workdir'], 'ECCT'))

# Core Imports from ECCT
from Main import ECC_Dataset, EbN0_to_std, set_seed
from Codes import Get_Generator_and_Parity, bin_to_sign, sign_to_bin
from Model import ECC_Transformer
import torch.serialization
from torch import nn


# Plain attribute holder describing the code (type, k, n and its matrices).
class CodeObj:
    pass


code = CodeObj()
code.code_type = CONFIG['code_type']
code.k = CONFIG['code_k']
code.n = CONFIG['code_n']

# G and H come back as NumPy arrays; G is stored transposed, both as int64.
G, H = Get_Generator_and_Parity(code, standard_form=False)
code.generator_matrix = torch.from_numpy(G).transpose(0, 1).long()
code.pc_matrix = torch.from_numpy(H).long()
set_seed(CONFIG['seed'])

# Use the configured device only when CUDA is available; otherwise use CPU.
if torch.cuda.is_available():
    device = torch.device(CONFIG['device'])
else:
    device = torch.device('cpu')

# Allow-list the model class for torch's safe unpickler.
torch.serialization.add_safe_globals([ECC_Transformer])

# Load the baseline model (it will be copied for each sweep)
baseline_model = torch.load(
    local_baseline_model_path,
    map_location='cpu',
    weights_only=False,
)
# Later cells refer to the network simply as `model`.
model = baseline_model

print("Core imports and code object setup complete.")

## Sanity check: show key ECCT files (Main.py, Model.py, Codes.py)

In [None]:

# List key files and print top of Main.py to confirm flags
# (lines starting with `!` are IPython shell escapes, so this cell only runs
# inside a notebook environment).
os.chdir(CONFIG['workdir'] + '/ECCT')
print('Current dir:', os.getcwd())
# `|| true` keeps the shell command from reporting failure if a file is missing.
!ls -la Main.py Model.py Codes.py || true
print('\n--- Main.py head ---\n')
# Print lines 1-240 of Main.py.
!sed -n '1,240p' Main.py


## Helper: locate the saved baseline model (Main.py saves `best_model` under Results_ECCT)

In [None]:
# The baseline model has already been located on Drive and copied to a local directory.
# We need to ensure `model_dir` and `model_path` point to this local copy for subsequent steps.
# These variables (`local_model_dir`, `local_baseline_model_path`) were established in
# the 'Setup' section cells and should already be available.

# Assign the global 'model_dir' and 'model_path' to these pre-determined local paths.
model_dir = local_model_dir # e.g., /content/workdir/ECCT/LDPC__Code_n_121_k_60__...
model_path = local_baseline_model_path # e.g., /content/workdir/ECCT/LDPC__Code_n_121_k_60__.../best_model

# drive_base_folder and RUN_TIMESTAMP are already defined from the setup cell and CONFIG cell.
# drive_destination_folder is assembled here for saving new results to Drive.
# Only the path string is built at this point; the folder itself is not created here.
drive_destination_folder = os.path.join(
    drive_base_folder,
    f"{CONFIG['code_type']}_n{CONFIG['code_n']}_k{CONFIG['code_k']}_{RUN_TIMESTAMP}_ECCT_Boosted_Results"
)

print("Using local model directory copied from Drive:", model_dir)
print("Model path:", model_path, "Exists?", os.path.exists(model_path))
print("Drive destination folder for current run results:", drive_destination_folder)

# Fail fast if the local copy is missing, before any later cell tries to use it.
if not os.path.exists(model_path):
    raise RuntimeError(
        f"'best_model' not found at expected local location: {model_path}. "
        "This indicates an issue during the initial model copy from Google Drive."
    )

model_dir  # Show path for notebook cell output for verification purposes

## UC collection using ECCT dataset and saved model
This uses `ECC_Dataset` from Main.py and ECCT model API (forward + loss). It saves failing samples to a local `ECCT_uc_dataset/` folder.

In [None]:

# UC collection: decode random noisy codewords with the current model and
# store every frame that is decoded incorrectly as one .pt file.
os.chdir(os.path.join(CONFIG['workdir'], 'ECCT'))
set_seed(CONFIG['seed'])

model.to(device).eval()
print(f"Using existing model for UC collection, moved to device: {device}")

# Recreate code object & parity/generator matrices using Codes.py helper
class CodeObj: pass
code = CodeObj(); code.code_type = CONFIG['code_type']; code.k = CONFIG['code_k']; code.n = CONFIG['code_n']
G, H = Get_Generator_and_Parity(code, standard_form=False)
code.generator_matrix = torch.from_numpy(G).transpose(0,1).long()
code.pc_matrix = torch.from_numpy(H).long()

# Noise std for the collection SNR at code rate k/n.
sigma = EbN0_to_std(CONFIG['uc_snr_db'], code.k / code.n)
# The sample budget is 4x the target, so the target is reachable only if at
# least one frame in four fails at this SNR.
dataset = ECC_Dataset(code, [sigma], len=CONFIG['target_uc'] * 4, zero_cw=False)
loader = DataLoader(dataset, batch_size=CONFIG['uc_batch_size'], shuffle=False, num_workers=2)

uc_dir = Path(os.path.join(CONFIG['workdir'], 'ECCT_uc_dataset'))
uc_dir.mkdir(parents=True, exist_ok=True)

collected = 0
TARGET = CONFIG['target_uc']
from Codes import bin_to_sign as codes_bin_to_sign
for batch_idx, (m, x, z, y, magnitude, syndrome) in enumerate(loader):
    magnitude = magnitude.to(device)
    syndrome = syndrome.to(device)
    y = y.to(device)
    x = x.to(device)
    with torch.no_grad():
        z_pred = model(magnitude, syndrome)
        loss, x_pred = model.loss(-z_pred, y * codes_bin_to_sign(x), y)

    # A frame counts as failed when any decoded bit differs from the codeword.
    failed_mask = (x_pred.cpu().long() != x.cpu().long()).any(dim=1)
    # flatten() always gives a 1-D index list, including the single-failure case.
    failed_indices = torch.nonzero(failed_mask).flatten().tolist()

    for idx_local, i in enumerate(failed_indices):
        idx_global = collected + idx_local
        # When saving UC samples, ensure canonical format
        sample = {
            'm': m[i].cpu().long().contiguous(),
            'x': x[i].cpu().long().contiguous(),
            'y': y[i].cpu().float().contiguous(),
            'magnitude': magnitude[i].cpu().float().contiguous(),
            'syndrome': syndrome[i].cpu().float().contiguous()
        }
        torch.save(sample, uc_dir / f'uc_{idx_global}.pt')
    collected += len(failed_indices)

    print(f'Batch {batch_idx}: collected {collected}/{TARGET}', end='\r')
    if collected >= TARGET:
        break

print('\nUC collection finished. Saved to', uc_dir)
if collected < TARGET:
    # Report the shortfall instead of silently continuing with a smaller set.
    print(f"WARNING: only {collected} of the {TARGET} requested UC samples were found in "
          f"{len(dataset)} frames at {CONFIG['uc_snr_db']} dB; consider lowering 'uc_snr_db' "
          f"or 'target_uc'.")

# Copy UC dataset to Google Drive
drive_uc_dir = os.path.join(drive_destination_folder, 'ECCT_uc_dataset')
try:
    shutil.copytree(str(uc_dir), drive_uc_dir, dirs_exist_ok=True)
    print(f"Successfully copied UC dataset to GDrive: {drive_uc_dir}")
except Exception as e:
    # Best effort: the local copy is what the rest of the notebook uses.
    print(f"Error copying UC dataset to GDrive: {e}")


## Optional: Export Uncor.txt for LDPC_Error_Floor (so TensorFlow post script can be run if desired)

In [None]:

# Write one text line per collected UC sample into the LDPC_Error_Floor
# 'Inputs' folder, then try to mirror that file to Google Drive.
uc_dir = Path(CONFIG['workdir']) / 'ECCT_uc_dataset'
out_dir = Path(CONFIG['workdir']) / 'LDPC_Error_Floor' / 'Inputs'
out_dir.mkdir(parents=True, exist_ok=True)
uncor_file = out_dir / "[Uncor]_ecct_uc.txt"

count = 0
with open(uncor_file, 'w') as f:
    for p in sorted(uc_dir.glob('uc_*.pt')):
        d = torch.load(p)
        y = d.get('y', None)
        if y is not None:
            y_np = y.numpy().reshape(-1)
            # Each row: three header zeros followed by the negated received
            # vector (as in Print_Functions.write_uncor_file), tab-separated
            # with three decimals.
            row = np.concatenate((np.zeros(3), -y_np))
            f.write('\t'.join(f'{v:.3f}' for v in row) + '\n')
            count += 1
print(f'Wrote {count} lines to', uncor_file)

# The Drive copy is attempted only when the destination folder name exists.
if not ('drive_destination_folder' in globals() or 'drive_destination_folder' in locals()):
    print("Skipping GDrive copy: 'drive_destination_folder' variable is missing.")
else:
    # Keep these inputs in their own sub-folder on Drive.
    drive_uncor_dir = os.path.join(drive_destination_folder, 'LDPC_Error_Floor_Inputs')
    os.makedirs(drive_uncor_dir, exist_ok=True)
    drive_uncor_file = os.path.join(drive_uncor_dir, os.path.basename(uncor_file))

    try:
        shutil.copy(uncor_file, drive_uncor_file)
        print(f"Successfully copied Uncor.txt to GDrive: {drive_uncor_file}")
    except Exception as e:
        print(f"Error copying Uncor.txt to GDrive: {e}. Check if drive_destination_folder is correctly defined.")


In [None]:
# ---------- TRANSFER LEARNING CELL ----------
import torch, os
from torch.serialization import add_safe_globals

# Make sure the ECC_Transformer class is available for safe loads if needed
try:
    from Model import ECC_Transformer
    add_safe_globals([ECC_Transformer])
except Exception as e:
    print("Model class safe-globals not registered (ok if loading state_dict only):", e)

transfer_model_path = None  # <-- set to desired path or None

if transfer_model_path and os.path.exists(transfer_model_path):
    print("Loading pretrained (weights_only=True recommended) for transfer:", transfer_model_path)
    pretrained = torch.load(transfer_model_path, map_location='cpu', weights_only=False)
    pre_sd = {}
    if hasattr(pretrained, 'state_dict'):
        pre_sd = pretrained.state_dict()
    else:
        # pretrained might be a raw state-dict saved with torch.save(model.state_dict())
        pre_sd = pretrained

    cur_sd = model.state_dict()
    # copy matching keys only
    for k in cur_sd.keys():
        if k in pre_sd and pre_sd[k].shape == cur_sd[k].shape:
            cur_sd[k] = pre_sd[k].clone()
    model.load_state_dict(cur_sd)


    print(f"Transfer complete.")
else:
    print("No transfer model provided or file not found. Skipping transfer.")


In [None]:
# PER-ITERATION SCALAR WEIGHTS
# Wrapper around a trained ECC_Transformer baseline model.
# It uses small per-layer parameters w_bar (channel), w_sc (satisfied-CN), w_uc (unsatisfied-CN).

import torch.nn as nn

class ECCT_ScalarShared(nn.Module):
    """Wrap a trained base model with learnable per-layer scalar weights.

    Before each decoder layer the embedded sequence is rescaled position-wise:
    the first ``n`` (variable-node) positions by ``w_bar[ell]``, and each of
    the remaining ``m`` (check-node) positions by ``w_uc[ell]`` or
    ``w_sc[ell]`` depending on whether that check is unsatisfied or satisfied.
    All three weight vectors start at 1, so the rescaling is initially a no-op.

    The scalars enter the computation as tensors (never via ``.item()``), so
    they stay in the autograd graph and are actually trained.

    NOTE(review): the layers in ``base_model.decoder.layers`` are called
    directly; anything else ``base_model.decoder.forward`` may do (e.g.
    normalisation) is bypassed — confirm this matches the base model's
    forward pass.

    Args:
        base_model: trained model exposing ``src_embed``,
            ``oned_final_embed``, ``out_fc``, ``decoder.layers``, ``loss`` and
            (optionally) a ``src_mask`` buffer. Passing an existing
            ``ECCT_ScalarShared`` re-wraps its underlying base model.
    """

    def __init__(self, base_model):
        super().__init__()
        # Avoid stacking wrappers when the cell is re-run.
        if isinstance(base_model, ECCT_ScalarShared):
            base_model = base_model.base
        self.base = base_model

        # Share (not copy) the base model's embedding and output modules.
        self.src_embed = base_model.src_embed
        self.oned_final_embed = base_model.oned_final_embed
        self.out_fc = base_model.out_fc
        if hasattr(base_model, 'src_mask'):
            self.register_buffer('src_mask', base_model.src_mask.clone())

        # keep decoder and layers
        self.decoder = base_model.decoder
        self.decoder_layers = base_model.decoder.layers
        self.L = len(self.decoder_layers)

        # One scalar per layer and role, all initialised to 1.
        self.w_bar = nn.Parameter(torch.ones(self.L))  # applied to VN (channel) positions
        self.w_sc = nn.Parameter(torch.ones(self.L))   # CN weight for satisfied CNs
        self.w_uc = nn.Parameter(torch.ones(self.L))   # CN weight for unsatisfied CNs

    def forward(self, magnitude, syndrome):
        """Return the output of ``out_fc`` for a batch.

        Args:
            magnitude: tensor of shape (B, n).
            syndrome: tensor of shape (B, m).
        """
        B, n = magnitude.shape
        x = torch.cat([magnitude, syndrome], dim=-1).unsqueeze(-1)  # (B, n + m, 1)
        out = self.src_embed.unsqueeze(0) * x                       # (B, n + m, d)

        # Unsatisfied checks. The syndrome is assumed to be in the +/-1 sign
        # form produced by bin_to_sign (as built in augment_uc_vector), where
        # a negative entry means parity bit 1 — TODO confirm against
        # Codes.bin_to_sign. The previous test `abs() > 0` is true for every
        # +/-1 entry and so marked all checks as unsatisfied.
        unsat = (syndrome < 0).unsqueeze(-1)                        # (B, m, 1)

        for ell, layer in enumerate(self.decoder_layers):
            # Build the position-wise scale from the parameters themselves so
            # gradients reach w_bar / w_sc / w_uc.
            vn_scale = self.w_bar[ell].expand(B, n, 1)
            cn_scale = torch.where(unsat, self.w_uc[ell], self.w_sc[ell])
            scale = torch.cat([vn_scale, cn_scale], dim=1).to(out.dtype)

            out = layer(out * scale, self.src_mask)

        logits = self.out_fc(self.oned_final_embed(out).squeeze(-1))
        return logits

    def loss(self, *args, **kwargs):
        """Delegate to the wrapped base model's ``loss``."""
        return self.base.loss(*args, **kwargs)



# Swap the plain model for its scalar-weight wrapper; later cells keep using
# the name `model`.
wrapped = ECCT_ScalarShared(model)
wrapped.to(device)  # moves the module in place
model = wrapped


print("Dynamic weight sharing applied.")


In [None]:
# ---------- DATA AUGMENTATION / IMPORTANCE SAMPLING ----------
from Codes import bin_to_sign, sign_to_bin

# Module-level knobs read by augment_uc_vector:
#   beta - size of the bias applied to the received values at the error positions
#   D    - how many augmented samples are generated per UC vector
beta = 0.7   # boosting paper suggests 0.7
D = 10       # augmentations per UC vector

def augment_uc_vector(magnitude, syndrome, x_true, y, H, sigma=None):
    """Generate ``D`` noisy variants of one uncorrected (UC) sample.

    Each variant draws fresh Gaussian noise around the transmitted codeword
    and then shifts the positions where the ORIGINAL received vector had a
    hard-decision error by ``beta`` towards the wrong decision.

    Args:
        magnitude: 1-D tensor (n,). Not used in the computation; kept for
            interface compatibility.
        syndrome: 1-D tensor (m,). Not used in the computation; kept for
            interface compatibility.
        x_true: 1-D tensor (n,) of transmitted codeword bits.
        y: 1-D float tensor (n,) — the received vector of the UC sample.
        H: parity-check matrix (m x n), torch tensor or NumPy array.
        sigma: noise standard deviation used for the fresh noise (required).

    Returns:
        List of ``D`` tuples ``(mag_aug, syn_aug, x_true_copy, y_aug)``.

    Raises:
        ValueError: if ``sigma`` is None.
    """
    if sigma is None:
        raise ValueError("augment_uc_vector now requires sigma (AWGN std).")
    if not isinstance(H, torch.Tensor):
        H = torch.from_numpy(H).long()

    # Error positions of the original sample: a negative received value is
    # hard-decided as bit 1.
    pred_bits = (torch.sign(y) < 0).long()
    error_idx = (pred_bits != x_true.long()).nonzero(as_tuple=False).flatten()

    # Transmitted symbols. Assumes bin_to_sign maps bit 0 -> +1 and
    # bit 1 -> -1, consistent with the hard-decision rule above.
    codeword_sign = bin_to_sign(x_true).to(y.dtype)

    augmented = []
    for _ in range(D):
        # Channel model: y = sign(x) + N(0, sigma^2)
        y_aug = codeword_sign + torch.randn_like(y) * sigma

        # Importance-sampling bias TOWARDS an error: move each error position
        # against its transmitted symbol. Subtracting a bare `beta` would only
        # do that for +1 symbols; for -1 symbols it would make the position
        # more reliable instead.
        if error_idx.numel() > 0:
            y_aug[error_idx] -= beta * codeword_sign[error_idx]

        # Rebuild magnitude and syndrome (in sign form) from the new vector.
        mag_aug = torch.abs(y_aug)
        bits = sign_to_bin(torch.sign(y_aug)).long().unsqueeze(0)  # (1, n)
        syn = bin_to_sign(((bits @ H.t()) % 2).squeeze(0))

        augmented.append((mag_aug, syn, x_true.clone(), y_aug))

    return augmented


## Post-stage fine-tune (PyTorch) on UC dataset
This cell fine-tunes the loaded ECCT model on UC samples (adds small FER surrogate); uses block-wise freezing by default.

In [None]:
# POST-STAGE FINE-TUNE - BOOSTING + BLOCK-WISE
# Assuming a 6-layer ECCT Transformer model is provided (generalized for any L)

# ---------- BUILD AUGMENTED UC DATASET ----------
# Every collected uc_*.pt sample is expanded with augment_uc_vector and the
# variants are written next to it as aug_<stem>_<j>.pt.
uc_dir = Path(os.path.join(CONFIG['workdir'], 'ECCT_uc_dataset'))
files = sorted([str(p) for p in uc_dir.glob('uc_*.pt')])
print("UC files found:", len(files))
if len(files) == 0:
    raise RuntimeError("No UC samples found; run UC collection first.")

# get H
class C: pass
code = C()
code.code_type = CONFIG['code_type']; code.n = CONFIG['code_n']; code.k = CONFIG['code_k']
G, H = Get_Generator_and_Parity(code, standard_form=False)
H = torch.from_numpy(H).long()

# The noise level is the same for every sample (the UC-collection SNR), so
# compute it once instead of once per file.
sigma = EbN0_to_std(CONFIG['uc_snr_db'], code.k / code.n)

augmented_files = []
for f in files:
    d = torch.load(f)
    mag, syn, x, y = d['magnitude'], d['syndrome'], d['x'], d['y']

    aug_list = augment_uc_vector(mag, syn, x, y, H, sigma)
    for j, (mag_aug, syn_aug, x_aug, y_aug) in enumerate(aug_list):
        out = {'magnitude': mag_aug, 'syndrome': syn_aug, 'x': x_aug, 'y': y_aug}
        out_path = uc_dir / f"aug_{Path(f).stem}_{j}.pt"
        torch.save(out, out_path)
        augmented_files.append(str(out_path))

print("Augmented UC vectors generated:", len(augmented_files))

# ---------- BLOCK-WISE POST-STAGE FINE-TUNE ----------
# Training set = original UC samples followed by their augmented variants.
uc_dir = Path(os.path.join(CONFIG['workdir'], 'ECCT_uc_dataset'))
files_uc = sorted([str(p) for p in uc_dir.glob('uc_*.pt')])
files_aug = sorted([str(p) for p in uc_dir.glob('aug_*.pt')])
files = files_uc + files_aug
# Raise explicitly (asserts are stripped under `python -O`), matching the
# RuntimeError used by the augmentation step.
if len(files) == 0:
    raise RuntimeError("No UC samples. Run UC collection or augmentation first.")

class UCDataset(Dataset):
    """Dataset over saved sample files; each item is loaded lazily from disk."""

    _FIELDS = ('magnitude', 'syndrome', 'x', 'y')

    def __init__(self, files):
        # `files` is the sequence of .pt paths; it is stored as given.
        self.files = files

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        # Returns the tuple (magnitude, syndrome, x, y) stored in file `idx`.
        record = torch.load(self.files[idx])
        return tuple(record[name] for name in self._FIELDS)

def collate_uc(batch):
    """Stack a list of (magnitude, syndrome, x, y) samples into batch tensors.

    Returns a 4-tuple ``(mags, syns, xs, ys)``; ``xs`` is int64 and the other
    three are float32. Each is stacked along a new leading batch dimension.
    """
    mags, syns, xs, ys = zip(*batch)

    def _stack(items, dtype):
        # as_tensor accepts tensors as well as array-likes.
        return torch.stack([torch.as_tensor(item).to(dtype) for item in items], 0)

    return (
        _stack(mags, torch.float32),
        _stack(syns, torch.float32),
        _stack(xs, torch.int64),
        _stack(ys, torch.float32),
    )

# Shuffled loader over the UC + augmented files, batched with collate_uc.
ds = UCDataset(files)
loader = DataLoader(ds, batch_size=CONFIG['post_batch_size'], shuffle=True, num_workers=0, collate_fn=collate_uc)

# block-wise schedule parameters (paper suggests DELTA1=5, DELTA2=10)
DELTA1 = 5   # iter_step
DELTA2 = 10  # retrain window length
ITERS_MAX = len(model.decoder.layers)  # this is L (conceptual mapping)
# Each stage runs for 'epochs_post' epochs (falls back to 3 if the key is absent, never below 1).
EPOCHS_PER_BLOCK = max(1, CONFIG.get('epochs_post', 3))  # adapt
LR = 1e-4

# The schedule starts at the middle layer; layers below ITERS_MAX // 2 are
# never part of a stage.
training_iter_start = ITERS_MAX // 2
training_iter_end   = min(training_iter_start + DELTA1, ITERS_MAX)

# Each stage is a list of layer indices [retrain_start, training_iter_end).
# Because retrain_start is clamped to training_iter_start and a window is at
# most DELTA1 wide, DELTA2 has no effect while DELTA2 >= DELTA1.
stages = []
while training_iter_start < ITERS_MAX:
    retrain_start = max(training_iter_start,
                        training_iter_end - DELTA2)
    stages.append(list(range(retrain_start, training_iter_end)))

    training_iter_start += DELTA1
    training_iter_end = min(training_iter_start + DELTA1, ITERS_MAX)

print("Stages:", stages)


# helper to set trainable layers (freeze others)
def set_trainable_layers(model, train_layer_indices):
    """Enable gradients only for the decoder layers listed in ``train_layer_indices``.

    Every layer in ``model.decoder.layers`` has ``requires_grad`` set on all
    its parameters: True when its index is listed, False otherwise.
    Parameters outside ``model.decoder.layers`` are not touched.
    """
    for position, block in enumerate(model.decoder.layers):
        block.requires_grad_(position in train_layer_indices)

from Codes import bin_to_sign as codes_bin_to_sign


def _run_uc_epoch(model, loader, opt, device):
    """Run one optimisation pass over ``loader``; return the sample-weighted loss sum."""
    tot_loss = 0.0
    for magnitude, syndrome, x_true, y in loader:
        magnitude = magnitude.to(device)
        syndrome = syndrome.to(device)
        x_true = x_true.to(device)
        y = y.to(device)

        z_mul = y * codes_bin_to_sign(x_true)
        z_pred = model(magnitude, syndrome)
        loss_bce, x_pred = model.loss(-z_pred, z_mul, y)
        # Fraction of frames with at least one wrong bit.
        # NOTE(review): this is built from a comparison, which carries no
        # gradient, so it only shifts the reported loss value; the gradient
        # comes from the 0.01 * loss_bce term alone.
        fer_surr = (x_pred != x_true).any(dim=1).float().mean()
        loss = fer_surr + 0.01 * loss_bce

        opt.zero_grad()
        loss.backward()
        opt.step()
        tot_loss += loss.item() * magnitude.size(0)
    return tot_loss


# Send batches to wherever the model's parameters actually live. Using
# CONFIG['device'] directly would crash on a CPU-only runtime when the config
# still says 'cuda' (the model was placed with the CUDA-availability fallback).
train_device = next(model.parameters()).device

# NOTE(review): the model is not switched back to train() mode in this cell;
# if an earlier cell left it in eval() mode, fine-tuning runs in eval mode —
# confirm that is intended.
for stage_idx, stage in enumerate(stages):
    print(f"=== Stage {stage_idx+1}/{len(stages)}: training layers {stage} ===")
    set_trainable_layers(model, stage)
    # Only parameters that still require gradients are optimised in this stage.
    opt = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)
    for ep in range(EPOCHS_PER_BLOCK):
        tot_loss = _run_uc_epoch(model, loader, opt, train_device)
        # The stage number is printed 1-based to match the stage header above.
        print(f" Stage {stage_idx+1} epoch {ep+1}/{EPOCHS_PER_BLOCK} avg_loss={tot_loss/len(ds):.4e}")

# final polish: unfreeze all and small lr training
for p in model.parameters():
    p.requires_grad = True
opt = torch.optim.Adam(model.parameters(), lr=LR/10)
for ep in range(2):
    tot_loss = _run_uc_epoch(model, loader, opt, train_device)
    print(f" Final polish epoch {ep+1} avg_loss={tot_loss/len(ds):.4e}")

# Save boosted model (the whole module object is pickled, not just a
# state-dict, so loading it later requires the model classes to be defined).
boosted_model_path = os.path.join(model_dir, 'boosted_model.pt')
torch.save(model, boosted_model_path)
print('Saved boosted model to', boosted_model_path)
# try copying to drive:
try:
    # The destination folder may not exist yet (e.g. if the UC Drive copy was
    # skipped or failed), so create it as part of the best-effort copy.
    os.makedirs(drive_destination_folder, exist_ok=True)
    shutil.copy(boosted_model_path, os.path.join(drive_destination_folder, 'boosted_model.pt'))
    print("Copied boosted model to drive.")
except Exception as e:
    # Best effort: the local copy above is the one the evaluation cell loads.
    print("Drive copy skipped/error:", e)



## Evaluation: BER & FER plots for baseline vs boosted

In [None]:
from Codes import Get_Generator_and_Parity, bin_to_sign

# Rebuild the code description used by the evaluation datasets.
class CodeObj:
    pass


code = CodeObj()
code.code_type = CONFIG['code_type']
code.k = CONFIG['code_k']
code.n = CONFIG['code_n']

# G is stored transposed; both matrices are converted to int64 tensors.
G, H = Get_Generator_and_Parity(code, standard_form=False)
code.generator_matrix = torch.from_numpy(G).transpose(0, 1).long()
code.pc_matrix = torch.from_numpy(H).long()


def evaluate_model(model_obj, ebno_list, samples_per_point=3000):
    """Measure BER and FER of ``model_obj`` at each Eb/N0 value.

    Args:
        model_obj: model exposing ``forward(magnitude, syndrome)`` and ``loss``.
        ebno_list: iterable of Eb/N0 values passed to ``EbN0_to_std``.
        samples_per_point: dataset length used for every Eb/N0 value.

    Returns:
        Dict with parallel lists under the keys ``'eb'``, ``'BER'``, ``'FER'``.
    """
    # Same device rule as the rest of the notebook: the configured device
    # when CUDA is available, CPU otherwise.
    if torch.cuda.is_available():
        device_eval = torch.device(CONFIG['device'])
    else:
        device_eval = torch.device('cpu')
    model_obj = model_obj.to(device_eval)
    model_obj.eval()

    eb_points, ber_points, fer_points = [], [], []

    for eb in ebno_list:
        noise_std = EbN0_to_std(eb, code.k/code.n)
        eval_set = ECC_Dataset(code, [noise_std], len=samples_per_point, zero_cw=False)
        # Single-process loading (num_workers=0) for stability.
        eval_loader = DataLoader(eval_set, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

        total_bits = 0
        bit_errors = 0
        total_frames = 0
        frame_errors = 0
        with torch.no_grad():
            for m, x, z, y, magnitude, syndrome in eval_loader:
                magnitude = magnitude.to(device_eval)
                syndrome = syndrome.to(device_eval)
                x = x.to(device_eval)
                y = y.to(device_eval)

                z_pred = model_obj(magnitude, syndrome)
                _, x_pred = model_obj.loss(-z_pred, (y * bin_to_sign(x)), y)

                # Compare decoded and true bits on the CPU as integers.
                x_pred = x_pred.cpu().long()
                x_cpu = x.cpu().long()
                wrong = x_pred != x_cpu

                total_frames += x_cpu.size(0)
                frame_errors += wrong.any(dim=1).sum().item()
                bit_errors += wrong.sum().item()
                total_bits += x_cpu.numel()

        ber = bit_errors / total_bits
        fer = frame_errors / total_frames
        eb_points.append(eb)
        ber_points.append(ber)
        fer_points.append(fer)
        print(f"Eb/N0={eb}: BER={ber:.3e}, FER={fer:.3e}")

    return {'eb': eb_points, 'BER': ber_points, 'FER': fer_points}


# Load models
# Guard for a missing/empty `model_dir`. `and` binds tighter than `or`, so the
# branch runs when the name is undefined in both namespaces, OR when it is
# defined but falsy; short-circuiting means `model_dir` is only evaluated when
# it exists.
if 'model_dir' not in locals() and 'model_dir' not in globals() or not model_dir:
    print("ERROR: 'model_dir' is not defined. Please run the Helper cell first.")
    # Fallback to finding the latest dir if model_dir is somehow lost
    res_dirs = sorted(glob.glob(os.path.join(CONFIG['workdir'], 'ECCT', 'Results_ECCT', '*')), key=os.path.getmtime, reverse=True)
    if len(res_dirs) > 0:
        model_dir = res_dirs[0]
    else:
        raise EnvironmentError("Cannot find 'model_dir' or any results folder.")


# Both checkpoints are expected side by side in model_dir.
baseline_model_path = os.path.join(model_dir, 'best_model')
boosted_model_path = os.path.join(model_dir, 'boosted_model.pt')

device = torch.device(CONFIG['device'] if torch.cuda.is_available() else 'cpu')
print('Loading model from', baseline_model_path, 'to', device)

# weights_only=False: the files hold pickled model objects, not plain
# state-dicts, so the model classes must be defined/importable in this session.
baseline_model = torch.load(baseline_model_path,
                           map_location=device,
                           weights_only=False)
print('Loading model from', boosted_model_path, 'to', device)

boosted_model = torch.load(boosted_model_path,
                           map_location=device,
                           weights_only=False)


# Evaluation
# Both models are evaluated over the same Eb/N0 list with 2000 frames per point.
res_base = evaluate_model(baseline_model, CONFIG['eval_ebno_list'], samples_per_point=2000)
res_boost = evaluate_model(boosted_model, CONFIG['eval_ebno_list'], samples_per_point=2000)

# --- DATA SAVING (NPZ) ---
# Flatten both result dicts into "<model>_<metric>" arrays, plus the config
# as a string for context.
data_to_save = {}
for tag, res in (('baseline', res_base), ('boosted', res_boost)):
    for metric in ('eb', 'BER', 'FER'):
        data_to_save[f'{tag}_{metric}'] = np.array(res[metric])
data_to_save['config'] = str(CONFIG)

data_filepath = os.path.join(model_dir, 'evaluation_results.npz')
np.savez(data_filepath, **data_to_save)
print(f"Saved evaluation data to: {data_filepath}")

# Ensure the drive destination folder exists before saving plots
os.makedirs(drive_destination_folder, exist_ok=True)

# --- PLOTTING ---
# One semilog figure per metric (BER first, then FER), each saved straight
# to the Drive destination folder and then shown.
plot_paths = {}
for metric, marker in (('BER', 'o'), ('FER', 'x')):
    plt.figure(figsize=(6, 4))
    plt.semilogy(res_base['eb'], res_base[metric], marker=marker, label=f'{metric} baseline')
    plt.semilogy(res_boost['eb'], res_boost[metric], marker=marker, label=f'{metric} boosted')
    plt.xlabel('Eb/N0 (dB)')
    plt.ylabel(metric)
    plt.legend()
    plt.grid(True)
    plot_paths[metric] = os.path.join(drive_destination_folder, f'{metric}_baseline_vs_boosted.png')
    plt.savefig(plot_paths[metric])
    print(f"Saved {metric} plot to GDrive: {plot_paths[metric]}")
    plt.show()
ber_plot_path = plot_paths['BER']
fer_plot_path = plot_paths['FER']

# --- GDrive Copy for NPZ file ---
if 'drive_destination_folder' in globals() or 'drive_destination_folder' in locals():
    drive_data_filepath = os.path.join(drive_destination_folder, os.path.basename(data_filepath))
    try:
        # Copy the newly created .npz file
        import shutil
        shutil.copy(data_filepath, drive_data_filepath)
        print(f"Successfully copied evaluation data to GDrive: {drive_data_filepath}")
    except Exception as e:
        print(f"Error copying evaluation data to GDrive: {e}")