In [1]:
# =========================
# Imports, seed, device
# =========================
import os, struct, random
import numpy as np
import torch
from sklearn.preprocessing import Normalizer

# Reproducibility: one seed shared by every RNG the notebook uses.
seed = 42

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed order is torch -> numpy -> stdlib random.
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(seed)
if device.type == "cuda":
    torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
# =========================
# Parameters
# =========================
# Experiment configuration: every value below is read by later cells.
DATASET       = "UCIHAR"   # dataset file prefix: "MNIST" | "UCIHAR" | "ISOLET"
Dataset_Split = "non-IID"      # client partitioning: "IID" | "non-IID" (any other value raises ValueError)
CLIENTS       = 100  # number of clients; for non-IID each client starts with 2 classes
                     # (extra classes are handed out only when nClasses > 2*CLIENTS).
                     # If nClasses < 2*CLIENTS, class assignments repeat across clients.
EPSILON       = 0.5        # privacy budget ε; the noise standard deviation is divided by it
DELTA_Coef    = 1e-3       # δ = DELTA_Coef / N_client
D             = 2000       # hypervector dimension
ROUND         = 30         # number of communication rounds
nClasses      = None       # None -> taken from the dataset file header; set an int to override

In [3]:
# =========================
# Data loader (.choir_dat)
# =========================
def read_choir_file(filepath):
    """Read a binary ``.choir_dat`` dataset file.

    Layout (native byte order): two int32 header fields (feature count,
    class count) followed by records of ``nFeatures`` float32 values and one
    int32 label each.

    Parameters
    ----------
    filepath : str or path-like
        Location of the ``.choir_dat`` file.

    Returns
    -------
    nFeatures : int
        Feature count from the header.
    nClasses_ : int
        Class count from the header.
    X : np.ndarray, shape (n_samples, nFeatures), float64
        Feature matrix (shape ``(0, nFeatures)`` when the file has no records).
    y : np.ndarray, shape (n_samples,), integer
        Labels, one per row of ``X``.

    Raises
    ------
    ValueError
        If the header is incomplete or declares a negative feature count.
    OSError
        If the file cannot be opened or read (no longer swallowed).
    """
    header = struct.Struct("ii")
    X, y = [], []
    with open(filepath, 'rb') as f:
        raw = f.read(header.size)
        if len(raw) != header.size:
            raise ValueError(f"{filepath}: too short to contain a .choir_dat header")
        nFeatures, nClasses_ = header.unpack(raw)
        if nFeatures < 0:
            raise ValueError(f"{filepath}: invalid feature count {nFeatures} in header")

        # Decode one whole record (features + label) per unpack call.
        record = struct.Struct(f"{nFeatures}fi")
        while True:
            chunk = f.read(record.size)
            if len(chunk) < record.size:
                # Clean EOF, or an incomplete trailing record which is dropped
                # (same tolerance as before, but real I/O errors now propagate).
                break
            *vec, lab = record.unpack(chunk)
            X.append(vec)
            y.append(lab)

    # Explicit dtype/shape so an empty file still yields well-formed arrays.
    features = np.array(X, dtype=float).reshape(len(y), nFeatures)
    labels = np.array(y, dtype=int)
    return nFeatures, nClasses_, features, labels

In [4]:
# =========================
# Load + normalize
# =========================
# Locate the train/test files of the selected dataset.
train_path = os.path.join("Dataset", f"{DATASET}_train.choir_dat")
test_path  = os.path.join("Dataset", f"{DATASET}_test.choir_dat")

# Parse both splits and make sure their headers agree.
nF1, nC1, X_train, y_train = read_choir_file(train_path)
nF2, nC2, X_test,  y_test  = read_choir_file(test_path)
assert (nF1, nC1) == (nF2, nC2), "Feature or class count mismatch train/test"

# Scale every sample (row) to unit L2 norm. Normalizer is stateless: it works
# row by row, so fitting on the training split does not learn anything from it.
norm = Normalizer(norm="l2")
X_train = norm.fit_transform(X_train)
X_test  = norm.transform(X_test)

# Fall back to the class count from the file header when none was configured.
nClasses = int(nC1) if nClasses is None else nClasses

print(f"{DATASET} loaded. Train {X_train.shape}, Test {X_test.shape}, Classes {nClasses}")

UCIHAR loaded. Train (7352, 561), Test (2947, 561), Classes 6


In [5]:
# =========================
# Split to clients (IID or non-IID)
# =========================
# Maps client id (the string "1".."CLIENTS") to a dict holding that client's
# "traindata" (feature rows) and "trainlabels" (matching labels).
all_traindata = {}

if Dataset_Split == "IID":
    # Shuffle all samples once, then cut the shuffled data into CLIENTS
    # equal, contiguous shards.
    idx = np.arange(len(X_train))
    np.random.shuffle(idx)  # uniform permutation, so every shard sees the same class mix in expectation
    Xs = X_train[idx]; ys = y_train[idx]
    # Floor division: the last len(Xs) % CLIENTS samples are not assigned to anyone.
    per = len(Xs) // CLIENTS
    for k in range(CLIENTS):
        s, e = k*per, (k+1)*per
        all_traindata[str(k+1)] = {"traindata": Xs[s:e], "trainlabels": ys[s:e]}

elif Dataset_Split == "non-IID":
    # Each client starts with 2 classes; extras are added only if nClasses > 2*CLIENTS
    classes = np.arange(nClasses)
    np.random.shuffle(classes)

    # How many classes per client: start with 2 each
    classes_per_client = [2] * CLIENTS
    # Positive only when there are more classes than 2*CLIENTS slots; the
    # surplus is then handed out round-robin, one extra class at a time.
    leftover = nClasses - 2 * CLIENTS
    i = 0
    while leftover > 0:
        classes_per_client[i] += 1
        leftover -= 1
        i = (i + 1) % CLIENTS

    # Walk the shuffled class order, giving each client the next `take` class
    # IDs; the pointer wraps modulo nClasses, so classes repeat across clients
    # once more slots are requested than there are classes.
    # NOTE(review): picks within one client are distinct only while
    # take <= nClasses - confirm for very small nClasses.
    assigned = []
    ptr = 0
    for k in range(CLIENTS):
        take = classes_per_client[k]
        pick = []
        for _ in range(take):
            pick.append(classes[ptr % nClasses])
            ptr += 1
        assigned.append(pick)

    # Collect (and shuffle) the sample indices of each class
    # NOTE(review): assumes labels in y_train are 0..nClasses-1 - confirm against the data files.
    class_to_idx = {c: np.where(y_train == c)[0] for c in range(nClasses)}
    for c in class_to_idx:
        np.random.shuffle(class_to_idx[c])

    # Build client datasets: draw equal portions from each assigned class
    per_client = len(X_train) // CLIENTS  # aim for balanced client sizes
    for k in range(CLIENTS):
        clist = assigned[k]
        take_each = max(1, per_client // len(clist))
        idxs = []
        for c in clist:
            # Samples are drawn without replacement: whatever a client takes
            # is removed from the class pool.
            pool = class_to_idx[c]
            # NOTE(review): once a pool runs dry this takes fewer samples
            # (possibly none), so later clients can end up small or empty.
            take = min(take_each, len(pool))
            idxs.extend(pool[:take])
            class_to_idx[c] = pool[take:]
        idxs = np.array(idxs, dtype=int)
        np.random.shuffle(idxs)  # mix the classes inside the client's shard
        all_traindata[str(k+1)] = {"traindata": X_train[idxs], "trainlabels": y_train[idxs]}
else:
    raise ValueError("Dataset_Split must be 'IID' or 'non-IID'")

In [6]:
# Compute the differential-privacy noise for the first client in the first round.
# NOTE: a later cell defines Noise_first_round again; when the notebook is run
# top to bottom, that later definition is the one in effect.
def Noise_first_round(chv, eps, D, len_data, DELTA_Coef):
    """Return ``chv`` with zero-mean Gaussian noise added to every entry.

    The noise standard deviation is
    ``sqrt(2 * D * ln(1.25 * len_data / DELTA_Coef)) / eps``.

    Parameters
    ----------
    chv : torch.Tensor
        Class hypervectors; noise of the same shape is added. Not modified.
    eps : float
        Privacy budget (divides the standard deviation).
    D : int
        Hypervector dimension used in the variance formula.
    len_data : int
        Number of samples held by the client.
    DELTA_Coef : float
        Coefficient such that delta = DELTA_Coef / len_data.

    Returns
    -------
    torch.Tensor
        float32 tensor with the shape and device of ``chv``.
    """
    std_noise = float(np.sqrt(2 * D * np.log((1.25 * len_data) / DELTA_Coef)) / eps)

    # Sample directly on chv's device. `Tensor.device` is an attribute, not a
    # method, so the previous `.device()` calls raised TypeError.
    noise = torch.normal(mean=0.0, std=std_noise, size=tuple(chv.shape), device=chv.device)
    class_noisy = chv.to(torch.float32) + noise

    return class_noisy

In [7]:
# =========================
# DP noise schedules (paper theorems)
# =========================
def Noise_first_round(chv, eps, Ddim, N_client, delta_coef):
    """Add the round-1 / client-1 Gaussian DP noise to ``chv``.

    Theorem 2: Γ_1^1 ~ N(0, 2D/ε^2 * ln[1.25 N / δ0]), where δ0 is passed
    here as ``delta_coef``.

    Parameters
    ----------
    chv : torch.Tensor
        Class hypervectors. Not modified.
    eps : float
        Privacy budget ε.
    Ddim : int
        Hypervector dimension D in the variance formula.
    N_client : int
        Number of samples held by the client (N in the formula).
    delta_coef : float
        δ0 in the formula.

    Returns
    -------
    torch.Tensor
        ``chv + noise``, where ``noise`` has the shape of ``chv``.
    """
    std = float(np.sqrt(2.0 * Ddim * np.log((1.25 * N_client) / delta_coef)) / eps)
    # Sample on the tensor's own device rather than a module-level global, so
    # the function also works for tensors that live elsewhere.
    noise = torch.normal(0.0, std, size=tuple(chv.shape), device=chv.device)
    return chv + noise


In [8]:
def Pert_Noise(chv, eps, Ddim, rnd, K, k_idx):
    """Add the per-step Gaussian perturbation noise to ``chv``.

    Theorem 5: Γ_k^r ~ N(0, 2D/ε^2 * ln[(K(r-1)+k)/(K(r-1)+k-1)])

    Parameters
    ----------
    chv : torch.Tensor
        Class hypervectors. Not modified.
    eps : float
        Privacy budget ε.
    Ddim : int
        Hypervector dimension D in the variance formula.
    rnd : int
        Communication round r (1-based).
    K : int
        Number of clients.
    k_idx : int
        Position k of the client within the round (1-based).

    Returns
    -------
    torch.Tensor
        ``chv + noise``, where ``noise`` has the shape of ``chv``.

    Raises
    ------
    ValueError
        If ``K*(rnd-1) + k_idx - 1`` is not positive, i.e. for the very first
        update (rnd=1, k_idx=1), where the formula is undefined.
    """
    num = (rnd - 1) * K + k_idx
    den = (rnd - 1) * K + (k_idx - 1)
    if den <= 0:
        # Would otherwise surface as an opaque ZeroDivisionError (or a NaN std).
        raise ValueError(
            f"Pert_Noise is undefined for rnd={rnd}, k_idx={k_idx}: "
            "K*(rnd-1) + k_idx - 1 must be positive"
        )
    std = float(np.sqrt(2.0 * Ddim * np.log(num / den)) / eps)
    # Sample on the tensor's own device rather than a module-level global.
    noise = torch.normal(0.0, std, size=tuple(chv.shape), device=chv.device)
    return chv + noise

In [9]:
# =========================
# HD encode, train, update, infer
# =========================
def hd_encode(X, basis):
    """Project samples through ``basis`` and take the element-wise cosine.

    With X of shape [N, nFeatures] and basis of shape [nFeatures, D], the
    result has shape [N, D].
    """
    projected = torch.matmul(X, basis)
    return projected.cos()

def Train_HD(data, labels, basis, nClasses_, Ddim, eps, clt_num, K, delta_coef):
    """Build a client's round-1 class hypervectors and return them with DP noise.

    The encoded samples are summed per class label. Client 1 gets the
    first-round noise (``Noise_first_round``); every other client gets the
    perturbation noise for round 1 at position ``clt_num`` (``Pert_Noise``).
    """
    encoded = hd_encode(data, basis)  # [N, D]

    # Row c accumulates the encodings of all samples whose label is c.
    bundled = torch.zeros(
        (nClasses_, Ddim), dtype=torch.float32, device=device
    ).index_add_(0, labels, encoded)

    if clt_num != 1:
        return Pert_Noise(bundled, eps, Ddim, 1, K, clt_num)
    return Noise_first_round(bundled, eps, Ddim, len(data), delta_coef)

def Update_HD(chv, data, labels, basis, eps, Ddim, rnd, K, clt_num):
    """Refine the received model on one client's data, then add Γ_k^r noise.

    Samples are visited in a random order. For every misclassified sample its
    encoding is added to the true class hypervector and subtracted from the
    predicted one (perceptron-like correction). The corrected model is
    returned with ``Pert_Noise`` applied.

    Parameters
    ----------
    chv : torch.Tensor
        Received class hypervectors, [nClasses, D]. Not modified.
    data, labels : torch.Tensor
        The client's samples and their integer class labels.
    basis : torch.Tensor
        Encoding basis passed to ``hd_encode``.
    eps, Ddim, rnd, K, clt_num
        Forwarded to ``Pert_Noise`` (budget, dimension, round, client count,
        client position).

    Returns
    -------
    torch.Tensor
        The updated, noised class hypervectors.
    """
    H = hd_encode(data, basis)

    # Work on a private copy: the corrections below are in-place, and without
    # the copy the caller's tensor would be left holding the corrected but
    # un-noised model.
    chv = chv.clone()

    # Plain Python ints avoid a tensor-vs-tensor comparison for every sample.
    targets = labels.tolist()

    order = list(range(len(data)))
    random.shuffle(order)
    for i in order:
        guess = int((chv @ H[i]).argmax())
        if guess != targets[i]:
            chv[targets[i]] += H[i]
            chv[guess] -= H[i]
    return Pert_Noise(chv, eps, Ddim, rnd, K, clt_num)

@torch.no_grad()
def Infer(chv, Xte, yte, basis):
    """Return the accuracy (fraction in [0, 1]) of ``chv`` on the given test set.

    Each test sample is encoded and assigned to the class whose hypervector
    has the largest dot product with the encoding.
    """
    features = torch.tensor(Xte, dtype=torch.float32, device=device)
    targets = torch.tensor(yte, dtype=torch.long, device=device)

    scores = hd_encode(features, basis) @ chv.T
    predicted = torch.argmax(scores, dim=1)

    hits = predicted.eq(targets).float()
    return hits.mean().item()

In [10]:
# =========================
# Build basis and run PrivateDFL
# =========================
nFeatures = X_train.shape[1]
# Random encoding basis shared by all clients, and the (initially empty) global model.
basis = torch.randn((nFeatures, D), dtype=torch.float32, device=device)
class_hvs = torch.zeros((nClasses, D), dtype=torch.float32, device=device)

# Move every client's shard to the device once. The shards never change, so
# rebuilding these tensors for every client in every round was pure overhead.
# No random numbers are drawn here, so the RNG stream is unaffected.
client_tensors = []
for client in all_traindata:
    train_data   = all_traindata[client]['traindata'].reshape(-1, nFeatures)
    train_labels = all_traindata[client]['trainlabels']
    client_tensors.append((
        torch.tensor(train_data,   dtype=torch.float32, device=device),
        torch.tensor(train_labels, dtype=torch.long,    device=device),
    ))

# ---- Round 1: sequential clients build secure model
# Each client contributes its own noised class hypervectors; the sum is the model.
for clt_num, (Xc, yc) in enumerate(client_tensors, start=1):
    client_hvs = Train_HD(Xc, yc, basis, nClasses, D, EPSILON, clt_num, CLIENTS, DELTA_Coef)
    class_hvs += client_hvs

acc = Infer(class_hvs, X_test, y_test, basis)
print(f" Round : 1/{ROUND}, Test Accuracy: {acc*100:.2f}%")

# ---- Rounds 2..R: each client updates in sequence (decentralized pass-through)
for rnd in range(2, ROUND + 1):
    for clt_num, (Xc, yc) in enumerate(client_tensors, start=1):
        # The model returned by one client is the input of the next one.
        class_hvs = Update_HD(class_hvs, Xc, yc, basis, EPSILON, D, rnd, CLIENTS, clt_num)
    acc = Infer(class_hvs, X_test, y_test, basis)
    # Report only every 10th round to keep the output short.
    if rnd%10==0:
        print(f" Round : {rnd}/{ROUND}, Test Accuracy: {acc*100:.2f}%")

print(f" Final Round, Test Accuracy: {acc*100:.2f}%")

 Round : 1/30, Test Accuracy: 41.84%
 Round : 10/30, Test Accuracy: 86.63%
 Round : 20/30, Test Accuracy: 88.26%
 Round : 30/30, Test Accuracy: 91.38%
 Final Round, Test Accuracy: 91.38%
