In [1]:
%pip install torch==2.1.0 numpy==1.26.0 scikit-learn==1.3.0 matplotlib==3.8.0 pandas==2.1.0

Note: you may need to restart the kernel to use updated packages.


In [2]:
# # Import required libraries
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# import numpy as np
# import copy
# import os
# import glob
# from collections import deque
# from sklearn.preprocessing import MinMaxScaler

# # Set seed for reproducibility
# torch.manual_seed(7)
# np.random.seed(7)

# # Check for CUDA
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# General imports
import sys
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler

# # Allow importing from utils directory if needed (not strictly required in this notebook since we define code inline)
# sys.path.append("utils/")

# Flag to use dummy data instead of real files (set False if real data is available)
USE_DUMMY_DATA = False

# Set a global random seed for reproducibility (if needed)
np.random.seed(7)


In [3]:
# Model architecture: 1D CNN for N-BaIoT data
class CNN(nn.Module):
    def __init__(self, output_len):
        """CNN model for intrusion detection. output_len is the number of output classes."""
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv1d(in_channels=23, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(64, 64, kernel_size=3, padding=1)
        self.dropoutcv2 = nn.Dropout(p=0.3)   # Dropout after 3rd conv
        self.conv4 = nn.Conv1d(64, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv1d(128, 128, kernel_size=3, padding=1)
        self.conv7 = nn.Conv1d(128, 128, kernel_size=3, stride=2, padding=1)
        self.dropoutcv3 = nn.Dropout(p=0.3)   # Dropout after 7th conv
        self.conv8 = nn.Conv1d(128, 128, kernel_size=3, stride=2, padding=1)
        self.dropoutcv4 = nn.Dropout(p=0.3)   # Dropout after 8th conv
        # Fully connected layers
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, output_len)
        self.output_cnt = output_len  # number of output classes (used to handle binary vs multi-class output)
    
    def forward(self, x):
        # Pass input through the convolutional layers with ReLU activations and occasional dropout
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.dropoutcv2(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = F.relu(x)
        x = self.conv7(x)
        x = F.relu(x)
        x = self.dropoutcv3(x)
        x = self.conv8(x)
        x = F.relu(x)
        x = self.dropoutcv4(x)
        # Flatten and pass through fully-connected layers
        x = x.view(x.size(0), -1)   # flatten to [batch_size, features]
        x = self.fc1(x)
        x = self.fc2(x)
        # For binary classification (output_len == 1), squeeze the output to a 1D tensor
        if self.output_cnt == 1:
            x = x.squeeze(dim=-1)
        return x

def GetNbaIotModel(output_len):
    """Helper to instantiate the CNN model for N-BaIoT with given output length."""
    model = CNN(output_len)
    return model


In [4]:
def GetAllFeatureLabel(filenames, label_start_idx):
    """
    Load data from multiple CSV files and return combined feature and label arrays.
    Each file in 'filenames' is assumed to contain samples of a single class (with label included at column index label_start_idx).
    """
    all_features = []
    all_labels = []
    for idx, filepath in enumerate(filenames):
        if USE_DUMMY_DATA:
            # If using dummy data, generate random features and labels instead of reading files
            sample_count = 100  # number of dummy samples per file (class)
            feature_dim = label_start_idx  # number of feature columns
            # Generate random feature values
            dummy_features = np.random.rand(sample_count, feature_dim)
            # Assign label = idx for all samples of this file (class index by order)
            dummy_labels = np.full(shape=(sample_count,), fill_value=idx, dtype=float)
            data_features = dummy_features
            data_labels = dummy_labels
        else:
            # Load CSV data from file
            # Assuming the CSV has no header and the label is at column 'label_start_idx'
            data = np.loadtxt(filepath, delimiter=',', skiprows=0)
            # Split features and label
            data_features = data[:, :label_start_idx]
            data_labels = data[:, label_start_idx:]
            # If label column is one-hot or multi-column, flatten it
            if data_labels.ndim > 1 and data_labels.shape[1] == 1:
                data_labels = data_labels.flatten()
        all_features.append(data_features)
        all_labels.append(data_labels)
    # Concatenate all files' data
    all_features = np.vstack(all_features)
    all_labels = np.concatenate(all_labels)
    return all_features, all_labels

def GetDataset(features, labels):
    """
    Convert feature and label NumPy arrays into a PyTorch TensorDataset.
    Features are cast to float32 tensors and labels to float (double) tensors.
    """
    features_tensor = torch.tensor(features, dtype=torch.float32)
    labels_tensor = torch.tensor(labels)  # labels will be float64 if labels array was float64
    # Ensure labels tensor is 1D (flatten) if it's two-dimensional with a single column
    if labels_tensor.ndim > 1:
        labels_tensor = labels_tensor.view(-1)
    return torch.utils.data.TensorDataset(features_tensor, labels_tensor)

def ShuffleDataset(dataset):
    """
    Return a shuffled version of a TensorDataset.
    """
    # Extract tensors from the dataset
    features, labels = dataset[:]
    # `dataset[:]` returns a tuple of (all_features_tensor, all_labels_tensor)
    # Convert to NumPy for shuffling indices (or we can shuffle using torch directly)
    indices = np.random.permutation(len(labels))
    shuffled_features = features[indices]
    shuffled_labels = labels[indices]
    return torch.utils.data.TensorDataset(shuffled_features, shuffled_labels)

def SplitPrivateOpen(train_features, train_labels, private_percent, open_percent, class_count, random_selection):
    """
    Split the combined training data of a device into private (labeled) data and open (unlabeled) data.
    - private_percent: fraction of data to allocate to private (per class).
    - open_percent: fraction to allocate to open (unlabeled) data.
    - class_count: number of classes for this device.
    - random_selection: if True, select open data randomly; if False, do stratified selection by taking the first portion of each class.
    Returns: (private_features, private_labels, open_features, open_labels) as NumPy arrays.
    """
    total_samples = len(train_labels)
    private_features_list = []
    private_labels_list = []
    open_features_list = []
    open_labels_list = []
    # Compute split per class for stratification
    for class_label in range(class_count):
        # Indices for this class
        class_indices = np.where(train_labels == class_label)[0]
        if len(class_indices) == 0:
            continue
        class_size = len(class_indices)
        open_count = int(class_size * open_percent)
        if open_count < 1 and open_percent > 0:
            open_count = 1  # ensure at least 1 sample if open_percent is >0 but class has few samples
        # Shuffle class indices if random selection is enabled
        if random_selection:
            np.random.shuffle(class_indices)
        # Split indices into private and open
        open_idx = class_indices[:open_count]   # take first part as open
        private_idx = class_indices[open_count:]  # rest as private
        # Gather data for open
        open_features_list.append(train_features[open_idx])
        open_labels_list.append(train_labels[open_idx])
        # Gather data for private
        private_features_list.append(train_features[private_idx])
        private_labels_list.append(train_labels[private_idx])
    # Concatenate per-class splits back together
    if len(private_features_list) > 0:
        private_features = np.vstack(private_features_list)
        private_labels = np.concatenate(private_labels_list)
    else:
        private_features = np.array([]).reshape(0, train_features.shape[1])
        private_labels = np.array([])
    if len(open_features_list) > 0:
        open_features = np.vstack(open_features_list)
        open_labels = np.concatenate(open_labels_list)
    else:
        open_features = np.array([]).reshape(0, train_features.shape[1])
        open_labels = np.array([])
    return private_features, private_labels, open_features, open_labels

def SplitPrivate(features, labels, client_cnt, class_count, iid, data_average):
    """
    Split a device's private data among its local clients.
    - If iid is True: distribute data evenly and randomly among clients (IID split).
    - If iid is False (non-IID 'equally' scenario): assign each class (or classes) to specific clients.
    - data_average: if True, attempt to give each client equal number of samples (where possible).
    Returns a list of TensorDataset objects, one per client.
    """
    datasets = []
    num_samples = len(labels)
    features = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.float64)
    if iid:
        # IID distribution: shuffle and partition roughly equally
        indices = torch.randperm(num_samples)
        # Split indices into `client_cnt` chunks
        splits = torch.chunk(indices, client_cnt)
        for split in splits:
            split_feat = features[split]
            split_label = labels[split]
            datasets.append(torch.utils.data.TensorDataset(split_feat, split_label))
    else:
        # Non-IID 'equally': try to allocate one class per client (or each class to a subset of clients)
        # Determine how many clients per class if classes < clients
        if class_count <= client_cnt:
            # Number of clients each class should occupy (at least floor, some classes may occupy an extra client if not perfectly divisible)
            base_clients_per_class = client_cnt // class_count
            extra = client_cnt % class_count
        else:
            # More classes than clients (not expected in our setup); in this case, some clients will handle multiple classes
            base_clients_per_class = 1
            extra = 0
        client_allocations = [[] for _ in range(client_cnt)]
        current_client = 0
        # Assign each class's data to one or more clients
        for class_label in range(class_count):
            class_indices = (labels == float(class_label)).nonzero(as_tuple=True)[0]
            if len(class_indices) == 0:
                continue
            # Determine number of clients to assign this class
            k = base_clients_per_class + (1 if class_label < extra else 0)
            if k < 1: 
                k = 1
            # Split class samples into k parts (as equal as possible) for k clients
            if k == 1:
                # All samples of this class go to one client
                client_allocations[current_client].extend(class_indices.tolist())
                current_client = (current_client + 1) % client_cnt
            else:
                # Shuffle class indices for fair distribution
                perm = class_indices[torch.randperm(len(class_indices))]
                parts = torch.chunk(perm, k)
                for part in parts:
                    client_allocations[current_client].extend(part.tolist())
                    current_client = (current_client + 1) % client_cnt
        # Now create dataset for each client from allocated indices
        for alloc in client_allocations:
            if len(alloc) == 0:
                # If a client got no data, give an empty dataset
                datasets.append(torch.utils.data.TensorDataset(torch.empty((0, features.shape[1])), torch.empty((0,), dtype=torch.float64)))
            else:
                alloc_idx = torch.tensor(alloc, dtype=torch.long)
                datasets.append(torch.utils.data.TensorDataset(features[alloc_idx], labels[alloc_idx]))
    return datasets

def DilSplitPrivate(features, labels, client_cnt, class_count, alpha, seed):
    """
    Split a device's private data among clients using a Dirichlet distribution (non-IID).
    Each class's samples are divided among clients according to a Dirichlet(alpha) random proportion.
    - alpha: Dirichlet concentration parameter (smaller => more skewed/non-iid distribution).
    - seed: random seed for reproducibility.
    Returns a list of TensorDataset objects for each client.
    """
    if seed is not None:
        np.random.seed(int(seed))
    # Initialize empty index list for each client
    client_indices = [[] for _ in range(client_cnt)]
    # Convert to NumPy for easier grouping
    labels_np = labels if isinstance(labels, np.ndarray) else np.array(labels)
    # For each class, allocate its samples to clients based on Dirichlet draw
    for class_label in range(class_count):
        class_indices = np.where(labels_np == class_label)[0]
        if len(class_indices) == 0:
            continue
        # Draw random proportions for this class among clients
        proportions = np.random.dirichlet([alpha] * client_cnt)
        # Number of samples per client for this class (round down)
        class_counts = (proportions * len(class_indices)).astype(int)
        # Adjust counts to ensure total equals len(class_indices)
        diff = len(class_indices) - class_counts.sum()
        # Distribute any rounding difference
        for i in range(diff):
            # assign one extra sample to clients with largest remaining proportion
            class_counts[np.argmax(proportions)] += 1
        # Shuffle class indices and split according to counts
        np.random.shuffle(class_indices)
        start = 0
        for client_id, count in enumerate(class_counts):
            if count > 0:
                subset = class_indices[start : start + count]
                client_indices[client_id].extend(subset.tolist())
                start += count
    # Create TensorDataset for each client's indices
    datasets = []
    features_tensor = torch.tensor(features, dtype=torch.float32)
    labels_tensor = torch.tensor(labels, dtype=torch.float64)
    for idx_list in client_indices:
        if len(idx_list) == 0:
            datasets.append(torch.utils.data.TensorDataset(torch.empty((0, features_tensor.shape[1])), torch.empty((0,), dtype=torch.float64)))
        else:
            idx_tensor = torch.tensor(idx_list, dtype=torch.long)
            datasets.append(torch.utils.data.TensorDataset(features_tensor[idx_tensor], labels_tensor[idx_tensor]))
    return datasets

def GetFeatureFromOpenDataset(open_dataset, start_idx, end_idx):
    """
    Extract a slice [start_idx:end_idx] from the open (unlabeled) dataset.
    Used to obtain a batch of open data for processing in each communication round.
    Returns (feature_tensor, label_tensor) for that slice (labels are just dummy ground-truth labels, since open data is unlabeled).
    """
    total = len(open_dataset)
    if end_idx > total:
        end_idx = total
    # open_dataset[:] gives (features_tensor, labels_tensor) of the whole dataset
    features_tensor, labels_tensor = open_dataset[:]
    # Slice the tensors
    feature_slice = features_tensor[start_idx:end_idx]
    label_slice = labels_tensor[start_idx:end_idx]
    return feature_slice, label_slice


In [5]:
def Predict(dev, feature, model, model_out_len):
    """
    Generate hard predictions (labels) from the model for the given features.
    Returns a tensor of predicted labels.
    """
    with torch.no_grad():
        model = model.to(dev)
        feature = feature.to(dev)
        logits = model(feature)
        pred_label = Logits2PredLabel(logits, model_out_len)
        return pred_label

def Logits2PredLabel(logits, model_out_len):
    """Convert raw model outputs (logits) to hard labels."""
    with torch.no_grad():
        if model_out_len == 1:
            # Binary classification: apply sigmoid and threshold at 0.5 (round)
            prediction = torch.round(torch.sigmoid(logits))
        else:
            # Multi-class: take the index of the max logit as the predicted class
            _, prediction = torch.max(logits, dim=1)
        return prediction

def Predict2SoftLabel(dev, feature, model, model_out_len):
    """
    Generate soft label predictions (probability distribution or logits) from the model.
    Returns a tensor of soft labels (probabilities for each class).
    """
    with torch.no_grad():
        model = model.to(dev)
        feature = feature.to(dev)
        logits = model(feature)
        soft_logits = Logits2Soft(logits, model_out_len)
        return soft_logits

def Logits2Soft(logits, model_out_len):
    """
    Convert logits to a normalized probability distribution (soft labels).
    For binary, produce a 2-dimensional probability (for class 0 and class 1).
    For multi-class, apply softmax.
    """
    sigmoid = torch.nn.Sigmoid()
    softmax = torch.nn.Softmax(dim=1)
    with torch.no_grad():
        if model_out_len == 1:
            # Binary case: apply sigmoid to get probability of class "1"
            logits = sigmoid(logits)
            # Construct 2-column softmax-like output [P(class0), P(class1)] for each sample
            soft_max_logits = torch.zeros(len(logits), 2)
            for i in range(len(logits)):
                soft_max_logits[i] = torch.tensor([1 - logits[i].item(), logits[i].item()])
            probabilities = soft_max_logits
        else:
            # Multi-class: softmax to get probability distribution over classes
            probabilities = softmax(logits)
        return probabilities

def HardLabel(soft_label):
    """
    Convert a soft label (probability vector) to a hard label index or 'unknown'.
    If the highest probability is > 1/num_classes, returns that class index; 
    otherwise returns 'class_cat' (an index used to denote uncertain/unknown).
    """
    sample_cnt = len(soft_label)
    class_cat = len(soft_label[0])  # number of classes
    boundary = 1.0 / class_cat
    hard_label = [0] * sample_cnt
    for i in range(sample_cnt):
        cur_soft = soft_label[i]
        pred_label = torch.argmax(cur_soft).item()
        pred_proba = torch.max(cur_soft).item()
        # If the highest probability is greater than the uniform probability threshold, accept it; otherwise label as 'unknown' (class_cat index)
        hard_label[i] = pred_label if pred_proba > boundary else class_cat
    return hard_label

def HardLabelVoteHard(all_client_hard_label, class_cat):
    """
    Perform majority vote across clients for each sample's hard label.
    all_client_hard_label: list of hard label lists from each client.
    class_cat: number of normal classes (for multi-class, equals number of classes; an extra index is used to denote 'unknown').
    Returns a tensor of voted labels for each sample.
    """
    client_cnt = len(all_client_hard_label)
    sample_cnt = len(all_client_hard_label[0])
    voted_labels = []
    for i in range(sample_cnt):
        # Tally votes for each class (not counting 'unknown' votes which are denoted by class_cat index)
        votes = [0] * class_cat
        for j in range(client_cnt):
            lbl = all_client_hard_label[j][i]
            if lbl != class_cat:  # ignore 'unknown' votes
                votes[int(lbl)] += 1
        # Determine the class with maximum votes (default 0 if all votes are 'unknown')
        if max(votes) == 0:
            # No client had a confident label (all marked unknown)
            voted_labels.append(class_cat)  # mark as unknown
        else:
            voted_labels.append(int(np.argmax(votes)))
    voted_labels = torch.tensor(voted_labels)
    return voted_labels

def HardLabelVoteOneHot(all_client_hard_label, class_cat):
    """
    Similar to HardLabelVoteHard, but returns one-hot encoded vectors of the voted label for each sample.
    """
    # Get hard voted label per sample
    hard_votes = HardLabelVoteHard(all_client_hard_label, class_cat)
    # Convert to one-hot representation
    one_hot_results = []
    for label in hard_votes:
        one_hot = [0.0] * class_cat
        if label < class_cat:
            one_hot[int(label)] = 1.0
        # if label == class_cat (unknown), one-hot will remain all zeros (or conceptually an 'unknown' category)
        one_hot_results.append(one_hot)
    one_hot_tensor = torch.tensor(one_hot_results)
    return one_hot_tensor

def OneHot2Label(one_hot_vectors):
    """
    Convert one-hot encoded label vectors back to class indices.
    """
    _, labels = torch.max(one_hot_vectors, dim=1)
    labels = labels.double()  # convert to double for consistency with other labels in this codebase
    return labels

def PredictAvg(dev, dataset, bounds, model):
    """
    Compute the average prediction (softmax output) for each class from a dataset slice.
    Not used in main training; possibly for analysis.
    - bounds: list of (start_idx, end_idx) for each class's range in the dataset.
    Returns a dictionary mapping class label -> average probability vector for that class.
    """
    print("\npred avg")
    print(bounds)
    each_label_avg_logit = {}
    soft_max = torch.nn.Softmax(dim=1)
    model = model.to(dev)
    for i in range(len(bounds)):
        start_idx, end_idx = bounds[i]
        if start_idx == end_idx:
            continue  # no samples of this class
        # Extract features and labels for this class range
        class_features, class_labels = dataset[start_idx:end_idx]
        with torch.no_grad():
            class_features = class_features.to(dev)
            preds = model(class_features)
            preds = soft_max(preds)
            mean_pred = torch.mean(preds, dim=0)  # average probability vector
            true_label = class_labels[0].item() if len(class_labels) > 0 else i
            each_label_avg_logit[int(true_label)] = mean_pred.detach().clone()
    print("end pred avg")
    return each_label_avg_logit

def PredictFilter(dev, open_feature, classify_model, classify_model_len_out_tensor, class_cat, theta):
    """
    Filter out uncertain predictions from a model on open data given a threshold theta.
    If model's confidence (max probability) for a sample is below theta, that sample's soft label is replaced with a uniform distribution (treated as unknown).
    - theta: threshold for confidence. If theta < 0, it's dynamically set as median of max probabilities.
    """
    print("\nin predict filter")
    print(f"theta = {theta}")
    classify_model = classify_model.to(dev)
    average_tensor = torch.tensor([1.0 / class_cat] * class_cat)  # uniform distribution vector
    with torch.no_grad():
        open_feature = open_feature.to(dev)
        soft_labels = Logits2Soft(classify_model(open_feature), classify_model_len_out_tensor)
        if theta < 0:
            # if theta is negative, use median of confidence scores as threshold
            max_vals, _ = torch.max(soft_labels, 1)
            theta = max_vals.median()
        # Replace any soft label whose max probability is below theta with a uniform distribution (unknown)
        for i in range(len(soft_labels)):
            max_proba = torch.max(soft_labels[i])
            if max_proba < theta:
                soft_labels[i] = average_tensor.clone()
    return soft_labels


In [6]:
def TrainWithFeatureLabel(dev, feature, label, batchsize, model, opt, loss_func):
    """
    Train the model for one epoch on the given feature & label tensors (treated as a dataset).
    Returns the average loss over the dataset.
    """
    # Create a TensorDataset on-the-fly from the features and labels
    dataset = torch.utils.data.TensorDataset(feature, label)
    avg_loss = TrainWithDataset(dev, dataset, batchsize, model, opt, loss_func)
    return avg_loss

def TrainWithDataset(dev, dataset, batchsize, model, opt, loss_func):
    """
    Train the model for one epoch on the given dataset (torch.utils.data.TensorDataset).
    Uses given optimizer and loss function. Returns the average loss.
    """
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batchsize, shuffle=True)
    model = model.to(dev)
    model.train()
    total_loss = 0.0
    for batch_feature, batch_label in data_loader:
        opt.zero_grad()
        batch_feature = batch_feature.to(dev)
        batch_label = batch_label.to(dev)
        preds = model(batch_feature)
        loss = loss_func(preds, batch_label)
        loss.backward()
        opt.step()
        total_loss += loss.item() * batch_label.size(0)
    avg_loss = total_loss / len(dataset)
    return avg_loss

def EvalWithFeatureLabel(dev, feature, label, batchsize, model, loss_func):
    """
    Evaluate the model on the given feature & label tensors. Returns the average loss.
    """
    dataset = torch.utils.data.TensorDataset(feature, label)
    avg_loss = EvalWithDataset(dev, dataset, batchsize, model, loss_func)
    return avg_loss

def EvalWithDataset(dev, dataset, batchsize, model, loss_func):
    """
    Compute the loss of the model on the entire dataset (without updating weights).
    """
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batchsize, shuffle=True)
    total_loss = 0.0
    with torch.no_grad():
        for feature, label in test_loader:
            feature = feature.to(dev)
            label = label.to(dev)
            out = model(feature)
            loss = loss_func(out, label)
            total_loss += loss.item() * label.size(0)
    avg_loss = total_loss / len(dataset)
    return avg_loss

def Metrics(true_label, pred_label):
    """
    Compute simple accuracy given true labels and predicted labels.
    Returns (correct_count, accuracy).
    Also prints an 'Evaluation...' message (as in original code).
    """
    print("Evaluation...")
    all_preds = pred_label.cpu()
    all_labels = true_label.cpu()
    correct_num = (all_labels == all_preds).sum().item()
    test_acc = correct_num / len(true_label)
    return correct_num, test_acc

def PredictWithDisUnknown(dev, open_feature, classify_model, classify_model_len_out_tensor, discri_model, discri_model_len_out_tensor, class_cat):
    """
    Use both classifier and discriminator to predict labels for open_feature.
    - classify_model produces soft labels (probabilities) for classes.
    - discri_model predicts which samples are 'unknown' (for binary discriminator, output 1 means unknown).
    For each sample: if discriminator predicts it as unknown (1), replace its soft label with a uniform 'average' vector.
    Returns a tensor of soft labels for the open_feature (with unknowns adjusted).
    """
    discri_model = discri_model.to(dev)
    classify_model = classify_model.to(dev)
    # A uniform average probability vector for 'unknown' cases
    average_tensor = torch.tensor([1.0 / class_cat] * class_cat)
    with torch.no_grad():
        open_feature = open_feature.to(dev)
        # Get soft class probabilities from the classifier
        class_logits = classify_model(open_feature)
        soft_labels = Logits2Soft(class_logits, classify_model_len_out_tensor)
        # Get discriminator predictions (hard labels: 0 for known, 1 for unknown if binary output)
        dis_logits = discri_model(open_feature)
        dis_pred = Logits2PredLabel(dis_logits, discri_model_len_out_tensor)
        # For each sample, if discriminator says "unknown", replace its soft label with uniform distribution
        for i in range(len(dis_pred)):
            if dis_pred[i].item() == 1:  # 1 indicates the sample is predicted as 'unknown' by discriminator
                soft_labels[i] = average_tensor.clone()
    return soft_labels

def GetDeviceClientCnt(device_name, client_cnt, classify_model_out_len):
    """
    Determine how many clients to instantiate for a given device.
    If binary classification, return 4 (we limit to 4 clients per device).
    If multi-class:
      - For certain devices (Ennio Doorbell, Samsung Webcam), use roughly half the number of clients (since they have fewer attack types).
      - Otherwise, use the default client_cnt from config.
    """
    if classify_model_out_len == 1:
        return 4
    else:
        if device_name in ["Ennio_Doorbell/", "Samsung_SNH_1011_N_Webcam/"]:
            return int(client_cnt / 2) + 1
        else:
            return client_cnt

def GetDeviceClassCat(device_name, classify_model_out_len):
    """
    Determine the number of classes ('class_cat') for a given device.
    If binary (output_len==1): 2 classes (benign vs attack).
    If multi-class:
      - For Ennio Doorbell and Samsung Webcam devices, return 6 (these devices have only 5 attacks + benign in the dataset).
      - Otherwise, return 11 (all 10 attacks + benign).
    """
    if classify_model_out_len == 1:
        return 2
    if device_name in ["Ennio_Doorbell/", "Samsung_SNH_1011_N_Webcam/"]:
        return 6
    else:
        return 11

def reshape_sample(feature):
    """
    Reshape the feature matrix into shape (-1, 23, 5).
    This organizes 115 features into a 23x5 matrix for CNN input.
    """
    # -1 infers the batch dimension, 23 channels, 5 timesteps
    reshaped = np.reshape(feature, (-1, 23, 5))
    return reshaped

def PredUnknown(dev, feature, model, theta, model_out_len):
    """
    Identify 'unknown' samples from a feature batch using the given model (classifier).
    - It passes the features through the model and obtains probabilities.
    - If model_out_len == 1 (binary classifier for known vs unknown), use sigmoid to get probability of 'attack'.
      Then construct a 2-class probability [P(known), P(unknown)] for each sample.
    - If multi-class, uses softmax to get class probabilities.
    - Then finds samples where the highest probability is below threshold theta (meaning the model is not confident in any known class).
    Returns a tensor of features that are considered 'sure unknown'. If none found, returns None.
    """
    sure_unknown = []
    wait_to_distill = []
    soft_max = torch.nn.Softmax(dim=1)
    sigmoid = torch.nn.Sigmoid()
    model = model.to(dev)
    feature = feature.to(dev)
    with torch.no_grad():
        out = model(feature)
        if model_out_len == 1:
            # Binary case: model output is logit for "attack", compute probability and form 2-class output
            out = sigmoid(out)
            prob2 = torch.zeros(len(out), 2)
            for i in range(len(out)):
                prob2[i] = torch.tensor([1 - out[i].item(), out[i].item()])
            out = prob2
        else:
            out = soft_max(out)
        # Determine threshold if theta is dynamic
        if theta < 0:
            theta = torch.median(torch.max(out, dim=1).values)
        # Separate features into 'unknown' and 'confident' sets based on threshold
        max_vals, pred_labels = torch.max(out, 1)
        for i in range(len(max_vals)):
            if max_vals[i] < theta:
                sure_unknown.append(feature[i])
            else:
                wait_to_distill.append(feature[i])
        if len(sure_unknown) == 0:
            return None
        sure_unknown = torch.stack(sure_unknown)
    return sure_unknown

def LabelFeature(feature, label):
    """
    Create a label tensor for all samples in 'feature', filled with the given label value.
    Returns a tuple (feature, labels_tensor).
    """
    labels = torch.tensor([label] * len(feature), dtype=torch.double)
    return feature, labels

def DisUnknown(dev, client, dis_rounds, batchsize, dis_train_feature, theta):
    """
    Train a client's discriminator model to distinguish 'unknown' samples from known ones.
    Process:
      - Identify 'sure unknown' samples from the provided open data using client's classifier (PredUnknown).
      - Label all 'sure unknown' samples with unknown_label (1) and all client's known private samples as known_label (0).
      - Combine these into a training set for the discriminator.
      - Train the discriminator for 'dis_rounds' epochs on this dataset.
    Returns True if training succeeded (unknown samples found), or False if no unknown samples were identified.
    """
    # Make a copy of the feature data to avoid modifying original
    dis_feature_pool = dis_train_feature.detach().clone()
    # Use the client's classifier to predict which open samples are definitely unknown
    sure_unknown_feature = PredUnknown(dev, dis_feature_pool, client.classify_model, theta, client.classify_model_out_len)
    if sure_unknown_feature is None:
        # No unknown samples detected, hence skip discriminator training
        return False
    # Determine labels for discriminator: unknown samples = 1, known samples = 0
    if client.discri_model_out_len == 1:
        unknown_label_num = 1.0
        known_label_num = 0.0
    else:
        unknown_label_num = 1
        known_label_num = 0
    # Create labeled datasets for unknown and known
    unknown_feat, unknown_lbl = LabelFeature(sure_unknown_feature, unknown_label_num)
    known_feat, _ = client.classify_dataset[:]  # all known private data features
    known_feat = known_feat.detach().clone()
    known_feat, known_lbl = LabelFeature(known_feat, known_label_num)
    # Combine known and unknown samples for discriminator training
    combined_features = torch.cat((known_feat.to(dev), unknown_feat.to(dev)), dim=0)
    combined_labels = torch.cat((known_lbl.to(dev), unknown_lbl.to(dev)), dim=0)
    # To avoid GPU memory issues during assembly, move combined to CPU for shuffling
    combined_features = combined_features.to(torch.device("cpu"))
    combined_labels = combined_labels.to(torch.device("cpu"))
    dis_dataset = torch.utils.data.TensorDataset(combined_features, combined_labels)
    # Shuffle the combined dataset before training
    dis_dataset = ShuffleDataset(dis_dataset)
    # Train the discriminator model for the specified number of rounds (epochs)
    for r in range(dis_rounds):
        TrainWithDataset(dev, dis_dataset, batchsize, client.discri_model, client.discri_opt, client.discri_loss_func)
    return True


In [7]:
def SSFL_IDS(conf, dev, clients, server, test_dataset, open_dataset):
    """
    Core Semi-Supervised Federated Learning (SSFL-IDS) algorithm.
    - conf: dictionary of configuration parameters.
    - dev: torch.device (CPU or CUDA).
    - clients: list of client objects.
    - server: server object.
    - test_dataset: global test dataset (for evaluation).
    - open_dataset: the unlabeled open dataset accessible to all clients.
    
    The training proceeds in communication rounds. Each round consists of:
      Stage I: Supervised client training on private data + Unknown detection training.
      Stage II: Knowledge distillation on open data (clients and server).
    """
    comm_cnt = conf["comm_cnt"]                # total number of communication rounds
    open_idx_set_cnt = conf["open_idx_set_cnt"]  # number of open samples to use each round
    batchsize = conf["batchsize"]
    train_rounds = conf["train_rounds"]        # epochs of classifier training per round (after first)
    dis_rounds = conf["discri_rounds"]         # epochs for discriminator training
    dist_rounds = conf["dist_rounds"]          # epochs for distillation training
    theta = conf["theta"]                      # threshold for unknown detection (if -1, dynamic thresholding)
    labels = conf["labels"]                    # list of class labels (not used directly in code logic)
    first_train_rounds = conf["first_train_rounds"]  # extra epochs of classifier training in first round
    class_cat = conf["classify_model_out_len"] if conf["classify_model_out_len"] > 1 else 2
    dis_train_cnt = 10000  # number of open samples to use for discriminator training each round (hardcoded)
    start_idx = 0
    end_idx = start_idx + open_idx_set_cnt
    open_len = len(open_dataset)
    
    for e in range(comm_cnt):
        sure_unknown_none = set()   # set of client indices that found no unknown samples
        all_client_hard_label = []
        # --- Stage I: Clients training and labeling ---
        print(f"Round {e+1} Stage I")
        client_cnt = len(clients)
        participate = 0  # count how many clients actually participated (had non-zero private data)
        # Slice a batch of open data for this round (or entire open data if smaller)
        open_feature, open_label = GetFeatureFromOpenDataset(open_dataset, start_idx, end_idx)
        # If the requested open batch is larger than the available open set, adjust global_logits size
        if open_idx_set_cnt > open_len:
            global_logits = torch.zeros(open_len, len(labels))
        else:
            global_logits = torch.zeros(open_idx_set_cnt, len(labels))
        # Each client performs local training on its labeled data and uses its discriminator to label open data
        for c_idx, client in enumerate(clients):
            print(f"Client {c_idx+1} Training...")
            # Determine how many epochs to train classifier this round
            cur_train_rounds = train_rounds if e != 0 else first_train_rounds
            # If a client has no private data (just in case), skip it
            if len(client.classify_dataset) == 0:
                continue
            # Supervised training on client's labeled private dataset
            for _ in range(cur_train_rounds):
                TrainWithDataset(dev, client.classify_dataset, batchsize, client.classify_model, client.classify_opt, client.hard_label_loss_func)
            # Mark this client as having participated (has labeled data)
            if sum(count > 0 for count in client.each_class_cnt) > 0:
                participate += 1
            # Prepare data for discriminator training (we use a fixed number of open samples dis_train_cnt for unknown detection)
            dis_train_feature, _ = GetFeatureFromOpenDataset(open_dataset, 0, dis_train_cnt)
            # Train client's discriminator to identify unknown samples (DisUnknown performs this training)
            success = DisUnknown(dev, client, dis_rounds, batchsize, dis_train_feature, theta)
            if not success:
                # If no unknown found, note this client (its predictions will be trusted fully, no unknown filtering)
                sure_unknown_none.add(c_idx)
            # Now use client's classifier & (updated) discriminator to predict labels for the current open_feature batch
            client_open_feature = open_feature.detach().clone()  # copy open feature tensor
            if c_idx not in sure_unknown_none:
                # If the client has a working discriminator (found unknowns), filter unknowns
                local_soft = PredictWithDisUnknown(dev, client_open_feature, client.classify_model, client.classify_model_out_len, client.discri_model, client.discri_model_out_len, len(labels))
            else:
                # If no unknowns were found by this client, just use classifier's soft predictions (no filtering)
                local_soft = Predict2SoftLabel(dev, client_open_feature, client.classify_model, client.classify_model_out_len)
            # Convert the client's soft predictions to hard labels (with 'unknown' possibility)
            hard_label = HardLabel(local_soft)
            all_client_hard_label.append(hard_label)
            print()  # newline for readability
        
        # Perform majority vote across all clients' hard labels for the open batch
        global_hard_labels = HardLabelVoteHard(all_client_hard_label, class_cat)
        # Convert the voted hard labels to one-hot (for distillation training if needed)
        global_logits = HardLabelVoteOneHot(all_client_hard_label, class_cat)
        # --- Stage II: Distillation training ---
        print(f"Round {e+1} Stage II")
        # Each client performs distillation training on the open data with the global pseudo-labels
        for c_idx, client in enumerate(clients):
            print(f"Client {c_idx+1} Distillation Training...")
            for _ in range(dist_rounds):
                # Use global one-hot labels as targets for client's classifier on open_feature
                # If classifier output is binary, convert one-hot targets to 0/1 labels
                if client.classify_model_out_len != 1:
                    TrainWithFeatureLabel(dev, open_feature.detach().clone(), global_logits.detach().clone(), batchsize, client.classify_model, client.classify_opt, client.hard_label_loss_func)
                else:
                    # For binary classifier, convert one-hot global logits to label 0/1
                    binary_targets = OneHot2Label(global_logits)
                    TrainWithFeatureLabel(dev, open_feature.detach().clone(), binary_targets, batchsize, client.classify_model, client.classify_opt, client.hard_label_loss_func)
        # The server also performs distillation on the open data using the aggregated labels (the server's model learns from the consensus of clients)
        print("Server Training...")
        for _ in range(dist_rounds):
            if server.model_out_len != 1:
                TrainWithFeatureLabel(dev, open_feature.detach().clone(), global_logits.detach().clone(), batchsize, server.model, server.dist_opt, server.hard_label_loss_func)
            else:
                binary_targets = OneHot2Label(global_logits)
                TrainWithFeatureLabel(dev, open_feature.detach().clone(), binary_targets, batchsize, server.model, server.dist_opt, server.hard_label_loss_func)
        # Evaluate the server's global model on the test dataset and report accuracy
        test_feature, test_label = test_dataset[:]
        pred_label = Predict(dev, test_feature, server.model, server.model_out_len)
        _, test_acc = Metrics(test_label, pred_label)
        print(f"Round {e+1} Test Acc = {test_acc} \n")


In [8]:
def SSFL_IDS_NBaIoT():
    """
    Setup and execute SSFL-IDS training for the N-BaIoT dataset scenario.
    This function initializes the dataset, clients, and server, then calls SSFL_IDS().
    """
    # Configuration parameters for N-BaIoT experiment
    configs = {
        "comm_cnt": 201,             # total communication rounds
        "device_client_cnt": 11,     # base number of clients per device
        "private_percent": 0.9,      # 90% of each device's data is private (labeled)
        "batchsize": 100,
        "iid": False,               # data distribution is non-IID among clients
        "need_dist": True,          # whether distillation is needed (True in our method)
        "open_percent": 0.1,        # 10% of data is open (unlabeled)
        "label_lr": 0.0001,         # learning rate for classifier models
        "dist_lr": 0.0001,          # learning rate for server model (distillation)
        "discri_lr": 0.0001,        # learning rate for discriminator models
        "train_rounds": 3,          # epochs of classifier training each round (after first)
        "discri_rounds": 3,         # epochs of discriminator training each round
        "dist_rounds": 10,          # epochs of distillation training each round
        "first_train_rounds": 3,    # epochs of classifier training in round 1 (may be higher to prime the model)
        "open_idx_set_cnt": 10000,  # number of open-set samples to use in each round for label aggregation
        "discri_cnt": 10000,        # (not explicitly used above, possibly same as open_idx_set_cnt for discrimination)
        "dist_T": 0.1,              # temperature for distillation (not explicitly used in this code, possibly for an alternate approach)
        "need_SA": False,           # (possibly for another method like spectral augmentation, not used here)
        "test_batch_size": 256,
        "label_start_idx": 115,     # index where label starts in the CSV (features are 0-114)
        "test_round": 1,            # possibly how often to test (not used in code, we test every round)
        "data_average": True,       # whether to equalize data among clients in splitting
        "labels": list(range(11)),  # list of label indices [0..10] for classes
        "clien_need_dist_opt": False, # (unused flag, possibly if clients need distinct optimizers for distillation)
        "discri_model_out_len": 1,  # output length for discriminator (binary output)
        "classify_model_out_len": 11, # output length for classifier (11 classes)
        "sample_cnt": 1000,         # number of samples per class (if using full dataset, here indicates the dataset was limited to 1000 per class)
        "random": True,
        "vote": True,
        "seed": 7,                  # random seed for reproducibility
        "load_data_from_pickle": False,
        "soft_label": False,        # whether using soft labels directly (we use hard labels in voting)
        "num_after_float": 4,
        "theta": -1,                # threshold for unknown detection; -1 means use dynamic median-based threshold
        "split": "dile",            # type of data split: "dile" for Dirichlet, "equally" for each-class-per-client, or "iid"
        "alpha_of_dile": 0.1,       # alpha parameter for Dirichlet split
    }
    # Set random seed if specified
    if configs["seed"] is not None:
        np.random.seed(configs["seed"])
    # Device: use GPU if available, else CPU
    dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # Prepare datasets for N-BaIoT
    test_dataset, private_datasets, open_dataset = CreateDataset(configs, "NBaIoT")
    # Initialize clients and server
    device_names = [
        "Danmini_Doorbell/", "Ecobee_Thermostat/", "Philips_B120N10_Baby_Monitor/",
        "Provision_PT_737E_Security_Camera/", "Provision_PT_838_Security_Camera/",
        "SimpleHome_XCS7_1002_WHT_Security_Camera/", "SimpleHome_XCS7_1003_WHT_Security_Camera/",
        "Ennio_Doorbell/", "Samsung_SNH_1011_N_Webcam/",
    ]
    device_cnt = len(device_names)
    clients = []
    client_idx = 0
    # Iterate through each IoT device and create its clients
    for d_idx in range(device_cnt):
        cur_device_client_cnt = GetDeviceClientCnt(device_names[d_idx], configs["device_client_cnt"], configs["classify_model_out_len"])
        cur_device_private_datasets = private_datasets[d_idx]
        for i in range(cur_device_client_cnt):
            classify_model_out_len = configs["classify_model_out_len"]
            classify_model = GetNbaIotModel(classify_model_out_len)  # new classifier CNN for client
            discri_model_out_len = configs["discri_model_out_len"]
            discri_model = GetNbaIotModel(discri_model_out_len)     # reuse CNN architecture for discriminator (binary output)
            # Create a client object with its private data and models
            client = Create_SSFL_IDS_Client(client_idx, cur_device_private_datasets[i], classify_model, classify_model_out_len, 
                                            configs["label_lr"], discri_model, discri_model_out_len, configs["discri_lr"])
            clients.append(client)
            client_idx += 1
    # Initialize the server's global model (with same architecture as classifier)
    server_model = GetNbaIotModel(configs["classify_model_out_len"])
    server = Create_SSFL_IDS_Server(server_model, configs["classify_model_out_len"], clients, configs["dist_lr"])
    # Run the federated training process
    SSFL_IDS(configs, dev, clients, server, test_dataset, open_dataset)


In [9]:
class SSFL_IDS_Client:
    def __init__(self, idx, classify_dataset: torch.utils.data.Dataset,
                 classify_model: nn.Module, classify_model_out_len, classify_lr: float,
                 discri_model: nn.Module, discri_model_out_len, discri_lr: float):
        # Save the models and dataset
        self.classify_model = classify_model        # classifier model for known classes
        self.classify_dataset = classify_dataset    # private labeled dataset (TensorDataset) for training
        # Determine number of classes for classification (for multi-class vs binary)
        self.class_cat = classify_model_out_len if classify_model_out_len > 1 else 2
        # Count labeled samples per class in this client's dataset
        self.each_class_cnt = [0] * self.class_cat
        for _, label in self.classify_dataset:
            # label.item() works if label tensor is 0-d or 1-d; ensure label is int
            lbl = int(label.item()) if hasattr(label, "item") else int(label)
            if lbl < len(self.each_class_cnt):
                self.each_class_cnt[lbl] += 1
        self.classify_lr = classify_lr
        self.c_idx = idx
        # Optimizer for classifier model
        self.classify_opt = optim.Adam(self.classify_model.parameters(), lr=self.classify_lr)
        # Save discriminator and its properties
        self.discri_model = discri_model
        self.discri_lr = discri_lr
        self.discri_opt = optim.Adam(self.discri_model.parameters(), lr=self.discri_lr)
        self.discri_model_out_len = discri_model_out_len
        # Loss function for discriminator
        if discri_model_out_len == 1:
            self.discri_loss_func = nn.BCEWithLogitsLoss()
        else:
            self.discri_loss_func = nn.CrossEntropyLoss()
        self.classify_model_out_len = classify_model_out_len
        # Loss function for classifier (hard label loss)
        if classify_model_out_len == 1:
            self.hard_label_loss_func = nn.BCEWithLogitsLoss()
            # If binary output, ensure dataset labels are float (double) for BCE loss
            features, labels = self.classify_dataset[:]
            labels = labels.double()
            self.classify_dataset = torch.utils.data.TensorDataset(features, labels)
        else:
            self.hard_label_loss_func = nn.CrossEntropyLoss()
        # Loss function for classifier (soft label loss) - used for distillation
        self.soft_label_loss_func = SSFL_IDS_CELoss()

class SSFL_IDS_CELoss(nn.Module):
    """
    Custom loss for soft labels (used in knowledge distillation).
    Essentially computes cross-entropy between a probability distribution target (soft labels) and the predictions.
    """
    def __init__(self):
        super().__init__()
    def forward(self, pred_pro, target_tensor):
        # pred_pro: model predictions (logits) for each class.
        # target_tensor: soft target probabilities for each class.
        pred_pro = F.log_softmax(pred_pro, dim=1)    # log-softmax of predictions
        # Multiply by target probabilities and take negative sum (cross entropy)
        out = -1 * pred_pro * target_tensor
        return out.sum() / len(pred_pro)

class SSFL_IDS_Server:
    def __init__(self, model: nn.Module, model_out_len, clients, dist_lr: float):
        self.model = model               # global model
        self.clients = clients           # reference to list of clients (not used in training directly)
        self.client_cnt = len(clients)
        self.model_out_len = model_out_len
        self.dist_lr = dist_lr
        # Optimizer for server model (for distillation updates)
        self.dist_opt = optim.Adam(self.model.parameters(), lr=self.dist_lr)
        # Loss function for distillation on server
        self.soft_label_loss_func = SSFL_IDS_CELoss()
        # Loss for hard labels on server model (though server uses only soft labels in this code)
        if model_out_len != 1:
            self.hard_label_loss_func = nn.CrossEntropyLoss()
        else:
            self.hard_label_loss_func = nn.BCEWithLogitsLoss()

def Create_SSFL_IDS_Client(client_idx, private_dataset, classify_model, classify_model_out_len,
                           lr, discri_model, discri_model_out_len, discri_lr):
    """Factory function to create a SSFL_IDS_Client instance."""
    client = SSFL_IDS_Client(client_idx, private_dataset, classify_model, classify_model_out_len,
                              lr, discri_model, discri_model_out_len, discri_lr)
    return client

def Create_SSFL_IDS_Server(server_model, classify_model_out_len, clients, dist_lr):
    """Factory function to create a SSFL_IDS_Server instance."""
    server = SSFL_IDS_Server(server_model, classify_model_out_len, clients, dist_lr)
    return server


In [None]:
def CreateDataset(configs, dataset_name="NBaIoT"):
    if dataset_name == "NBaIoT":
        return create_NBaIoT(configs)

def create_NBaIoT(configs):
    prefix = "C:/Users/Iotworkstation/Desktop/majorproject/USSFL/data/nba_iot_1000/"
    # Device and attack type names as in the dataset directory
    device_names = [
        "Danmini_Doorbell/", "Ecobee_Thermostat/", "Philips_B120N10_Baby_Monitor/",
        "Provision_PT_737E_Security_Camera/", "Provision_PT_838_Security_Camera/",
        "SimpleHome_XCS7_1002_WHT_Security_Camera/", "SimpleHome_XCS7_1003_WHT_Security_Camera/",
        "Ennio_Doorbell/", "Samsung_SNH_1011_N_Webcam/",
    ]
    attack_names = [
        "benign", "g_combo", "g_junk", "g_scan", "g_tcp", "g_udp",
        "m_ack", "m_scan", "m_syn", "m_udp", "m_udpplain"
    ]
    # If doing binary classification (classify_model_out_len==1), only use "benign" and combined "attack"
    if configs["classify_model_out_len"] == 1:
        attack_names = ["benign", "attack"]
    # Initialize containers for combined data
    all_device_train_feature = None
    all_device_train_label = None
    all_device_open_feature = None
    all_device_open_label = None
    all_device_private_feature = []  # list of private features for each device (before splitting to clients)
    all_device_private_label = []
    all_device_test_feature = None
    all_device_test_label = None
    device_cnt = len(device_names)
    # Loop over each device to read data
    if not configs.get("load_data_from_pickle", False):
        for d_idx in range(device_cnt):
            # Determine how many classes this device has (some devices might not have all attacks)
            cur_device_class_cat = GetDeviceClassCat(device_names[d_idx], configs["classify_model_out_len"])
            train_filenames = []
            test_filenames = []
            # Prepare file list for this device's train and test data
            for i in range(len(attack_names)):
                if i < cur_device_class_cat:
                    train_filename = prefix + device_names[d_idx] + attack_names[i] + "_train.csv"
                    test_filename = prefix + device_names[d_idx] + attack_names[i] + "_test.csv"
                    train_filenames.append(train_filename)
                    test_filenames.append(test_filename)
            # Read and aggregate all training data for this device
            train_feature, train_label = GetAllFeatureLabel(train_filenames, configs["label_start_idx"])
            # Split into private and open sets for this device
            private_feature, private_label, open_feature, open_label = SplitPrivateOpen(
                train_feature, train_label,
                configs["private_percent"], configs["open_percent"],
                cur_device_class_cat, False
            )
            all_device_private_feature.append(private_feature)
            all_device_private_label.append(private_label)
            # Accumulate open data from this device into the global open pool
            if all_device_open_feature is None:
                all_device_open_feature = open_feature
                all_device_open_label = open_label
            else:
                all_device_open_feature = np.concatenate((all_device_open_feature, open_feature), axis=0)
                all_device_open_label = np.concatenate((all_device_open_label, open_label), axis=0)
            # (all_device_train_feature/label are not extensively used except perhaps for debugging)
            if all_device_train_feature is None:
                all_device_train_feature = train_feature
                all_device_train_label = train_label
            # Read and aggregate all test data for this device
            test_feature, test_label = GetAllFeatureLabel(test_filenames, configs["label_start_idx"])
            if all_device_test_feature is None:
                all_device_test_feature = test_feature
                all_device_test_label = test_label
            else:
                all_device_test_feature = np.concatenate((all_device_test_feature, test_feature), axis=0)
                all_device_test_label = np.concatenate((all_device_test_label, test_label), axis=0)
    # Scale the features using Min-Max normalization (fit on open data, apply to all)
    scaler = MinMaxScaler()
    scaler.fit(all_device_open_feature)
    # Transform open and test features
    all_device_open_feature = scaler.transform(all_device_open_feature)
    all_device_open_feature = reshape_sample(all_device_open_feature)
    open_dataset = GetDataset(all_device_open_feature, all_device_open_label)
    open_dataset = ShuffleDataset(open_dataset)
    all_device_test_feature = scaler.transform(all_device_test_feature)
    all_device_test_feature = reshape_sample(all_device_test_feature)
    test_dataset = GetDataset(all_device_test_feature, all_device_test_label)
    # Split each device's private data among its clients
    private_datasets = []
    for d_idx in range(device_cnt):
        cur_device_class_cat = GetDeviceClassCat(device_names[d_idx], configs["classify_model_out_len"])
        cur_device_client_cnt = GetDeviceClientCnt(device_names[d_idx], configs["device_client_cnt"], configs["classify_model_out_len"])
        # Get this device's private features and labels
        cur_device_private_feature = all_device_private_feature[d_idx]
        cur_device_private_label = all_device_private_label[d_idx]
        # Apply the same normalization to private data
        cur_device_private_feature = scaler.transform(cur_device_private_feature)
        cur_device_private_feature = reshape_sample(cur_device_private_feature)
        # Partition the private data into client subsets according to the config
        if configs["iid"] == True:
            # Fully IID split among clients
            cur_device_private_datasets = SplitPrivate(cur_device_private_feature, cur_device_private_label,
                                                      cur_device_client_cnt, cur_device_class_cat,
                                                      iid=True, data_average=configs["data_average"])
        elif configs["split"] == "dile":
            # Dirichlet distribution split among clients
            cur_device_private_datasets = DilSplitPrivate(cur_device_private_feature, cur_device_private_label,
                                                         cur_device_client_cnt, cur_device_class_cat,
                                                         configs["alpha_of_dile"], configs["seed"])
        elif configs["split"] == "equally":
            # Equal class split (each class to specific clients)
            cur_device_private_datasets = SplitPrivate(cur_device_private_feature, cur_device_private_label,
                                                      cur_device_client_cnt, cur_device_class_cat,
                                                      iid=False, data_average=configs["data_average"])
        else:
            # Default to IID if unspecified
            cur_device_private_datasets = SplitPrivate(cur_device_private_feature, cur_device_private_label,
                                                      cur_device_client_cnt, cur_device_class_cat,
                                                      iid=True, data_average=configs["data_average"])
        private_datasets.append(cur_device_private_datasets)
    return test_dataset, private_datasets, open_dataset


In [18]:
# Run the main training execution for USSFL-IDS on N-BaIoT dataset
SSFL_IDS_NBaIoT()


ValueError: Found array with 0 sample(s) (shape=(0, 115)) while a minimum of 1 is required by MinMaxScaler.