In [None]:

import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


In [None]:
def load_data(data_dir):
    """Read every ``.csv`` file in ``data_dir`` and stack them into one DataFrame.

    Files are read in sorted path order so that the row order of the result is
    reproducible; ``os.listdir`` itself returns entries in arbitrary order.

    Args:
        data_dir: Directory that directly contains the CSV files
            (sub-directories are not searched).

    Returns:
        One DataFrame holding the rows of all files, with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``data_dir`` contains no file ending in ``.csv``.
    """
    csv_files = sorted(
        os.path.join(data_dir, name)
        for name in os.listdir(data_dir)
        if name.endswith('.csv')
    )
    if not csv_files:
        # pd.concat([]) would also raise ValueError, but with a message that
        # does not mention the directory that turned out to be empty.
        raise ValueError(f"No .csv files found in {data_dir!r}")

    df_list = []
    for file in csv_files:
        print(f"Reading {file}")
        df_list.append(pd.read_csv(file, low_memory=False))
    return pd.concat(df_list, ignore_index=True)


In [None]:
def preprocess_data(data):
    """Clean the raw frame, encode ``Label`` and standardise the numeric features.

    Steps, in order:
      1. Strip surrounding whitespace from column names.
      2. Drop pandas' auto-generated ``Unnamed*`` columns.
      3. Replace +/-inf with NaN, drop columns that are entirely NaN, then drop
         every row that still contains a NaN.
      4. Split off the ``Label`` column and integer-encode it.
      5. Standardise every numeric feature column with ``StandardScaler``.

    Non-numeric feature columns are returned unchanged.

    NOTE(review): the scaler is fitted on all rows (before any train/test
    split happens downstream) and is not returned to the caller.

    Args:
        data: DataFrame containing a ``Label`` column. It is not modified.

    Returns:
        Tuple ``(X, y_encoded, label_encoder)``: the feature DataFrame, the
        integer-encoded labels, and the fitted ``LabelEncoder``.

    Raises:
        KeyError: If no ``Label`` column is present after cleaning.
    """
    # Some CSV exports pad header names with spaces (e.g. ' Label'); normalise
    # them so the 'Label' lookup below works either way.
    data = data.rename(columns=lambda col: col.strip() if isinstance(col, str) else col)

    # Drop index-like 'Unnamed*' columns first, so that a NaN inside one of
    # them cannot cause an otherwise complete row to be discarded below.
    data = data.loc[:, ~data.columns.astype(str).str.contains('^Unnamed')]

    # Treat infinities as missing, drop all-NaN columns, then incomplete rows.
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.dropna(axis=1, how='all')
    data = data.dropna()

    # Separate features and labels
    X = data.drop(columns='Label')
    y = data['Label']

    # Encode labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Standardise every numeric column regardless of bit width; the previous
    # float64/int64 filter skipped e.g. int32 or float32 columns.
    numerical_cols = X.select_dtypes(include=[np.number]).columns
    if len(numerical_cols) > 0:
        # StandardScaler rejects input with zero feature columns.
        scaler = StandardScaler()
        X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

    return X, y_encoded, label_encoder


In [None]:
def partition_data(X, y, label_encoder, num_companies=5):
    """Split the data so that each company receives a disjoint set of attack types.

    The encoded class ids are shuffled with NumPy's global RNG and dealt out
    round-robin: company ``i`` gets the ids at positions ``i, i + n, i + 2n, ...``
    of the shuffled order (``n = num_companies``). Each company then receives
    every row whose label is one of its ids.

    Args:
        X: Feature DataFrame; rows are selected by position.
        y: Encoded labels aligned with the rows of ``X``, as produced by
            ``label_encoder`` (integers in ``0 .. len(classes_) - 1``).
        label_encoder: Fitted encoder. Only ``classes_`` is read, and it is
            left untouched.
        num_companies: Number of partitions to create.

    Returns:
        Dict mapping ``'Company_<k>'`` (k starting at 1) to ``(X_company, y_company)``.
        A company ends up with empty data when there are fewer attack types
        than companies.
    """
    labels = np.asarray(y)

    # Shuffle a fresh array of class ids rather than label_encoder.classes_
    # itself: classes_ is the encoder's own lookup table, and reordering it in
    # place would change what every already-encoded id decodes to.
    shuffled_codes = np.arange(len(label_encoder.classes_))
    np.random.shuffle(shuffled_codes)

    company_data = {}
    for i in range(num_companies):
        company_codes = shuffled_codes[i::num_companies]
        # One vectorised membership test per company instead of one
        # inverse_transform call per row.
        positions = np.flatnonzero(np.isin(labels, company_codes))
        company_data[f'Company_{i+1}'] = (X.iloc[positions], labels[positions])

    return company_data


In [None]:
def save_company_data(company_data, output_dir='company_data'):
    """Write each company's partition to ``<output_dir>/<company>/``.

    For every entry, the features are stored as ``X.csv`` (without the index
    column) and the labels as ``y.npy``. Missing directories are created.

    Args:
        company_data: Mapping of company name to ``(X_company, y_company)``.
        output_dir: Root directory for the per-company folders.
    """
    os.makedirs(output_dir, exist_ok=True)

    for company in company_data:
        features, labels = company_data[company]

        target_dir = os.path.join(output_dir, company)
        os.makedirs(target_dir, exist_ok=True)

        features_path = os.path.join(target_dir, 'X.csv')
        labels_path = os.path.join(target_dir, 'y.npy')
        features.to_csv(features_path, index=False)
        np.save(labels_path, labels)


In [None]:
# Data-preparation entry point: load the raw CSVs, preprocess them, persist the
# fitted label encoder, then write one data partition per company.
if __name__ == '__main__':
    import joblib

    data_dir = '/path/to/CICDDoS2019/CSV/'  # Update this path
    data = load_data(data_dir)
    X, y_encoded, label_encoder = preprocess_data(data)

    # Persist the encoder right after it was fitted, before it is handed to any
    # other function, so the saved class order is exactly the one that produced
    # y_encoded (and therefore the y.npy files written below).
    joblib.dump(label_encoder, 'label_encoder.joblib')

    company_data = partition_data(X, y_encoded, label_encoder, num_companies=5)
    save_company_data(company_data)


In [None]:
# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F

class DDoSDetectionModel(nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> 64 -> output logits.

    The third linear layer is registered under the name ``personal_layer``
    (the personalization layer). ReLU follows every layer except the last,
    which returns raw logits.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output classes.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Attributes are created in the order in which forward() applies them.
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.personal_layer = nn.Linear(64, 64)  # personalization layer
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.personal_layer):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)


In [None]:
# client.py
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import flwr as fl
import numpy as np
from model import DDoSDetectionModel
from sklearn.model_selection import train_test_split
import joblib

# Training and evaluation functions
def train(model, train_loader, criterion, optimizer, device):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Each batch is moved to ``device``; inputs are cast to float and targets to
    long before the loss is computed. One optimizer step is taken per batch.
    Nothing is returned.
    """
    model.train()
    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(features.float())
        batch_loss = criterion(logits, labels.long())
        batch_loss.backward()
        optimizer.step()

def test(model, test_loader, criterion, device):
    model.eval()
    correct = 0
    total = 0
    loss_total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data.float())
            loss = criterion(outputs, target.long())
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            loss_total += loss.item()
    accuracy = 100 * correct / total
    return accuracy, loss_total / len(test_loader)

# Flower client
class FLClient(fl.client.NumPyClient):
    """Flower NumPy client that shares every tensor except those of ``personal_layer``.

    State-dict entries whose name contains ``'personal_layer'`` are never sent
    to the server and never overwritten by it; all other entries are exchanged
    in ``state_dict()`` order. The Adam optimizer is created once in
    ``__init__`` and reused for every call to ``fit``.
    """

    def __init__(self, model, train_loader, test_loader, device):
        self.model = model.to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

    def _global_layer_names(self):
        """Return the state-dict keys that are exchanged with the server."""
        return [name for name in self.model.state_dict().keys() if 'personal_layer' not in name]

    def get_parameters(self, config=None):
        """Return the shared tensors as a list of NumPy arrays.

        ``config`` is accepted (and ignored) because Flower >= 1.0 calls
        ``get_parameters(config=...)``; the default keeps argument-free calls
        working.
        """
        state_dict = self.model.state_dict()
        return [state_dict[name].cpu().numpy() for name in self._global_layer_names()]

    def set_parameters(self, parameters):
        """Overwrite the shared tensors with ``parameters``; personal layers are kept.

        Raises:
            ValueError: If the number of arrays differs from the number of
                shared tensors. ``zip`` would otherwise truncate silently and
                leave some layers un-updated.
        """
        state_dict = self.model.state_dict()
        global_layers = self._global_layer_names()
        if len(parameters) != len(global_layers):
            raise ValueError(
                f"Expected {len(global_layers)} parameter arrays, got {len(parameters)}"
            )
        for name, array in zip(global_layers, parameters):
            state_dict[name] = torch.tensor(array)
        self.model.load_state_dict(state_dict, strict=False)

    def fit(self, parameters, config):
        """Load the global weights, train for one pass, and return the updated shared weights."""
        self.set_parameters(parameters)
        train(self.model, self.train_loader, self.criterion, self.optimizer, self.device)
        return self.get_parameters(), len(self.train_loader.dataset), {}

    def evaluate(self, parameters, config):
        """Load the global weights and report loss and accuracy on the local test split."""
        self.set_parameters(parameters)
        accuracy, loss = test(self.model, self.test_loader, self.criterion, self.device)
        return float(loss), len(self.test_loader.dataset), {"accuracy": float(accuracy)}

def load_data(company_dir):
    """Load one company's partition from ``company_dir``.

    Args:
        company_dir: Directory containing ``X.csv`` and ``y.npy``.

    Returns:
        Tuple ``(X, y)``: the features as a DataFrame and the labels as a
        NumPy array.
    """
    # pandas is not among client.py's module-level imports, so bring it into
    # scope here; without this the function fails with NameError on `pd` when
    # the cell is run as a standalone script.
    import pandas as pd

    X = pd.read_csv(os.path.join(company_dir, 'X.csv'))
    y = np.load(os.path.join(company_dir, 'y.npy'))
    return X, y

def start_client(company_name):
    """Train and evaluate as the Flower client for one company.

    Loads the company's data from ``company_data/<company_name>``, makes an
    80/20 train/test split with a fixed seed, builds the model and connects to
    the Flower server at ``localhost:8080``.

    Args:
        company_name: Name of the company's sub-directory, e.g. ``'Company_1'``.
    """
    features, labels = load_data(os.path.join('company_data', company_name))

    # Fixed random_state so the same rows land in the test split on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        features.values, labels, test_size=0.2, random_state=42
    )

    # Only the training loader shuffles.
    train_loader = DataLoader(
        TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
        batch_size=32,
        shuffle=True,
    )
    test_loader = DataLoader(
        TensorDataset(torch.tensor(X_test), torch.tensor(y_test)),
        batch_size=32,
    )

    # The output size covers every class known to the saved encoder, not only
    # the classes present in this company's data.
    num_classes = len(joblib.load('label_encoder.joblib').classes_)
    num_features = features.shape[1]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DDoSDetectionModel(num_features, num_classes)

    fl.client.start_numpy_client(
        server_address="localhost:8080",
        client=FLClient(model, train_loader, test_loader, device),
    )
    
# Script entry point: the company name is the first command-line argument.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of a bare IndexError on sys.argv[1].
        raise SystemExit(f"Usage: python {sys.argv[0]} <company_name>  (e.g. Company_1)")
    company_name = sys.argv[1]  # Pass company name as command-line argument
    start_client(company_name)


In [None]:
# server.py
import flwr as fl
import torch
from model import DDoSDetectionModel
import joblib

# Server entry point: build the initial shared weights and run FedAvg for 10 rounds.
if __name__ == '__main__':
    # Load label encoder to get the output dimension
    label_encoder = joblib.load('label_encoder.joblib')
    # Must equal the number of feature columns in the clients' X.csv, since it
    # fixes the shape of the shared fc1 weights.
    input_dim = 80  # Update with actual number of features
    output_dim = len(label_encoder.classes_)

    # Initialize global model; only tensors outside 'personal_layer' are shared.
    model = DDoSDetectionModel(input_dim, output_dim)
    initial_parameters = [
        tensor.cpu().numpy()
        for name, tensor in model.state_dict().items()
        if 'personal_layer' not in name
    ]

    # Define strategy: every round waits for, and uses, all five clients.
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=1.0,
        fraction_evaluate=1.0,
        min_fit_clients=5,
        min_evaluate_clients=5,
        min_available_clients=5,
        initial_parameters=fl.common.ndarrays_to_parameters(initial_parameters)
    )

    # Start Flower server. The keyword names used above (fraction_evaluate,
    # ndarrays_to_parameters) are the Flower >= 1.0 API, where the round count
    # is passed as a ServerConfig object rather than a plain dict.
    fl.server.start_server(
        server_address="localhost:8080",
        strategy=strategy,
        config=fl.server.ServerConfig(num_rounds=10),
    )


How to run it:
# In Terminal 1
python server.py


# In Terminals 2-6 (one for each company)
python client.py Company_1
python client.py Company_2
python client.py Company_3
python client.py Company_4
python client.py Company_5


In [None]:
# global_evaluation.py
import torch
from model import DDoSDetectionModel
from client import load_data
import joblib
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score

def evaluate_global_model(global_parameters=None):
    """Evaluate the aggregated shared layers on the pooled test splits of all companies.

    A fresh ``DDoSDetectionModel`` is built and every state-dict entry outside
    ``personal_layer`` is overwritten with ``global_parameters``. The
    ``personal_layer`` tensors keep their random initialisation, because they
    are never part of the shared parameters. Each company's test split is
    recreated with the same ``test_size=0.2, random_state=42`` split that
    ``client.py`` uses, and a classification report is printed.

    Args:
        global_parameters: Aggregated weights of the shared tensors, in
            ``state_dict()`` order: either a list/tuple of NumPy arrays or a
            Flower ``Parameters`` object. This argument is required; the
            function previously read an undefined module-level ``strategy``.

    Raises:
        ValueError: If ``global_parameters`` is missing, or if its length does
            not match the number of shared tensors.
    """
    # None of these names are among this script's module-level imports.
    import os
    from sklearn.model_selection import train_test_split
    from torch.utils.data import DataLoader, TensorDataset

    if global_parameters is None:
        raise ValueError(
            "global_parameters is required: pass the aggregated weights of the "
            "non-personal layers (list of NumPy arrays or a Flower Parameters object)"
        )
    if not isinstance(global_parameters, (list, tuple)):
        # Assume a Flower Parameters object and convert it to NumPy arrays.
        import flwr as fl
        global_parameters = fl.common.parameters_to_ndarrays(global_parameters)

    # Load label encoder
    label_encoder = joblib.load('label_encoder.joblib')
    class_names = label_encoder.classes_
    output_dim = len(class_names)

    # Load test data from all companies
    X_list = []
    y_list = []
    for company in ['Company_1', 'Company_2', 'Company_3', 'Company_4', 'Company_5']:
        company_dir = os.path.join('company_data', company)
        X, y = load_data(company_dir)
        _, X_test, _, y_test = train_test_split(X.values, y, test_size=0.2, random_state=42)
        X_list.append(X_test)
        y_list.append(y_test)
    X_test_global = np.concatenate(X_list)
    y_test_global = np.concatenate(y_list)

    # Take the feature count from the data instead of a hard-coded constant.
    input_dim = X_test_global.shape[1]

    # Initialize model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DDoSDetectionModel(input_dim, output_dim).to(device)

    # Load global model parameters into the shared layers
    state_dict = model.state_dict()
    global_layers = [name for name in state_dict.keys() if 'personal_layer' not in name]
    if len(global_parameters) != len(global_layers):
        raise ValueError(
            f"Expected {len(global_layers)} parameter arrays, got {len(global_parameters)}"
        )
    for name, array in zip(global_layers, global_parameters):
        state_dict[name] = torch.tensor(array)
    model.load_state_dict(state_dict, strict=False)

    # Create test loader
    test_dataset = TensorDataset(torch.tensor(X_test_global), torch.tensor(y_test_global))
    test_loader = DataLoader(test_dataset, batch_size=32)

    # Evaluate
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data.float())
            predicted = outputs.argmax(dim=1)
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Pass the full label set explicitly: without it, classification_report
    # raises when a class is absent from both y_true and y_pred, because the
    # number of target_names no longer matches the labels it infers.
    report = classification_report(
        y_true,
        y_pred,
        labels=np.arange(output_dim),
        target_names=class_names,
    )
    print(report)

# Script entry point: run the global evaluation, which prints a classification report.
if __name__ == '__main__':
    evaluate_global_model()
