In [1]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
import logging
from sklearn.preprocessing import label_binarize
import copy
from collections import defaultdict
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score
import json
from datetime import datetime


class HierarchicalFederatedServer:
    def __init__(self, global_model):
        self.global_model = global_model
        self.client_updates = []

    def receive_update(self, update):
        self.client_updates.append(update)

    def aggregate_updates(self):
        if not self.client_updates:
            return
        aggregated = copy.deepcopy(self.client_updates[0])
        for key in aggregated.keys():
            params = torch.stack([update[key] for update in self.client_updates])
            aggregated[key] = torch.mean(params, dim=0)
        self.global_model.load_state_dict(aggregated)
        self.client_updates = []

class FederatedClient:
    def __init__(self, model, train_data, train_labels, device, learning_rate=0.0005):
        self.model = copy.deepcopy(model).to(device)
        self.train_data = train_data
        self.train_labels = train_labels
        self.device = device
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=learning_rate,
            weight_decay=0.02
        )
        self.scheduler = torch.optim.lr_scheduler.OneCycleLR(
            self.optimizer,
            max_lr=learning_rate,
            epochs=15,
            steps_per_epoch=len(train_data) // 32 + 1
        )
        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    def train(self, epochs=15):
        self.model.train()
        dataset = DataLoader(
            torch.utils.data.TensorDataset(self.train_data, self.train_labels),
            batch_size=32,
            shuffle=True
        )
        best_loss = float('inf')
        patience = 3
        no_improve = 0

        for epoch in range(epochs):
            epoch_loss = 0
            for data, labels in dataset:
                data, labels = data.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(data.float())
                loss = self.criterion(outputs, labels)
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.optimizer.step()
                self.scheduler.step()
                epoch_loss += loss.item()

            avg_loss = epoch_loss / len(dataset)
            logging.info(f'Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}')
            
            if avg_loss < best_loss:
                best_loss = avg_loss
                no_improve = 0
            else:
                no_improve += 1
                
            if no_improve >= patience:
                logging.info(f'Early stopping at epoch {epoch + 1}')
                break

        return self.model.state_dict()

class AttentionModule(nn.Module):
    def __init__(self, hidden_dim, num_heads=4, dropout=0.2):
        super().__init__()
        self.attention = nn.MultiheadAttention(hidden_dim, num_heads)
        self.layer_norm1 = nn.LayerNorm(hidden_dim)
        self.layer_norm2 = nn.LayerNorm(hidden_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        attn_output, _ = self.attention(x, x, x)
        x = self.layer_norm1(x + attn_output)
        ff_output = self.feed_forward(x)
        x = self.layer_norm2(x + ff_output)
        return x

class ImprovedRNNLSTM(nn.Module):
    def __init__(self, input_size, hidden_dim, num_classes, num_layers=2, dropout=0.2):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_size, hidden_dim * 2),
            nn.LayerNorm(hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )

        self.lstm = nn.LSTM(
            hidden_dim, hidden_dim, num_layers=num_layers,
            batch_first=True, bidirectional=True, dropout=dropout
        )

        self.attention_layers = nn.ModuleList([
            AttentionModule(hidden_dim * 2, dropout=dropout) for _ in range(2)
        ])

        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        features = self.feature_extractor(x).unsqueeze(1)
        lstm_out, _ = self.lstm(features)

        for attention_layer in self.attention_layers:
            lstm_out = attention_layer(lstm_out)

        pooled = F.adaptive_max_pool1d(lstm_out.transpose(1, 2), 1).squeeze(-1)
        return self.classifier(pooled)
    
# Base model without attention and with simplified architecture
class BaseRNNLSTM(nn.Module):
    def __init__(self, input_size, hidden_dim, num_classes, num_layers=2, dropout=0.2):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_size, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        
        self.lstm = nn.LSTM(
            hidden_dim, hidden_dim, num_layers=num_layers,
            batch_first=True, bidirectional=False, dropout=dropout
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        features = self.feature_extractor(x).unsqueeze(1)
        lstm_out, _ = self.lstm(features)
        pooled = lstm_out[:, -1, :]
        return self.classifier(pooled)

# Model with attention but without bidirectional LSTM
class RNNLSTMWithAttention(nn.Module):
    def __init__(self, input_size, hidden_dim, num_classes, num_layers=2, dropout=0.2):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_size, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        
        self.lstm = nn.LSTM(
            hidden_dim, hidden_dim, num_layers=num_layers,
            batch_first=True, bidirectional=False, dropout=dropout
        )
        
        self.attention = nn.MultiheadAttention(hidden_dim, num_heads=4)
        self.layer_norm = nn.LayerNorm(hidden_dim)
        
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        features = self.feature_extractor(x).unsqueeze(1)
        lstm_out, _ = self.lstm(features)
        
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)
        lstm_out = self.layer_norm(lstm_out + attn_out)
        pooled = F.adaptive_max_pool1d(lstm_out.transpose(1, 2), 1).squeeze(-1)
        return self.classifier(pooled)

# Model variants for ablation study
def get_model_variants(input_size, hidden_dim, num_classes, train_data=None, train_labels=None, device=None):
    # Basic model variants
    base_variants = {
        'base_model': BaseRNNLSTM(input_size, hidden_dim, num_classes),
        'with_attention': RNNLSTMWithAttention(input_size, hidden_dim, num_classes),
        'with_bidirectional': ImprovedRNNLSTM(input_size, hidden_dim, num_classes, num_layers=2, dropout=0.2),
    }
    
    # Initialize full model
    full_model = ImprovedRNNLSTM(input_size, hidden_dim, num_classes)
    
    # Add standard full model
    variants = {
        **base_variants,
        'full_model': full_model,
    }
    
    # Add federated learning variants if training data is provided
    if train_data is not None and train_labels is not None and device is not None:
        # Split data for federated learning
        split_data = distribute_data(train_data, train_labels)
        
        # Federated Learning setup
        fed_model = ImprovedRNNLSTM(input_size, hidden_dim, num_classes)
        fed_clients = [
            FederatedClient(fed_model, client_data, client_labels, device)
            for client_data, client_labels in split_data
        ]
        variants['federated_model'] = {
            'model': fed_model,
            'clients': fed_clients
        }
        
        # Hierarchical Federated Learning setup
        hier_fed_model = ImprovedRNNLSTM(input_size, hidden_dim, num_classes)
        hier_fed_server = HierarchicalFederatedServer(hier_fed_model)
        hier_fed_clients = [
            FederatedClient(hier_fed_model, client_data, client_labels, device)
            for client_data, client_labels in split_data
        ]
        variants['hierarchical_federated_model'] = {
            'model': hier_fed_model,
            'server': hier_fed_server,
            'clients': hier_fed_clients
        }
    
    return variants

class ModelTrainer:
    def __init__(self, train_loader, test_loader, device):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device

    def train_standard_model(self, model, model_name, epochs=15):
        """Train a standard (non-federated) model"""
        return self._train_single_model(model, model_name, epochs)

    def train_federated_model(self, model_info, model_name, epochs=15):
        """Train a federated learning model"""
        model = model_info['model']
        clients = model_info['clients']
        
        # Train each client
        for client_id, client in enumerate(clients):
            logging.info(f"{model_name} - Training client {client_id + 1}/{len(clients)}")
            client_state = client.train(epochs=epochs)
            
            # Aggregate client updates by averaging parameters
            if client_id == 0:
                aggregated_state = copy.deepcopy(client_state)
            else:
                for key in aggregated_state:
                    aggregated_state[key] += client_state[key]
        
        # Average the parameters
        for key in aggregated_state:
            aggregated_state[key] /= len(clients)
            
        # Update global model
        model.load_state_dict(aggregated_state)
        return evaluate_model(model, self.test_loader, self.device)

    def train_hierarchical_federated_model(self, model_info, model_name, epochs=15):
        """Train a hierarchical federated learning model"""
        server = model_info['server']
        clients = model_info['clients']
        
        # Train each client and send updates to server
        for client_id, client in enumerate(clients):
            logging.info(f"{model_name} - Training client {client_id + 1}/{len(clients)}")
            client_state = client.train(epochs=epochs)
            server.receive_update(client_state)
        
        # Aggregate updates at server
        server.aggregate_updates()
        return evaluate_model(server.global_model, self.test_loader, self.device)

    def _train_single_model(self, model, model_name, epochs=15):
        """Helper method for training standard models"""
        model = model.to(self.device)
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=0.0005,
            weight_decay=0.02
        )
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=0.0005,
            epochs=epochs,
            steps_per_epoch=len(self.train_loader)
        )
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
        
        best_metrics = None
        best_loss = float('inf')
        
        for epoch in range(epochs):
            model.train()
            epoch_loss = 0
            
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(data.float())
                loss = criterion(outputs, labels)
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                optimizer.step()
                scheduler.step()
                epoch_loss += loss.item()
            
            avg_loss = epoch_loss / len(self.train_loader)
            metrics = evaluate_model(model, self.test_loader, self.device)
            
            if avg_loss < best_loss:
                best_loss = avg_loss
                best_metrics = metrics
            
            logging.info(f'{model_name} - Epoch {epoch + 1}/{epochs}, '
                        f'Loss: {avg_loss:.4f}, '
                        f'Accuracy: {metrics["accuracy"]:.4f}')
        
        return best_metrics

# Updated AblationStudy class to handle federated variants
class AblationStudy:
    def __init__(self, X_train, y_train, X_test, y_test, device):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.device = device
        self.results = defaultdict(dict)
        
        # Create data loaders
        self.train_loader = DataLoader(
            torch.utils.data.TensorDataset(
                torch.from_numpy(X_train), 
                torch.from_numpy(y_train)
            ),
            batch_size=32,
            shuffle=True
        )
        
        self.test_loader = DataLoader(
            torch.utils.data.TensorDataset(
                torch.from_numpy(X_test), 
                torch.from_numpy(y_test)
            ),
            batch_size=64,
            shuffle=False
        )
        
        self.trainer = ModelTrainer(self.train_loader, self.test_loader, device)

    def run_ablation_study(self):
        input_size = self.X_train.shape[2]
        hidden_dim = 128
        num_classes = len(np.unique(self.y_train))
        
        model_variants = get_model_variants(
            input_size, hidden_dim, num_classes,
            self.X_train, self.y_train, self.device
        )
        
        for model_name, model_info in model_variants.items():
            logging.info(f"\nTraining {model_name}")
            
            if model_name in ['base_model', 'with_attention', 'with_bidirectional', 'full_model']:
                metrics = self.trainer.train_standard_model(model_info, model_name)
            elif model_name == 'federated_model':
                metrics = self.trainer.train_federated_model(model_info, model_name)
            elif model_name == 'hierarchical_federated_model':
                metrics = self.trainer.train_hierarchical_federated_model(model_info, model_name)
            
            self.results[model_name] = metrics
        
        self.save_results()
        return self.results

class AblationStudy:
    def __init__(self, X_train, y_train, X_test, y_test, device):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.device = device
        self.results = defaultdict(dict)
        
        # Create data loaders
        self.train_loader = DataLoader(
            torch.utils.data.TensorDataset(
                torch.from_numpy(X_train), 
                torch.from_numpy(y_train)
            ),
            batch_size=32,
            shuffle=True
        )
        
        self.test_loader = DataLoader(
            torch.utils.data.TensorDataset(
                torch.from_numpy(X_test), 
                torch.from_numpy(y_test)
            ),
            batch_size=64,
            shuffle=False
        )

    def train_model(self, model, model_name, epochs=15):
        model = model.to(self.device)
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=0.0005,
            weight_decay=0.02
        )
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=0.0005,
            epochs=epochs,
            steps_per_epoch=len(self.train_loader)
        )
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
        
        best_metrics = None
        best_loss = float('inf')
        
        for epoch in range(epochs):
            model.train()
            epoch_loss = 0
            
            for data, labels in self.train_loader:
                data, labels = data.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(data.float())
                loss = criterion(outputs, labels)
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                optimizer.step()
                scheduler.step()
                epoch_loss += loss.item()
            
            avg_loss = epoch_loss / len(self.train_loader)
            metrics = evaluate_model(model, self.test_loader, self.device)
            
            if avg_loss < best_loss:
                best_loss = avg_loss
                best_metrics = metrics
            
            logging.info(f'{model_name} - Epoch {epoch + 1}/{epochs}, '
                        f'Loss: {avg_loss:.4f}, '
                        f'Accuracy: {metrics["accuracy"]:.4f}')
        
        return best_metrics

    def run_ablation_study(self):
        input_size = self.X_train.shape[2]
        hidden_dim = 128
        num_classes = len(np.unique(self.y_train))
        
        model_variants = get_model_variants(input_size, hidden_dim, num_classes)
        
        for model_name, model in model_variants.items():
            logging.info(f"\nTraining {model_name}")
            metrics = self.train_model(model, model_name)
            self.results[model_name] = metrics
        
        self.save_results()
        return self.results

    def save_results(self):
        # Convert results to serializable format
        results_dict = {
            model_name: {
                metric: float(value) 
                for metric, value in metrics.items()
            }
            for model_name, metrics in self.results.items()
        }
        
        # Add timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f'ablation_results_{timestamp}.json'
        
        with open(filename, 'w') as f:
            json.dump(results_dict, f, indent=4)
        
        logging.info(f"Results saved to {filename}")
        
        # Create comparison table in log
        logging.info("\nAblation Study Results:")
        logging.info("\nModel\t\tAccuracy\tF1\t\tAUC\t\tAP")
        logging.info("-" * 70)
        
        for model_name, metrics in self.results.items():
            logging.info(
                f"{model_name:<15}"
                f"{metrics['accuracy']:.4f}\t"
                f"{metrics['f1']:.4f}\t"
                f"{metrics['auc']:.4f}\t"
                f"{metrics['ap']:.4f}"
            )

def load_and_preprocess_data():
    data = pd.read_csv('data/S-FFSD.csv', encoding='utf-8')
    
    # Feature engineering
    data['hour_sin'] = np.sin(2 * np.pi * data['Time'] % 24 / 24)
    data['hour_cos'] = np.cos(2 * np.pi * data['Time'] % 24 / 24)
    data['amount_log'] = np.log1p(data['Amount'])
    
    # Define feature columns
    categorical_cols = ['Source', 'Target', 'Location', 'Type']
    numeric_cols = ['Amount', 'Time', 'amount_log', 'hour_sin', 'hour_cos']

    # Process categorical columns
    for col in categorical_cols:
        data[f'{col}_freq'] = data[col].map(data[col].value_counts(normalize=True))
        data[col] = LabelEncoder().fit_transform(data[col])

    # Combine all features
    feature_cols = numeric_cols + categorical_cols + [f'{col}_freq' for col in categorical_cols]
    X = data[feature_cols].values
    y = data['Labels'].values

    # Scale features
    scaler = RobustScaler()
    X = scaler.fit_transform(X)
    X = np.clip(X, -5, 5)  # Clip extreme values
    X = X.reshape(-1, 1, X.shape[1]).astype(np.float32)
    X = np.nan_to_num(X, nan=0.0)  # Handle any NaN values
    
    return X, y


def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for data, labels in data_loader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data.float())
            probs = F.softmax(outputs, dim=1)
            all_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)

    # Metrics computation
    accuracy = (all_preds == all_labels).mean()
    f1 = f1_score(all_labels, all_preds, average='weighted')
    auc = roc_auc_score(all_labels, all_probs, multi_class='ovr', average='weighted')
    
    # Convert labels to one-hot encoding for average precision score
    all_labels_onehot = label_binarize(all_labels, classes=np.unique(all_labels))
    ap = average_precision_score(all_labels_onehot, all_probs, average='weighted')

    return {
        'accuracy': accuracy,
        'f1': f1,
        'auc': auc,
        'ap': ap
    }



def run_ablation_experiment():
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    
    # Load and preprocess data
    X, y = load_and_preprocess_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )
    
    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Using device: {device}")
    
    # Initialize and run ablation study
    study = AblationStudy(X_train, y_train, X_test, y_test, device)
    results = study.run_ablation_study()
    
    return results

if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('ablation_study.log'),
            logging.StreamHandler()
        ]
    )
    
    results = run_ablation_experiment()
    print("\nAblation study completed. Check ablation_study.log for detailed results.")

2025-02-18 21:52:12,044 - INFO - NumExpr defaulting to 8 threads.
2025-02-18 21:52:12,263 - INFO - Using device: cpu
2025-02-18 21:52:12,296 - INFO - 
Training base_model
2025-02-18 21:52:29,083 - INFO - base_model - Epoch 1/15, Loss: 0.9093, Accuracy: 0.7525
2025-02-18 21:52:46,077 - INFO - base_model - Epoch 2/15, Loss: 0.6583, Accuracy: 0.8577
2025-02-18 21:53:03,672 - INFO - base_model - Epoch 3/15, Loss: 0.5764, Accuracy: 0.8687
2025-02-18 21:53:21,598 - INFO - base_model - Epoch 4/15, Loss: 0.5403, Accuracy: 0.8808
2025-02-18 21:53:39,222 - INFO - base_model - Epoch 5/15, Loss: 0.5212, Accuracy: 0.8873
2025-02-18 21:53:55,980 - INFO - base_model - Epoch 6/15, Loss: 0.5075, Accuracy: 0.8916
2025-02-18 21:54:12,808 - INFO - base_model - Epoch 7/15, Loss: 0.4993, Accuracy: 0.8968
2025-02-18 21:54:29,557 - INFO - base_model - Epoch 8/15, Loss: 0.4945, Accuracy: 0.8966
2025-02-18 21:54:46,181 - INFO - base_model - Epoch 9/15, Loss: 0.4898, Accuracy: 0.9009
2025-02-18 21:55:02,782 - IN


Ablation study completed. Check ablation_study.log for detailed results.
