In [None]:
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow.keras as keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Tuple, Dict

print(f"TensorFlow version: {tf.__version__}")
print(f"TFF version: {tff.__version__}")

# Seed the random number generators the notebook draws from (NumPy for the
# synthetic data and DP noise, TensorFlow for weight initialisation, dropout
# and batch shuffling) so that "Restart Kernel -> Run All" is repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Configure plotting
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

---

## Section 1: Federated Learning Fundamentals

### Traditional vs. Federated Learning

**Centralized Learning:**
- All data collected to central server
- Train on complete dataset
- Privacy concerns: data exposure
- Communication: Raw data is uploaded from every client to the server; only the trained model is sent back

**Federated Learning:**
- Data stays on client devices
- Train locally on each client
- Aggregate updates at server
- Privacy: Only model updates shared
- Communication: Model updates (weights or gradients) exchanged in every round

### Federated Averaging (FedAvg) Algorithm

1. **Server initialization:** Send initial model to all clients
2. **Client training:** Each client trains locally on own data
3. **Model aggregation:** Server averages the clients' weight updates (in FedAvg, weighted by each client's number of samples)
4. **Update distribution:** Send aggregated model back to clients
5. **Repeat:** Multiple communication rounds

### Key Challenges
- **Non-IID Data:** Clients have different data distributions
- **Communication Cost:** Limited bandwidth between clients and server
- **Privacy:** Protecting individual client data
- **Convergence:** Convergence is harder to guarantee (and to prove) when clients train locally on non-IID data

In [None]:
# Simulate federated learning dataset
def generate_federated_dataset(n_clients: int = 10, samples_per_client: int = 100,
                              feature_dim: int = 20, n_classes: int = 3,
                              non_iid: bool = True, seed=None):
    """
    Generate a synthetic federated classification dataset.

    Features start as standard-normal noise; every sample of class ``c`` is
    then shifted by ``2 * c`` in all dimensions, so labels are learnable from
    the features in both the IID and the non-IID setting.

    Args:
        n_clients: Number of clients
        samples_per_client: Samples per client
        feature_dim: Feature dimension
        n_classes: Number of classes
        non_iid: If True, client ``i`` only holds classes ``i % n_classes``
            and ``(i + 1) % n_classes``; if False, every client draws labels
            uniformly from all classes.
        seed: Optional integer. When given, a private ``RandomState`` seeded
            with it is used, so the result is reproducible and the global
            NumPy random state is left untouched. When None (default) the
            global ``np.random`` state is used.

    Returns:
        List of (X, y) tuples, one per client. X is float32 with shape
        (samples_per_client, feature_dim); y is int32 with shape
        (samples_per_client,).
    """
    # The np.random module and RandomState expose the same randn/choice/randint
    # API, so either can serve as the random source.
    rng = np.random if seed is None else np.random.RandomState(seed)
    client_data = []

    for client_id in range(n_clients):
        X = rng.randn(samples_per_client, feature_dim)

        if non_iid:
            # Non-IID: each client specializes in (at most) two classes
            assigned_classes = [
                client_id % n_classes,
                (client_id + 1) % n_classes
            ]
            y = rng.choice(assigned_classes, samples_per_client)
        else:
            # IID: labels drawn uniformly from all classes
            y = rng.randint(0, n_classes, samples_per_client)

        # Add the class-specific signal to every sample. The IID branch needs
        # this too; without it the labels are independent of the features.
        X += 2 * y[:, np.newaxis]

        client_data.append((X.astype(np.float32), y.astype(np.int32)))

    return client_data

# Build the non-IID client datasets shared by the rest of the notebook
n_clients = 10
client_data = generate_federated_dataset(non_iid=True, n_clients=n_clients)

# Every client has the same array shapes, so the first one is representative
first_client_features = client_data[0][0]

print(f"‚úÖ Federated Dataset Generated")
print(f"Number of clients: {n_clients}")
print(f"Samples per client: {first_client_features.shape[0]}")
print(f"Feature dimension: {first_client_features.shape[1]}")

---

## Section 2: Centralized Model Baseline

In [None]:
def create_keras_model(feature_dim: int = 20, n_classes: int = 3):
    """Build the uncompiled MLP classifier used throughout the notebook.

    Two hidden stages (64 then 32 ReLU units), each followed by batch
    normalization and 20% dropout, feed a softmax output layer.

    Args:
        feature_dim: Number of input features per sample.
        n_classes: Number of output classes (width of the softmax layer).

    Returns:
        A ``keras.Sequential`` model; compiling is left to the caller.
    """
    stack = []
    for stage, width in enumerate((64, 32)):
        # Only the very first layer declares the input shape.
        shape_kwargs = {'input_shape': (feature_dim,)} if stage == 0 else {}
        stack.append(layers.Dense(width, activation='relu', **shape_kwargs))
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(0.2))
    stack.append(layers.Dense(n_classes, activation='softmax'))
    return keras.Sequential(stack)

# Centralized training baseline
print("üöÄ Training Centralized Model...\n")

# Combine all client data
all_X = np.concatenate([X for X, _ in client_data])
all_y = np.concatenate([y for _, y in client_data])

# Keras takes the validation split from the LAST samples of the arrays,
# before any shuffling. The concatenation above is ordered by client, so
# without this permutation the validation set would consist solely of the
# last clients' (class-skewed) data.
shuffle_idx = np.random.permutation(len(all_X))
all_X, all_y = all_X[shuffle_idx], all_y[shuffle_idx]

# Create and train model
central_model = create_keras_model()
central_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
)

central_history = central_model.fit(
    all_X, all_y,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=0
)

central_val_acc = central_history.history['val_sparse_categorical_accuracy'][-1]
print(f"\n‚úÖ Centralized Model Training Complete")
print(f"Final validation accuracy: {central_val_acc:.4f}")

---

## Section 3: Federated Learning Implementation

In [None]:
# Create federated datasets
def create_tf_dataset(X, y, batch_size=20):
    """Wrap one client's arrays in a batched, endlessly repeating tf.data.Dataset.

    Args:
        X: Feature array; sliced along its first axis into examples.
        y: Label array aligned with ``X`` along the first axis.
        batch_size: Number of examples per batch.

    Returns:
        A ``tf.data.Dataset`` of (features, labels) batches. ``repeat()`` is
        called without a count, so the dataset never ends; consumers must
        bound iteration themselves (e.g. with ``take``).
    """
    examples = tf.data.Dataset.from_tensor_slices((X, y))
    batched = examples.batch(batch_size)
    return batched.repeat()

# One tf.data pipeline per client, kept in client order (input format for TFF)
federated_train_data = list(
    map(
        lambda client_arrays: create_tf_dataset(
            client_arrays[0], client_arrays[1], batch_size=20
        ),
        client_data,
    )
)

print(f"‚úÖ Federated datasets created")
print(f"Number of client datasets: {len(federated_train_data)}")

In [None]:
# Federated model simulation (simplified without full TFF)
class FederatedLearner:
    """Simplified Federated Averaging (FedAvg) simulation.

    A single global Keras model is kept on the "server". In every round each
    client receives a copy of the global weights, trains locally on its own
    in-memory data, and the server replaces the global weights with the
    sample-count-weighted average of the clients' weights.

    Args:
        n_clients: Number of clients that take part in every round.
        model_fn: Zero-argument callable returning a fresh, uncompiled
            Keras model; called once for the global model and once per
            client per round.
        learning_rate: SGD learning rate used for the global and the client
            models.
    """

    def __init__(self, n_clients: int, model_fn, learning_rate: float = 0.01):
        self.n_clients = n_clients
        self.model_fn = model_fn
        self.learning_rate = learning_rate
        self.global_model = self._compile(model_fn())

    def _compile(self, model):
        """Compile ``model`` with the SGD / cross-entropy setup shared by server and clients."""
        model.compile(
            optimizer=keras.optimizers.SGD(learning_rate=self.learning_rate),
            loss=keras.losses.SparseCategoricalCrossentropy(),
            metrics=[keras.metrics.SparseCategoricalAccuracy()]
        )
        return model

    def client_update(self, client_model, X, y, epochs=1):
        """
        Perform local training on client.

        Args:
            client_model: Compiled Keras model holding the current global weights.
            X: Client features.
            y: Client labels.
            epochs: Number of local epochs.

        Returns:
            Updated weights (the list returned by ``get_weights()``)
        """
        client_model.fit(X, y, epochs=epochs, verbose=0)
        return client_model.get_weights()

    def aggregate_weights(self, client_weights_list, client_sizes=None):
        """
        Average weights across clients.

        Args:
            client_weights_list: One weight list (as returned by
                ``get_weights()``) per client; all lists must have the same
                structure.
            client_sizes: Optional number of training samples per client.
                When given, the average is weighted by these sizes as in
                FedAvg; when None, every client counts equally.

        Returns:
            Aggregated weights, one array per layer tensor.

        Raises:
            ValueError: If no client weights are given, or ``client_sizes``
                does not match the number of clients or has no positive mass.
        """
        if len(client_weights_list) == 0:
            raise ValueError("client_weights_list must contain at least one client's weights")

        coefficients = None
        if client_sizes is not None:
            coefficients = np.asarray(client_sizes, dtype=np.float64)
            if coefficients.shape != (len(client_weights_list),):
                raise ValueError("client_sizes must have one entry per client")
            if np.any(coefficients < 0) or coefficients.sum() <= 0:
                raise ValueError("client_sizes must be non-negative with a positive sum")

        n_layers = len(client_weights_list[0])
        aggregated_weights = []

        for layer_idx in range(n_layers):
            stacked = np.stack([np.asarray(w[layer_idx]) for w in client_weights_list])
            if coefficients is None:
                avg_weight = stacked.mean(axis=0)
            else:
                # np.average promotes to float64 because of the weights;
                # cast back so float32 model weights stay float32.
                avg_weight = np.average(stacked, axis=0, weights=coefficients).astype(
                    np.result_type(stacked.dtype, np.float32)
                )
            aggregated_weights.append(avg_weight)

        return aggregated_weights

    def train_round(self, client_data_list, epochs=1):
        """
        Execute one federated learning round.

        Args:
            client_data_list: Sequence of (X, y) tuples; entry ``i`` is the
                local data of client ``i``. Only the first ``n_clients``
                entries are used.
            epochs: Number of local epochs per client.
        """
        client_weights_list = []
        client_sizes = []

        # The global weights do not change while clients train, so fetch once.
        global_weights = self.global_model.get_weights()

        # Local training on each client
        for client_idx in range(self.n_clients):
            # Create local copy of global model
            client_model = self.model_fn()
            client_model.set_weights(global_weights)
            self._compile(client_model)

            # Local training
            X, y = client_data_list[client_idx]
            weights = self.client_update(client_model, X, y, epochs=epochs)
            client_weights_list.append(weights)
            client_sizes.append(len(X))

        # Aggregate weights, weighting each client by its number of samples
        aggregated_weights = self.aggregate_weights(client_weights_list, client_sizes)
        self.global_model.set_weights(aggregated_weights)

    def evaluate(self, X_test, y_test):
        """Evaluate global model and return its accuracy on (X_test, y_test)."""
        loss, accuracy = self.global_model.evaluate(X_test, y_test, verbose=0)
        return accuracy

print("‚úÖ FederatedLearner class defined")

In [None]:
# Train federated model
print("üöÄ Training Federated Model...\n")

NUM_ROUNDS = 20
N_TEST_SAMPLES = 500
TEST_FEATURE_DIM = 20   # must match the feature dimension of the training data
TEST_N_CLASSES = 3      # must match the number of classes of the training data

fl = FederatedLearner(n_clients=n_clients, model_fn=lambda: create_keras_model())

# Generate test set: labels uniform over all classes, features carrying the
# same class signal (shift of 2 * class) as the training data
X_test = np.random.randn(N_TEST_SAMPLES, TEST_FEATURE_DIM).astype(np.float32)
y_test = np.random.randint(0, TEST_N_CLASSES, N_TEST_SAMPLES).astype(np.int32)
# Add signal (vectorised over all rows)
X_test += 2.0 * y_test[:, np.newaxis].astype(np.float32)

# Fresh centralized model for the per-round comparison. Reusing the model that
# was already trained for 20 epochs in Section 2 would give the centralized
# curve a head start over the federated one.
central_round_model = create_keras_model()
central_round_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
)

federated_accuracies = []
centralized_accuracies = []

# Federated training rounds
for round_num in range(NUM_ROUNDS):
    # Federated training
    fl.train_round(client_data, epochs=1)
    fed_acc = fl.evaluate(X_test, y_test)
    federated_accuracies.append(fed_acc)

    # Centralized model for comparison: one epoch over the pooled data per round
    central_round_model.fit(all_X, all_y, epochs=1, verbose=0)
    cent_acc = central_round_model.evaluate(X_test, y_test, verbose=0)[1]
    centralized_accuracies.append(cent_acc)

    if (round_num + 1) % 5 == 0:
        print(f"Round {round_num + 1}/{NUM_ROUNDS} | Federated: {fed_acc:.4f} | Centralized: {cent_acc:.4f}")

print(f"\n‚úÖ Federated Learning Complete")
print(f"Final federated accuracy: {federated_accuracies[-1]:.4f}")
print(f"Final centralized accuracy: {centralized_accuracies[-1]:.4f}")

---

## Section 4: Privacy-Preserving Learning with Differential Privacy

In [None]:
class DifferentiallyPrivateFL:
    """Federated averaging with per-client update clipping and Gaussian noise.

    Each round, every client trains a copy of the global model locally. The
    server then clips each client's weight delta to a maximum L2 norm,
    averages the clipped deltas, adds Gaussian noise and applies the result
    to the global weights. No privacy accounting (epsilon/delta) is done here.

    Args:
        n_clients: Number of clients that take part in every round.
        model_fn: Zero-argument callable returning a fresh, uncompiled
            Keras model.
        noise_multiplier: Ratio between the noise standard deviation and the
            sensitivity of the averaged update.
        clip_norm: Maximum L2 norm of a single client's update, used by
            ``train_round``.
        learning_rate: SGD learning rate for the global and client models.
    """

    def __init__(self, n_clients: int, model_fn, noise_multiplier: float = 0.5,
                 clip_norm: float = 1.0, learning_rate: float = 0.01):
        self.n_clients = n_clients
        self.model_fn = model_fn
        self.noise_multiplier = noise_multiplier
        self.clip_norm = clip_norm
        self.learning_rate = learning_rate
        self.global_model = self._compile(model_fn())

    def _compile(self, model):
        """Compile ``model`` with the SGD / cross-entropy setup shared by server and clients."""
        model.compile(
            optimizer=keras.optimizers.SGD(learning_rate=self.learning_rate),
            loss=keras.losses.SparseCategoricalCrossentropy(),
            metrics=[keras.metrics.SparseCategoricalAccuracy()]
        )
        return model

    def add_gaussian_noise(self, weights, noise_scale: float):
        """
        Add Gaussian noise to model weights for differential privacy.

        Args:
            weights: List of arrays.
            noise_scale: Standard deviation of the zero-mean noise added
                independently to every element.

        Returns:
            New list of arrays; floating-point inputs keep their dtype.
        """
        # NOTE(review): callers pass every tensor from get_weights(), which for
        # models with BatchNormalization includes its moving statistics -
        # confirm that perturbing those is intended.
        noisy_weights = []
        for w in weights:
            w = np.asarray(w)
            noise = np.random.normal(0.0, noise_scale, w.shape)
            if np.issubdtype(w.dtype, np.floating):
                # Avoid silently promoting float32 weights to float64
                noise = noise.astype(w.dtype)
            noisy_weights.append(w + noise)
        return noisy_weights

    def clip_and_aggregate(self, client_weights_list, clip_norm: float = 1.0):
        """
        Clip client updates and aggregate them with noise for DP.

        Args:
            client_weights_list: One weight list per client, as returned by
                ``get_weights()`` after local training.
            clip_norm: Maximum L2 norm allowed for a single client's update
                (computed over all tensors jointly).

        Returns:
            New global weights: current global weights plus the noisy average
            of the clipped client updates.

        Raises:
            ValueError: If ``client_weights_list`` is empty.
        """
        n_updates = len(client_weights_list)
        if n_updates == 0:
            raise ValueError("client_weights_list must contain at least one client's weights")

        # Updates are measured relative to the current global weights
        reference_weights = self.global_model.get_weights()

        clipped_updates = []
        for client_weights in client_weights_list:
            deltas = [
                client_w - ref_w
                for ref_w, client_w in zip(reference_weights, client_weights)
            ]

            # Joint L2 norm over all tensors, accumulated in float64
            total_norm = float(np.sqrt(sum(
                float(np.sum(np.square(d, dtype=np.float64))) for d in deltas
            )))

            # Scale down only updates whose norm exceeds clip_norm; the plain
            # Python float keeps float32 deltas in float32.
            clip_factor = float(min(1.0, clip_norm / (total_norm + 1e-10)))
            clipped_updates.append([d * clip_factor for d in deltas])

        # Average clipped updates
        avg_update = [
            np.mean([u[layer_idx] for u in clipped_updates], axis=0)
            for layer_idx in range(len(reference_weights))
        ]

        # Each clipped update has norm <= clip_norm and enters the mean with
        # weight 1/n, so a single client contributes at most clip_norm / n.
        # The noise is calibrated to that bound, not to clip_norm itself.
        noise_scale = self.noise_multiplier * clip_norm / n_updates
        noisy_update = self.add_gaussian_noise(avg_update, noise_scale)

        # Apply update
        return [
            ref_w + noisy_u
            for ref_w, noisy_u in zip(reference_weights, noisy_update)
        ]

    def train_round(self, client_data_list, epochs=1):
        """Execute one DP-FL training round.

        Args:
            client_data_list: Sequence of (X, y) tuples; entry ``i`` is the
                local data of client ``i``. Only the first ``n_clients``
                entries are used.
            epochs: Number of local epochs per client.
        """
        client_weights_list = []

        # The global weights do not change while clients train, so fetch once.
        global_weights = self.global_model.get_weights()

        for client_idx in range(self.n_clients):
            client_model = self.model_fn()
            client_model.set_weights(global_weights)
            self._compile(client_model)

            X, y = client_data_list[client_idx]
            client_model.fit(X, y, epochs=epochs, verbose=0)
            client_weights_list.append(client_model.get_weights())

        # Clip, aggregate, and add noise
        aggregated_weights = self.clip_and_aggregate(client_weights_list, clip_norm=self.clip_norm)
        self.global_model.set_weights(aggregated_weights)

    def evaluate(self, X_test, y_test):
        """Evaluate the global model and return its accuracy on (X_test, y_test)."""
        loss, accuracy = self.global_model.evaluate(X_test, y_test, verbose=0)
        return accuracy

print("‚úÖ Differentially Private FL class defined")

In [None]:
# Run the differentially private variant on the same clients and test set
print("üöÄ Training Differentially Private FL Model...\n")

dp_fl = DifferentiallyPrivateFL(
    model_fn=lambda: create_keras_model(),
    noise_multiplier=0.1,
    n_clients=n_clients,
)

# Test accuracy of the global model after each round
dp_fl_accuracies = []

for round_num in range(20):
    dp_fl.train_round(client_data, epochs=1)
    acc = dp_fl.evaluate(X_test, y_test)
    dp_fl_accuracies += [acc]

    # Report every fifth round
    if not (round_num + 1) % 5:
        print(f"Round {round_num + 1}/20 | DP-FL Accuracy: {acc:.4f}")

print(f"\n‚úÖ DP-FL Training Complete")
print(f"Final DP-FL accuracy: {dp_fl_accuracies[-1]:.4f}")

---

## Section 5: Comparison & Analysis

In [None]:
# Visualization: convergence curves (left panel) and final accuracies (right panel)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Convergence comparison
ax = axes[0]
curves = [
    (federated_accuracies, 'Federated Learning', 'o'),
    (centralized_accuracies, 'Centralized Learning', 's'),
    (dp_fl_accuracies, 'DP-Federated Learning', '^'),
]
for series, curve_label, curve_marker in curves:
    ax.plot(series, label=curve_label, linewidth=2, marker=curve_marker)
ax.set_xlabel('Communication Round')
ax.set_ylabel('Accuracy')
ax.set_title('Learning Convergence Comparison')
ax.legend()
ax.grid(True, alpha=0.3)

# Final performance
ax = axes[1]
methods = ['Centralized', 'Federated', 'DP-Federated']
accuracies = [
    history[-1]
    for history in (centralized_accuracies, federated_accuracies, dp_fl_accuracies)
]
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']

bars = ax.bar(methods, accuracies, color=colors, alpha=0.7, edgecolor='black')
ax.set_ylabel('Final Accuracy')
ax.set_title('Final Model Performance')
ax.set_ylim([0, 1])

# Add value labels centred above each bar
for bar, acc in zip(bars, accuracies):
    x_center = bar.get_x() + bar.get_width()/2.
    ax.text(x_center, bar.get_height(), f'{acc:.4f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

---

## Section 6: Key Takeaways

### Federated Learning Advantages
1. **Privacy:** Raw data never leaves client devices
2. **Communication Efficiency:** Only model updates transmitted
3. **Personalization:** Models can be tailored to local data
4. **Decentralization:** No single point of failure
5. **Regulatory Compliance:** Easier to meet privacy regulations

### Challenges & Solutions

| Challenge | Solution |
|-----------|----------|
| **Non-IID Data** | Use adaptive learning rates, local epochs |
| **Communication Cost** | Quantization, compression, selective updates |
| **Privacy** | Differential privacy, secure aggregation |
| **System Heterogeneity** | Client selection, asynchronous updates |

### Differential Privacy
- **Mechanism:** Add calibrated Gaussian noise to the clipped, aggregated model updates
- **Privacy Budget (ε):** Lower = stronger privacy, higher = better accuracy
- **Gradient Clipping:** Bound sensitivity of updates
- **Noise Multiplier:** Controls noise magnitude

### Practical Applications
- Mobile keyboard prediction (Gboard)
- Medical data analysis
- Edge device learning
- Personalized recommendation systems
- Collaborative learning across organizations

In [None]:
# Closing summary of the notebook. The emoji and bullet characters had been
# corrupted (UTF-8 bytes decoded with the wrong codec) and are restored here.
print("""
📚 Federated Learning - Summary
================================

✅ Topics Covered:
  • Federated learning fundamentals
  • FedAvg algorithm and implementation
  • Centralized vs. federated comparison
  • Non-IID data challenges
  • Differential privacy integration
  • Gradient clipping and noise addition
  • Privacy-accuracy tradeoffs

💡 Key Insights:
  • FL enables privacy-preserving collaborative learning
  • Federated models can match centralized performance
  • Privacy and utility have inherent tradeoffs
  • Communication efficiency is critical
  • Non-IID data requires careful algorithm design

🎯 Next Steps:
  1. Implement client sampling for heterogeneous systems
  2. Add model compression (quantization, pruning)
  3. Experiment with asynchronous aggregation
  4. Analyze privacy guarantees formally
  5. Apply to real federated datasets (FEMNIST, etc.)

🔐 Privacy Considerations:
  • Even aggregated updates can leak information
  • Differential privacy provides formal guarantees
  • Privacy-utility tradeoff is fundamental
  • Regular audits recommended for sensitive applications
""")