#### Environment

In [1]:
!pip show tensorflow-federated
import sys
print(sys.executable)  # Prints the path to the Python executable being used

Name: tensorflow-federated
Version: 0.17.0
Summary: TensorFlow Federated is an open-source federated learning framework.
Home-page: http://tensorflow.org/federated
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: c:\users\flc\anaconda3\envs\federated\lib\site-packages
Requires: absl-py, attrs, cachetools, dm-tree, grpcio, h5py, numpy, portpicker, retrying, semantic-version, tensorflow, tensorflow-addons, tensorflow-model-optimization, tensorflow-privacy
Required-by: 
c:\Users\flc\Anaconda3\envs\federated\python.exe


In [5]:
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

tff.backends.reference.set_reference_context()

class CustomAdam(tf.keras.optimizers.Adam):
    def __init__(self, name="CustomAdam", **kwargs):
        super(CustomAdam, self).__init__(name=name, **kwargs)

def load_models(model_paths):
    """Load all models from given paths, recognizing custom optimizer."""
    return [load_model(path, custom_objects={'CustomAdam': CustomAdam}) for path in model_paths]

def create_federated_data(models, data):
    """Simulate federated data setup, assuming equal distribution of data across models."""
    client_data = collections.OrderedDict()
    for idx in range(len(models)):
        client_data[str(idx)] = data  # Simulate data partitioning
    return tff.simulation.FromTensorSlicesClientData(client_data)

def create_federated_model(model, example_data):
    """Wrap a Keras model for use with TFF."""
    return tff.learning.from_keras_model(
        model,
        input_spec=example_data.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

def federated_training(federated_data, model, num_rounds=5):
    """Train using Federated Averaging."""
    def model_fn():
        return create_federated_model(model)
    
    iterative_process = tff.learning.build_federated_averaging_process(
        model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02))
    state = iterative_process.initialize()
    
    for _ in range(num_rounds):
        state, metrics = iterative_process.next(state, federated_data)
        print(f'Round {_}, Metrics: {metrics}')
    
    return tff.learning.keras_utils.from_tff_to_keras(state.model)

def gram_matrix(X):
    """Calculate the Gram matrix from layer activations."""
    n = X.shape[0]
    X = X - X.mean(axis=0)
    return X @ X.T / n

def cka(G, H):
    """Compute the CKA metric."""
    return np.trace(G @ H) / np.sqrt(np.trace(G @ G) * np.trace(H @ H))

def compute_cka(model1, model2, data):
    """Compute the CKA between layers of two models using data."""
    intermediate_model1 = tf.keras.Model(inputs=model1.input, outputs=[layer.output for layer in model1.layers])
    intermediate_model2 = tf.keras.Model(inputs=model2.input, outputs=[layer.output for layer in model2.layers])
    activations1 = intermediate_model1(data)
    activations2 = intermediate_model2(data)
    return np.mean([cka(gram_matrix(np.array(act1)), gram_matrix(np.array(act2))) for act1, act2 in zip(activations1, activations2)])

def compute_full_cka_matrix(models, data):
    """Compute the full CKA matrix for a list of models."""
    n = len(models)
    cka_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            if i == j:
                cka_matrix[i, j] = 1.0
            else:
                cka_matrix[i, j] = cka_matrix[j, i] = compute_cka(models[i], models[j], data)
    return cka_matrix

def plot_cka_heatmap(cka_matrix, model_names):
    """Plot a CKA similarity heatmap."""
    sns.set(style="white")
    plt.figure(figsize=(10, 8))
    ax = sns.heatmap(cka_matrix, annot=False, fmt=".2f", cmap="coolwarm_r",
                     xticklabels=[int(item[0])+1 if item else '' for item in model_names],
                     yticklabels=[int(item[0])+1 if item else '' for item in model_names],
                     square=True, linewidths=.5, cbar_kws={"shrink": .8})
    ax.set_xlabel('Orbital Planes')
    ax.set_ylabel('Orbital Planes')
    plt.savefig('./FL/CKA.png', bbox_inches='tight', dpi=500)
    plt.show()

def plot_cka_cdf(cka_matrix):
    """Plot the cumulative distribution function (CDF) of the CKA values."""
    sns.set(style="whitegrid")
    plt.figure(figsize=(8, 6))
    cka_values = cka_matrix[np.triu_indices_from(cka_matrix, k=1)]
    sorted_values = np.sort(cka_values)
    plt.step(sorted_values, np.linspace(0, 1, len(sorted_values), endpoint=False), where='post')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('CKA Value')
    plt.ylabel('CDF')
    plt.savefig('./FL/CKA_CDF.png', bbox_inches='tight', dpi=500)
    plt.show()

def perform_cka_analysis(model_paths, data):
    models = load_models(model_paths)
    cka_matrix_before = compute_full_cka_matrix(models, data)

    # Federated training
    federated_data = create_federated_data(models, data)
    trained_model = federated_training(federated_data, models[0])
    models_after_fl = [trained_model for _ in models]  # Simulate all models updated
    cka_matrix_after = compute_full_cka_matrix(models_after_fl, data)

    # Extract model identifiers from paths
    model_names = [path.split('/')[-1].split('qNetwork')[0].rstrip('_') for path in model_paths]

    plot_cka_heatmap(cka_matrix_before, model_names)
    plot_cka_heatmap(cka_matrix_after, model_names)
    plot_cka_cdf(cka_matrix_before)
    plot_cka_cdf(cka_matrix_after)

perform_cka_analysis(model_paths, test_data)


ValueError: Unknown optimizer: Custom>Adam

In [2]:
# Divergencia de modelos
# Fedavg post prodcesado sacar nueva cka
# cdf: Nothing, model anticipation, plane FL, global FL
# average del plano y average total
# simulacion despues de fedavg check capabilities
# fed CDF

import os
import glob
import numpy as np

def generate_test_data(num_samples, include_not_avail=False):
    data = []
    queue_values = np.arange(0, 11)  # Possible queue values from 0 to 10
    # Set probabilities: 0 at 35%, 10 at 20%, and 5% each for values 1-9
    queue_probs = [0.35] + [0.05] * 9 + [0.20]

    for _ in range(num_samples):
        sample = []
        # Queue Scores for each direction: Up, Down, Right, Left (4 scores each)
        for _ in range(4):
            # Queue scores biased towards 0 and 10
            sample.extend(np.random.choice(queue_values, 4, p=queue_probs))
            
            # Relative positions for each direction: latitude and longitude
            sample.append(np.random.uniform(-2, 2))  # Latitude relative position
            sample.append(np.random.uniform(-2, 2))  # Longitude relative position
        
        # Absolute positions
        sample.append(np.random.uniform(0, 9))  # Absolute latitude normalized
        sample.append(np.random.uniform(0, 18))  # Absolute longitude normalized
        
        # Destination differential coordinates
        sample.append(np.random.uniform(-2, 2))  # Destination differential latitude
        sample.append(np.random.uniform(-2, 2))  # Destination differential longitude
        
        # Optionally include not available values
        if include_not_avail and np.random.rand() < 0.1:  # 10% chance to introduce a -1 value
            idx_to_replace = np.random.choice(len(sample), int(0.1 * len(sample)), replace=False)
            sample[idx_to_replace] = -1
        
        data.append(sample)
    
    return np.array(data)

def get_model_paths(folder, nGTs):
    # Create an empty list to hold all file paths
    model_files = []
    
    # Iterate over the range of first and second digit as specified
    for i in range(7):  # from 0 to 6
        for j in range(20):  # from 0 to 19
            # Construct the pattern for each file
            pattern = os.path.join(folder, f"{i}_{j}qNetwork_{nGTs}GTs.h5")
            # Use glob to find files matching the current pattern
            files = glob.glob(pattern)
            # Add found files to the list
            model_files.extend(files)
    
    # Sort the files to maintain a consistent order
    # model_files.sort()
    
    return model_files

# Model paths
nGTs = 8
secs = 1
con  = 'con'    # ['nocon' (no congestion), 'con' (congestion)]
folder_path = f'./NNs_{nGTs}GTs_{secs}s_{con}/'
model_paths = get_model_paths(folder_path, nGTs)
print(model_paths)

# Generate dataset
num_samples = 100  # Number of state vectors to generate
test_data = generate_test_data(num_samples)

print("Sample data shape:", test_data.shape)
print("Sample data:", test_data[:5])  # Print the first 5 samples to check


['./NNs_8GTs_1s_con/0_0qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_1qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_2qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_3qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_4qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_5qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_6qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_7qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_8qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_9qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_10qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_11qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_12qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_13qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_14qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_15qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_16qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_17qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_18qNetwork_8GTs.h5', './NNs_8GTs_1s_con/0_19qNetwork_8GTs.h5', './NNs_8GTs_1s_con/1_0qNetwork_8GTs.h5', './NNs_8GTs_1s_con/1_1qNetwork_8GTs.h5', './NNs_8GTs_1s_con/1_2qNetwork_8GTs.h5', './NNs_8GTs_1s_con/1_3qNetwork_8GTs.h5', './NN