In [None]:
# Federated Learning for Lung Disease Classification Using Custom Models and FedProx

This notebook demonstrates how to set up a federated learning system with TensorFlow Federated (TFF) to classify lung diseases. The intended architecture uses hierarchical graph convolutional networks (HGCN) and Deepen-ShuffleNet for feature extraction and CatBoost for classification, and it is trained across decentralized clients with the FedProx algorithm.

## Imports and Setup

```python
!pip install tensorflow-federated
!pip install catboost
!pip install spektral

import os

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
from catboost import CatBoostClassifier
from spektral.layers import GraphConv
from tensorflow.keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Install TFF's local execution context before any federated computation
# further down the notebook is built or invoked.
tff.backends.native.set_local_execution_context()


In [None]:
def HGCN(input_shape, num_classes):
    """Build a Keras classifier made of two stacked graph-convolution layers.

    Args:
        input_shape: Shape (without the batch axis) handed to the Keras
            ``Input`` layer.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the single input tensor to
        ``num_classes`` softmax outputs.
    """
    node_input = Input(shape=input_shape)

    # Two 64-unit ReLU graph convolutions in sequence. Each one receives the
    # running activation plus the raw input tensor as its second list element.
    # NOTE(review): the second element is the feature tensor itself rather
    # than a separate adjacency input -- confirm this is what GraphConv expects.
    hidden = node_input
    for _ in range(2):
        hidden = GraphConv(64, activation='relu')([hidden, node_input])

    flattened = Flatten()(hidden)
    embedding = Dense(128, activation='relu')(flattened)
    class_probs = Dense(num_classes, activation='softmax')(embedding)
    return Model(inputs=node_input, outputs=class_probs)

def DeepenShuffleNet(input_shape, num_classes):
    """Build a classifier head on top of a headless ShuffleNetV2 backbone.

    Args:
        input_shape: Image shape forwarded to the backbone constructor.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled ``Model`` from the backbone's input to ``num_classes``
        softmax outputs.
    """
    # NOTE(review): ShuffleNetV2 is not imported anywhere in the visible
    # notebook -- confirm which package is expected to provide it.
    backbone = ShuffleNetV2(include_top=False, input_shape=input_shape, weights='imagenet')

    # Head: global average pooling -> 1024-unit ReLU layer -> softmax.
    pooled = GlobalAveragePooling2D()(backbone.output)
    embedding = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(num_classes, activation='softmax')(embedding)
    return Model(inputs=backbone.input, outputs=class_probs)


In [None]:
def create_combined_model():
    """Return a zero-argument ``model_fn`` that wraps a Keras model for TFF.

    This is a placeholder: the closure it returns refers to names that are not
    defined in this function (see the notes below).

    Returns:
        A callable taking no arguments which, when invoked, calls
        ``tff.learning.from_keras_model`` with a sparse categorical
        cross-entropy loss and a sparse categorical accuracy metric.
    """
    # Assuming these functions return TensorFlow models ready for training
    # NOTE(review): both models are constructed eagerly here but neither is
    # referenced again inside this function.
    hgcn_model = HGCN((128, 128, 3), 8)  # Placeholder function
    deepen_model = DeepenShuffleNet((128, 128, 3), 8)  # Placeholder function

    # This is an abstraction. In practice, you'll need to ensure these models can be combined appropriately and their outputs can be concatenated or otherwise combined to feed into CatBoost.
    def model_fn():
        # NOTE(review): `combined_model` and `input_spec` are not defined in
        # this function or anywhere else in the visible notebook; calling
        # model_fn() will fail unless they exist as globals -- confirm.
        return tff.learning.from_keras_model(
            keras_model=combined_model,  # This would be your actual implementation
            input_spec=input_spec,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model_fn

In [None]:
def load_images_from_folder(folder):
    """Load every PNG in ``folder`` as a 128x128 image array.

    Each image is labelled with the base name of ``folder``.

    Args:
        folder: Path to a directory of images. Its final path component is
            used as the label for every image found.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is ``np.array`` applied to the
        list of per-file ``img_to_array`` results (an empty array when no PNG
        is found); ``labels`` is a list holding the folder's base name once
        per loaded image.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be read.
    """
    # normpath drops trailing separators and basename honours the platform's
    # separator, so the label is the directory name even for paths such as
    # "data/Healthy/" or Windows-style "data\\Healthy".
    label = os.path.basename(os.path.normpath(folder))

    images = []
    # os.listdir order is arbitrary; sorting keeps the result reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(".png"):
            continue
        # target_size is (height, width); the channel count is not part of it.
        img = load_img(os.path.join(folder, filename), target_size=(128, 128))
        images.append(img_to_array(img))

    labels = [label] * len(images)
    return np.array(images), labels


In [None]:
def feature_extraction(images, model_func, num_classes):
    """Build a model for the given images and return its predictions.

    Args:
        images: Indexable batch of samples; the first sample's ``.shape`` is
            used as the model's input shape.
        model_func: Factory called as ``model_func(input_shape, num_classes)``
            that returns an object exposing ``predict``.
        num_classes: Forwarded unchanged to ``model_func``.

    Returns:
        Whatever ``predict(images)`` returns on the freshly built model.
    """
    sample_shape = images[0].shape
    extractor = model_func(sample_shape, num_classes)
    return extractor.predict(images)


In [None]:
def build_fedprox_optimizer(mu=0.01):
    """Create the client-optimizer factory used for FedProx-style training.

    ``tff.learning.optimizers.build_sgdm`` accepts only ``learning_rate`` and
    ``momentum``; passing ``proximal_coeff`` raises ``TypeError``. The factory
    therefore returns a plain Keras SGD-with-momentum optimizer, the same kind
    of object this notebook's ``server_optimizer_fn`` lambdas return.

    NOTE: this helper is defined twice in the notebook with the same name; the
    later definition shadows this one when the cells run in order.

    Args:
        mu: Non-negative FedProx proximal coefficient. It is not consumed by
            the optimizer. It is exposed as ``optimizer_fn.proximal_strength``
            so the caller can forward it to an algorithm builder that takes a
            proximal strength, such as
            ``tff.learning.algorithms.build_weighted_fed_prox``.

    Returns:
        A zero-argument callable that builds a new
        ``tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)``.

    Raises:
        ValueError: If ``mu`` is negative.
    """
    if mu < 0:
        raise ValueError(f"mu must be non-negative, got {mu!r}")

    def optimizer_fn():
        return tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)

    # Keep the coefficient reachable instead of silently discarding it.
    optimizer_fn.proximal_strength = mu
    return optimizer_fn

# Build the module-level federated training process from the placeholder
# combined model and the client/server optimizer factories.
# NOTE(review): the builder used here is the federated-averaging one; nothing
# in this call passes a proximal strength, so confirm whether a FedProx builder
# (e.g. tff.learning.algorithms.build_weighted_fed_prox) was intended.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn=create_combined_model(),
    client_optimizer_fn=build_fedprox_optimizer(mu=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))


In [None]:
# Run a fixed number of federated rounds, threading the server state through
# each call and printing the metrics returned for that round.
# NOTE(review): `federated_train_data` is not defined anywhere in the visible
# notebook; this cell fails on a fresh kernel until it is prepared.
state = iterative_process.initialize()
num_rounds = 10
for round_num in range(1, num_rounds + 1):
    state, metrics = iterative_process.next(state, federated_train_data)  # federated_train_data needs to be prepared
    print('Round {:2d}, Metrics: {}'.format(round_num, metrics))


In [None]:
def _safe_ratio(numerator, denominator):
    """Element-wise ``numerator / denominator``, yielding 0.0 where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    result = np.zeros_like(numerator)
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    return result


def calculate_metrics(y_true, y_pred):
    """Compute classification metrics for binary or multi-class predictions.

    The confusion matrix is built with numpy over the sorted union of labels
    seen in ``y_true`` and ``y_pred`` (rows are true classes, columns are
    predicted classes).

    * With exactly two labels the larger one is the positive class, matching
      the ``tn, fp, fn, tp`` layout of a raveled 2x2 confusion matrix.
    * With any other number of labels, recall, false positive rate, precision
      and F1 are macro-averaged over one-vs-rest scores, so the function no
      longer fails on the multi-class case.

    Args:
        y_true: 1-D sequence of ground-truth labels.
        y_pred: 1-D sequence of predicted labels, same length as ``y_true``.

    Returns:
        Dict with float values under the keys "Recall", "False Positive Rate",
        "Precision", "F1 Score", "Cohen's Kappa" and
        "Matthews Correlation Coefficient". Any ratio whose denominator is
        zero is reported as 0.0.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    true_arr = np.asarray(y_true).ravel()
    pred_arr = np.asarray(y_pred).ravel()
    if true_arr.size != pred_arr.size:
        raise ValueError(
            f"y_true and y_pred differ in length: {true_arr.size} != {pred_arr.size}")
    if true_arr.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    n_samples = true_arr.size
    # Encode both label vectors against one shared, sorted label set.
    classes, codes = np.unique(np.concatenate([true_arr, pred_arr]), return_inverse=True)
    codes = codes.ravel()
    n_classes = classes.size
    matrix = np.zeros((n_classes, n_classes), dtype=np.int64)
    np.add.at(matrix, (codes[:n_samples], codes[n_samples:]), 1)

    # Per-class one-vs-rest counts, as floats to avoid integer overflow below.
    true_totals = matrix.sum(axis=1).astype(float)
    pred_totals = matrix.sum(axis=0).astype(float)
    tp = np.diag(matrix).astype(float)
    fn = true_totals - tp
    fp = pred_totals - tp
    tn = n_samples - tp - fn - fp

    # Binary: score only the positive (larger) label. Otherwise: macro average.
    scored = slice(1, 2) if n_classes == 2 else slice(None)
    recall = _safe_ratio(tp, tp + fn)[scored].mean()
    fpr = _safe_ratio(fp, fp + tn)[scored].mean()
    precision = _safe_ratio(tp, tp + fp)[scored].mean()
    f1 = _safe_ratio(2 * tp, 2 * tp + fp + fn)[scored].mean()

    # Cohen's kappa: observed agreement corrected for chance agreement.
    total = float(n_samples)
    observed = tp.sum() / total
    expected = float(np.dot(true_totals, pred_totals)) / (total * total)
    kappa = (observed - expected) / (1.0 - expected) if expected < 1.0 else 0.0

    # Matthews correlation coefficient in its multi-class (covariance) form,
    # which reduces to the usual binary formula for two classes.
    cov_true_pred = tp.sum() * total - float(np.dot(true_totals, pred_totals))
    cov_true_true = total * total - float(np.dot(true_totals, true_totals))
    cov_pred_pred = total * total - float(np.dot(pred_totals, pred_totals))
    spread = cov_true_true * cov_pred_pred
    mcc = cov_true_pred / np.sqrt(spread) if spread > 0 else 0.0

    return {
        "Recall": float(recall),
        "False Positive Rate": float(fpr),
        "Precision": float(precision),
        "F1 Score": float(f1),
        "Cohen's Kappa": float(kappa),
        "Matthews Correlation Coefficient": float(mcc)
    }


In [None]:
# Simulate data distribution among clients
def client_data_simulation(disease, augmented_path):
    """Load one simulated client's data from the sub-folder named after ``disease``.

    Args:
        disease: Name of the sub-directory under ``augmented_path``.
        augmented_path: Root directory holding one sub-folder per disease.

    Returns:
        The ``(images, labels)`` pair produced by ``load_images_from_folder``
        for ``augmented_path/disease``.
    """
    disease_dir = os.path.join(augmented_path, disease)
    client_images, client_labels = load_images_from_folder(disease_dir)
    return client_images, client_labels

# Example model function for TFF
def create_tff_model():
    """Build a minimal Keras image classifier and wrap it for TFF.

    The network flattens a 128x128x3 input and feeds it to a single 8-way
    softmax layer; it stands in for a real feature-extraction architecture.

    Returns:
        The result of ``tff.learning.from_keras_model`` for that network, with
        sparse categorical cross-entropy loss and sparse categorical accuracy.
    """
    # Mock network: raw pixels -> flatten -> 8-class softmax.
    image_input = Input(shape=(128, 128, 3))
    flat_pixels = Flatten()(image_input)
    class_probs = Dense(8, activation='softmax')(flat_pixels)
    keras_model = Model(inputs=image_input, outputs=class_probs)

    # One batch is an (images, labels) pair: float32 images, int32 class ids.
    batch_spec = (
        tf.TensorSpec(shape=[None, 128, 128, 3], dtype=tf.float32),
        tf.TensorSpec(shape=[None], dtype=tf.int32),
    )
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=batch_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

# Define the federated computation
def federated_model_fn():
    """Zero-argument model factory: returns a new model from ``create_tff_model()`` on each call."""
    return create_tff_model()

# FedProx optimizer
def build_fedprox_optimizer(mu=0.01):
    """Create the client-optimizer factory used for FedProx-style training.

    ``tff.learning.optimizers.build_sgdm`` accepts only ``learning_rate`` and
    ``momentum``; passing ``proximal_coeff`` raises ``TypeError``. The factory
    therefore returns a plain Keras SGD-with-momentum optimizer, the same kind
    of object this notebook's ``server_optimizer_fn`` lambdas return.

    NOTE: a function with this same name is also defined in an earlier cell of
    the notebook; this later definition shadows it. Keep the two in sync or
    remove one of them.

    Args:
        mu: Non-negative FedProx proximal coefficient. It is not consumed by
            the optimizer. It is exposed as ``optimizer_fn.proximal_strength``
            so the caller can forward it to an algorithm builder that takes a
            proximal strength, such as
            ``tff.learning.algorithms.build_weighted_fed_prox``.

    Returns:
        A zero-argument callable that builds a new
        ``tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)``.

    Raises:
        ValueError: If ``mu`` is negative.
    """
    if mu < 0:
        raise ValueError(f"mu must be non-negative, got {mu!r}")

    def optimizer_fn():
        return tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)

    # Keep the coefficient reachable instead of silently discarding it.
    optimizer_fn.proximal_strength = mu
    return optimizer_fn

In [None]:
def main():
    """Simulate one client per disease and train the TFF model with FedProx.

    Each disease folder under ``augmented_path`` becomes one client. Its
    images are converted to a batched ``tf.data.Dataset`` of
    ``(float32 image, int32 class index)`` pairs so they match the
    ``input_spec`` declared by ``create_tff_model``. The class index is the
    disease's position in ``diseases``. One training round is run per client,
    in list order.

    Raises:
        RuntimeError: If none of the disease folders yields any image.
    """
    diseases = ['Healthy', 'COPD', 'Asthma', 'Pneumonia', 'URTI', 'Bronchiectasis', 'Bronchiolitis', 'LRTI']
    augmented_path = 'c://sampledata//augmented'
    batch_size = 16
    proximal_strength = 0.01

    # Federated data simulation: TFF consumes one tf.data.Dataset per client,
    # not raw (array, list-of-strings) tuples.
    client_datasets = []
    for class_index, disease in enumerate(diseases):
        images, _ = client_data_simulation(disease, augmented_path)
        if len(images) == 0:
            # An empty array cannot be shaped to the model's input spec.
            print(f'Skipping {disease}: no images found')
            continue
        pixels = np.asarray(images, dtype=np.float32)
        targets = np.full(len(pixels), class_index, dtype=np.int32)
        client_datasets.append(
            tf.data.Dataset.from_tensor_slices((pixels, targets)).batch(batch_size))

    if not client_datasets:
        raise RuntimeError(f'No training images found under {augmented_path!r}')

    # Setting up the federated training process. FedProx is selected at the
    # algorithm level: the proximal term is controlled by proximal_strength,
    # not by an optimizer argument.
    iterative_process = tff.learning.algorithms.build_weighted_fed_prox(
        model_fn=federated_model_fn,
        proximal_strength=proximal_strength,
        client_optimizer_fn=tff.learning.optimizers.build_sgdm(learning_rate=0.02, momentum=0.9),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
    )

    state = iterative_process.initialize()

    # Assuming one round of training per disease
    for round_num, client_data in enumerate(client_datasets, 1):
        # The learning process returns an object with .state and .metrics
        # rather than a (state, metrics) tuple.
        result = iterative_process.next(state, [client_data])
        state, metrics = result.state, result.metrics
        print(f'Round {round_num}, Metrics: {metrics}')

if __name__ == "__main__":
    main()