In [None]:
# Install the libraries
!pip install sentence_transformers
!pip install pandas
!pip install numpy
!pip install scikit-learn
!pip install matplotlib

Collecting sentence_transformers
  Downloading sentence_transformers-3.2.0-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.2.0-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.2.0


In [None]:
# Import the packages
import matplotlib.pyplot as plt
import pandas as pd
import os
import json
from sklearn.manifold import TSNE
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_validate
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import AUC
from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# Load the sentence-embedding model.
# The checkpoint name is kept in a constant so it is easy to spot and swap.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Load the dataset and create one embedding per report
file_path = "IU-GroundTruth.csv"
report_column = 'Report'
label_columns = ['Enlarged Cardiom.','Cardiomegaly','Lung Lesion','Lung Opacity','Edema','Consolidation','Pneumonia','Atelectasis','Pneumothorax','Pleural Effusion','Pleural Other','Fracture','Support Devices']

df = pd.read_csv(file_path)
allReports = df[report_column]

# Encode every report in a single batched call rather than one call per
# report: `encode` accepts a list of sentences and batches them internally.
# np.asarray keeps the result a 2-D array with one row per report.
allEmbeddings = np.asarray(model.encode(allReports.tolist()))

# One column per entry of `label_columns`, one row per report.
labels = df[label_columns].to_numpy()

# Features and targets must stay row-aligned for the train/test split later on.
assert allEmbeddings.shape[0] == labels.shape[0], "embeddings and labels are misaligned"

# Federated Learning with Surgical Aggregation

## Setup

In [None]:
# Function to create an MLP classifier
def create_mlp_model(input_dim, output_dim):
    """Build and compile a two-hidden-layer MLP with sigmoid outputs.

    Architecture: Input(input_dim) -> Dense(512, relu) -> Dropout(0.2)
    -> Dense(256, relu) -> Dropout(0.2) -> Dense(output_dim, sigmoid).
    The model is compiled with Adam, binary cross-entropy and an AUC
    metric named 'auc'.

    Args:
        input_dim: length of each input feature vector.
        output_dim: number of output units (one sigmoid unit per label).

    Returns:
        The compiled `keras.Model`.
    """
    # NOTE: the layer order matters to callers that address layers by
    # position (the aggregation code treats the last layer as the label head).
    inputs = keras.Input(shape=(input_dim,))
    x = layers.Dense(512, activation="relu")(inputs)
    x = layers.Dropout(0.2)(x)  # optional dropout
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.2)(x)  # optional dropout
    outputs = layers.Dense(output_dim, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Compile model; AUC is reported alongside the loss by fit/evaluate.
    model.compile(optimizer=Adam(),
                  loss=BinaryCrossentropy(),
                  metrics=[AUC(name='auc')])

    return model

In [None]:
# Surgical Aggregation

def surgical_aggregation(nodes, max_label, global_model):
  """
  Aggregate the node models into the global model, one Dense layer at a time.

  Every Dense layer except the last is treated as part of the shared
  representation block and is averaged element-wise across all nodes
  (plain FedAvg). The last layer is aggregated "surgically": each output
  column (one per label) is averaged only over the nodes whose model has
  that label. Non-Dense layers are left untouched.

  Labels that no node contributes to keep the global model's current
  weights and bias instead of being reset to zero.

  After aggregation each node model receives the averaged representation
  layers and its own slice of the aggregated last layer; the global model
  receives the full last layer. Models are updated in place.

  Args:
    nodes: non-empty list of dicts, each with a 'classifier' (model) and
      'label_names' (the global label index of each of that model's output
      columns, in column order)
    max_label: largest global label index; the global last layer has
      max_label + 1 columns
    global_model: global model to update

  Returns:
    (global_model, nodes): the same objects that were passed in.

  Raises:
    ValueError: if `nodes` is empty.
  """
  if len(nodes) == 0:
    raise ValueError("surgical_aggregation requires at least one node")

  # Extract the models and label indices from each node
  models = [node['classifier'] for node in nodes]
  labels = [node['label_names'] for node in nodes]
  n_labels = max_label + 1

  reference_layers = models[0].layers  # all node models are assumed to share this layout
  last_idx = len(reference_layers) - 1

  for layer_idx, layer in enumerate(reference_layers):
    if not isinstance(layer, tf.keras.layers.Dense):
      continue  # only Dense layers carry the weights that get aggregated

    if layer_idx == last_idx:
      # Surgically aggregate the final layer: accumulate per-label sums and
      # count how many nodes contributed to each label.
      n_rows = models[0].layers[layer_idx].get_weights()[0].shape[0]
      final_layer_w = np.zeros((n_rows, n_labels))
      final_layer_b = np.zeros(n_labels)
      node_count = np.zeros(n_labels)

      for model, node_labels in zip(models, labels):
        w, b = model.layers[layer_idx].get_weights()
        for col, label in enumerate(node_labels):
          final_layer_w[:, label] += w[:, col]
          final_layer_b[label] += b[col]
          node_count[label] += 1

      # FedAvg restricted to the labels that have at least one contributor
      trained = node_count > 0
      final_layer_w[:, trained] /= node_count[trained]
      final_layer_b[trained] /= node_count[trained]

      # Labels nobody trained this round keep the global model's current
      # parameters rather than being overwritten with zeros.
      if not trained.all():
        prev_w, prev_b = global_model.layers[layer_idx].get_weights()
        untrained = ~trained
        final_layer_w[:, untrained] = np.asarray(prev_w)[:, untrained]
        final_layer_b[untrained] = np.asarray(prev_b)[untrained]

      # Hand each node back only the columns for the labels it owns
      for model, node_labels in zip(models, labels):
        model.layers[layer_idx].set_weights(
            [final_layer_w[:, node_labels], final_layer_b[node_labels]])
      global_model.layers[layer_idx].set_weights([final_layer_w, final_layer_b])

    else:
      # Normal FedAvg for all other Dense layers
      per_node = [model.layers[layer_idx].get_weights() for model in models]
      new_w = np.mean([weights[0] for weights in per_node], axis=0)
      new_b = np.mean([weights[1] for weights in per_node], axis=0)
      averaged = [new_w, new_b]

      for model in models:
        model.layers[layer_idx].set_weights(averaged)
      global_model.layers[layer_idx].set_weights(averaged)

  # The node models were updated in place, so `nodes` already holds them.
  return global_model, nodes

## Experiment

In [88]:
# Let's create the datasets for FL
X_train, X_test, y_train, y_test = train_test_split(allEmbeddings, labels, test_size=0.2, random_state=42)

# Input parameters
n_nodes = 3
labels_shared = 0    # number of labels every node has; the rest are split across nodes
partition_seed = 42  # seeds the sample/label shuffles so the node partition is repeatable

# Shuffle the sample and label indices with a seeded generator so that
# re-running this cell assigns the same samples and labels to each node.
rng = np.random.default_rng(partition_seed)
label_indices = rng.permutation(y_train.shape[1])
sample_indices = rng.permutation(X_train.shape[0])

# Picking the shared labels. Slicing with 0 yields an empty integer array, so
# the shared and non-shared cases go through the same code below.
n_shared = max(labels_shared, 0)
shared_labels = label_indices[:n_shared]
label_indices = label_indices[n_shared:]

# Split the remaining labels into different nodes.
label_indices_per_node = np.array_split(label_indices, n_nodes)

# Split the dataset into different nodes
sample_indices_per_node = np.array_split(sample_indices, n_nodes)

# Let's create the nodes. Each node will store a dictionary of X_train, y_train,
# label_names (global label index of each y_train column) and its classifier.
nodes = []
for node_samples, node_own_labels in zip(sample_indices_per_node, label_indices_per_node):
  # Shared labels come first, followed by the labels unique to this node.
  node_labels = np.concatenate((shared_labels, node_own_labels))

  node = {}
  node['X_train'] = X_train[node_samples]
  node['y_train'] = y_train[np.ix_(node_samples, node_labels)]
  node['label_names'] = node_labels

  node['classifier'] = create_mlp_model(node['X_train'].shape[1], node['y_train'].shape[1])
  nodes.append(node)






In [89]:
# Federated Learning Experiment

# Initialize the global model: same input size as the node models, one output
# per label column.
global_model = create_mlp_model(allEmbeddings.shape[1], labels.shape[1])
max_label = labels.shape[1]-1
# evaluate() returns [loss, auc] in the order the model was compiled with
perf = global_model.evaluate(X_test,y_test)
print("performance of the global model before training: AUC=",perf[1], "Loss=", perf[0])
### Perform federated learning
# Iteratively train all models for N epochs and merge them using surgical aggregation
num_epochs = 1
num_rounds = 10

# The loop variable is deliberately not called `round`: in a notebook that
# would rebind the builtin round() for every later cell in the session.
for fl_round in range(num_rounds):
  # Local training: each node fits its own classifier on its own data/labels
  for node in nodes:
    node['classifier'].fit(node['X_train'], node['y_train'], epochs=num_epochs, verbose=1)

  print (fl_round)
  # surgical aggregation FL: merge the node models and push the result back
  global_model, nodes = surgical_aggregation(nodes, max_label, global_model)

perf = global_model.evaluate(X_test,y_test)
print("performance of the global model after training: AUC=",perf[1], "Loss=", perf[0])

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - auc: 0.3674 - loss: 0.6941
performance of the global model before training: AUC= 0.3652147650718689 Loss= 0.6941906213760376
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 74ms/step - auc: 0.5249 - loss: 0.4517
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 72ms/step - auc: 0.6365 - loss: 0.4358
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 60ms/step - auc: 0.6888 - loss: 0.4512
0
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - auc: 0.6583 - loss: 0.2916
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - auc: 0.8596 - loss: 0.2534
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - auc: 0.8590 - loss: 0.2912
1
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - auc: 0.7951 - loss: 0.2203
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - auc

In [91]:
# Let's get the metrics for each label

from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, recall_score, roc_curve

# Run the global model on the test set
y_test_predict = global_model.predict(X_test)

metric_columns = ['Disease Label', 'Optimal Threshold', 'Accuracy', 'Sensitivity', 'Specificity', 'AUC']
metric_rows = []  # one dict per label, turned into a DataFrame at the end

for disease_label in range(labels.shape[1]):
  # Column `disease_label` of the prediction matrix is the predicted
  # probability for that label; the same column of y_test is the ground truth.
  y_prob = y_test_predict[:, disease_label]
  y_true = y_test[:, disease_label]

  # ROC-based metrics need both classes in the ground truth. Record NaNs for
  # such a label instead of aborting the whole table.
  if np.unique(y_true).size < 2:
    metric_rows.append({
        'Disease Label': label_columns[disease_label],
        'Optimal Threshold': np.nan,
        'Accuracy': np.nan,
        'Sensitivity': np.nan,
        'Specificity': np.nan,
        'AUC': np.nan,
    })
    continue

  # Calculate the ROC curve to find optimal threshold
  fpr, tpr, thresholds = roc_curve(y_true, y_prob)

  # Youden's J statistic (tpr - fpr) for each threshold; pick the maximum
  J = tpr - fpr
  optimal_threshold = thresholds[np.argmax(J)]

  # Apply the optimal threshold to get binary predictions
  y_pred_optimal = (y_prob >= optimal_threshold).astype(int)

  # Accuracy
  accuracy = accuracy_score(y_true, y_pred_optimal)

  # Sensitivity (Recall for the positive class)
  sensitivity = recall_score(y_true, y_pred_optimal, pos_label=1)

  # Specificity: True Negative Rate (specificity = TN / (TN + FP)).
  # labels=[0, 1] forces a 2x2 matrix so the four-way unpacking always works.
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred_optimal, labels=[0, 1]).ravel()
  specificity = tn / (tn + fp)

  # AUC (Area Under the ROC Curve)
  auc = roc_auc_score(y_true, y_prob)

  metric_rows.append({
      'Disease Label': label_columns[disease_label],
      'Optimal Threshold': optimal_threshold,
      'Accuracy': accuracy,
      'Sensitivity': sensitivity,
      'Specificity': specificity,
      'AUC': auc,
  })

df_metrics = pd.DataFrame(metric_rows, columns=metric_columns)
print(df_metrics)
# Show which global label indices each node was trained on
label_names = [node['label_names'] for node in nodes]
print(label_names)



[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
        Disease Label  Optimal Threshold  Accuracy  Sensitivity  Specificity  \
0   Enlarged Cardiom.           0.080823  0.877217     0.835443     0.882263   
1        Cardiomegaly           0.074099  0.894952     0.843750     0.899851   
2         Lung Lesion           0.251856  0.911323     0.902222     0.915354   
3        Lung Opacity           0.184259  0.918145     0.954338     0.902724   
4               Edema           0.017434  0.915416     1.000000     0.914246   
5       Consolidation           0.001177  0.683492     1.000000     0.681756   
6           Pneumonia           0.001287  0.667121     1.000000     0.663448   
7         Atelectasis           0.200091  0.912688     0.916667     0.912409   
8        Pneumothorax           0.019186  0.923602     1.000000     0.922971   
9    Pleural Effusion           0.011981  0.769441     1.000000     0.757880   
10      Pleural Other           0.043733  0.825