In [None]:
pip install tensorflow_federated

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Prefer the first GPU when TensorFlow reports one; otherwise fall back to the CPU.
if tf.config.experimental.list_physical_devices('GPU'):
    device = '/GPU:0'
else:
    device = '/CPU:0'

# Kaggle input locations: the two image folders and the metadata CSV.
HAM_PATH_PART1 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1"
HAM_PATH_PART2 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2"
METADATA_PATH = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"

def load_data_ham(path, metadata_path):
    """Load every image in ``path`` that has a matching row in the metadata CSV.

    Args:
        path: Directory of image files. The part of each file name before the
            first ``.`` is used as the image id.
        metadata_path: CSV file with at least the columns ``image_id`` and ``dx``.

    Returns:
        A list of ``[image, lesion_type]`` pairs. ``image`` is the value returned
        by ``process_image`` and ``lesion_type`` is the ``dx`` value of the first
        metadata row with that image id. Files whose id does not appear in the
        metadata are skipped.
    """
    print("Loading data from:", path)
    metadata = pd.read_csv(metadata_path)

    # Build the id -> label lookup once instead of filtering the whole frame
    # for every file. keep='first' mirrors the original `.iloc[0]` choice.
    dx_by_image_id = (
        metadata.drop_duplicates(subset='image_id', keep='first')
        .set_index('image_id')['dx']
        .to_dict()
    )

    data = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the resulting dataset stable between runs and machines.
    for img_file in sorted(os.listdir(path)):
        img_id = img_file.split('.')[0]
        if img_id in dx_by_image_id:
            img_path = os.path.join(path, img_file)
            data.append([process_image(img_path), dx_by_image_id[img_id]])
    return data

def process_image(img_path, target_size=(64, 192)):
    """Read an image as grayscale, resize it and scale its pixels by 1/255.

    Args:
        img_path: Path of the image file to read.
        target_size: Size passed to ``cv2.resize``, which interprets it as
            ``(width, height)``; the returned array is therefore shaped
            ``(height, width)``.

    Returns:
        A 2-D ``float32`` array holding the resized grayscale image divided
        by 255.

    Raises:
        ValueError: If OpenCV could not read ``img_path``.
    """
    # NOTE(review): the default size yields 192 * 64 = 12288 values per image,
    # exactly the element count of the 64x64x3 tensor that `preprocess`
    # reshapes to. Confirm that reinterpreting a grayscale image as a
    # 3-channel one is intended rather than loading colour at 64x64.
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img_array is None:
        # cv2.imread reports an unreadable/undecodable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise ValueError("Could not read image: {}".format(img_path))
    img_resize = cv2.resize(img_array, target_size)
    return img_resize.astype(np.float32) / 255.0

# Load both image folders (part 1 first, then part 2) against the same metadata file.
ham_data_part1, ham_data_part2 = (
    load_data_ham(image_dir, METADATA_PATH)
    for image_dir in (HAM_PATH_PART1, HAM_PATH_PART2)
)

# Merge the two record lists into a single two-column frame.
all_ham_data = [*ham_data_part1, *ham_data_part2]
column_names = ['image_data', 'skin_disease']
ham_df = pd.DataFrame(data=all_ham_data, columns=column_names)
print("Structure of the merged HAM dataset:")
print(ham_df.head())

# Replace the string lesion labels with integer class ids.
label_encoder = LabelEncoder()
label_encoder.fit(ham_df['skin_disease'])
ham_df['skin_disease'] = label_encoder.transform(ham_df['skin_disease'])

# Show the frame again, now with encoded labels.
print(ham_df.head())
df = pd.DataFrame(ham_df)

In [None]:
import random

num_client = 5

# Use a dedicated, seeded generator so the row -> client assignment is the
# same on every run (the module-level `random` functions are unseeded here).
CLIENT_ASSIGNMENT_SEED = 42
_client_rng = random.Random(CLIENT_ASSIGNMENT_SEED)

# Every row is assigned uniformly at random to one of client_1 .. client_<num_client>.
df["client"] = [
    "client_{}".format(_client_rng.randint(1, num_client))
    for _ in range(df.shape[0])
]

In [None]:
client_id_colname = 'client'

# Distinct client labels present in the data.
client_ids = df[client_id_colname].unique()

# Pick 80% of the clients for training. random_state pins the choice so the
# train/test client pools are reproducible (same seed value as the row-level
# train_test_split used later in this notebook).
train_client_ids = (
    pd.DataFrame(client_ids)
    .sample(frac=0.8, random_state=42)
    .values.ravel().tolist()
)
# Every remaining client is held out for evaluation.
test_client_ids = [x for x in client_ids if x not in train_client_ids]

In [None]:
from sklearn.model_selection import train_test_split

# Row-level 80/20 split with a fixed seed.
# NOTE(review): the split is over rows, not over clients, so both frames can
# contain rows from every client id — confirm this is intended.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Name of the DataFrame column that holds the image arrays.
# NOTE(review): not referenced by any other visible cell.
features ="image_data"

In [None]:
from collections import OrderedDict
import tensorflow as tf
import numpy as np

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100

def create_tf_dataset_for_client_fn(client_id):
    """Build the tf.data pipeline for the rows of one client.

    Rows are taken from the module-level ``dataframe`` as it is bound at the
    moment this function is *called*, keeping those whose ``client_id_colname``
    value equals ``client_id``.

    Args:
        client_id: Client label to select.

    Returns:
        A ``tf.data.Dataset`` of ordered dicts with keys ``image_data``
        (float32) and ``skin_disease`` (int32), shuffled with a buffer of
        ``SHUFFLE_BUFFER``, batched one example at a time and repeated
        ``NUM_EPOCHS`` times.
    """
    rows = dataframe[dataframe[client_id_colname] == client_id]

    images = np.array(rows['image_data'].values.tolist(), dtype="float32")
    labels = np.array(rows['skin_disease'].values.tolist(), dtype="int32")
    tensors = OrderedDict([("image_data", images), ("skin_disease", labels)])

    shuffled = tf.data.Dataset.from_tensor_slices(tensors).shuffle(SHUFFLE_BUFFER)
    return shuffled.batch(1).repeat(NUM_EPOCHS)

In [None]:
import tensorflow_federated as tff

In [None]:
def _bind_client_dataset_fn(frame):
    """Return a per-client dataset function that always reads from ``frame``.

    ``create_tf_dataset_for_client_fn`` looks up the global ``dataframe`` each
    time it is called, and ClientData calls it lazily. Without this binding,
    datasets requested from ``train_data`` after ``dataframe`` has been
    re-pointed at ``test_df`` would silently be built from the test rows.
    """
    def _dataset_fn(client_id):
        global dataframe
        # Point the global at the intended frame right before delegating.
        dataframe = frame
        return create_tf_dataset_for_client_fn(client_id)
    return _dataset_fn


# Training clients draw their examples from train_df.
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=_bind_client_dataset_fn(train_df))

# Held-out clients draw their examples from test_df.
dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=_bind_client_dataset_fn(test_df))

In [None]:
# Display the element structure recorded for the training client datasets.
train_data.element_type_structure

In [None]:
import collections

NUM_EPOCHS = 1
BATCH_SIZE = 10
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
    """Turn a client dataset into batches of model-ready ``(x, y)`` pairs.

    The dataset is repeated ``NUM_EPOCHS`` times, shuffled (buffer
    ``SHUFFLE_BUFFER``, seed 1), grouped into batches of ``BATCH_SIZE``,
    mapped to the x/y layout and prefetched (``PREFETCH_BUFFER``).

    Args:
        dataset: A ``tf.data.Dataset`` whose elements are dicts with the keys
            ``image_data`` and ``skin_disease``.

    Returns:
        A dataset of ordered dicts with ``x`` reshaped to ``[-1, 64, 64, 3]``
        and ``y`` reshaped to ``[-1, 1]``.
    """
    def _to_xy(element):
        images = tf.reshape(element['image_data'], [-1,64,64,3])
        labels = tf.reshape(element['skin_disease'], [-1, 1])
        return collections.OrderedDict(x=images, y=labels)

    repeated = dataset.repeat(NUM_EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER, seed=1)
    batched = shuffled.batch(BATCH_SIZE)
    return batched.map(_to_xy).prefetch(PREFETCH_BUFFER)

In [None]:
from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    """Build one preprocessed dataset per client id, showing a progress bar.

    Args:
        client_data: Object exposing ``create_tf_dataset_for_client(client_id)``.
        client_ids: Iterable of client ids to materialise.

    Returns:
        A list with the ``preprocess``-ed dataset of each client, in the order
        of ``client_ids``.
    """
    datasets = []
    for cid in tqdm(client_ids):
        raw_dataset = client_data.create_tf_dataset_for_client(cid)
        datasets.append(preprocess(raw_dataset))
    return datasets

In [None]:
# Number of distinct client labels that occur in the training rows.
NUM_CLIENTS = np.unique(train_df[client_id_colname]).size

# Take at most NUM_CLIENTS ids from the training ClientData.
sample_clients = train_data.client_ids[:NUM_CLIENTS]

# One preprocessed dataset per selected client.
federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

In [None]:
def create_keras_model(input_shape=(32, 32, 3), num_classes=7, pixel_scale=255.0):
    """Build a softmax classifier on top of a frozen ImageNet ResNet50.

    Args:
        input_shape: ``(height, width, channels)`` of the images fed to the
            model; it must match the tensors actually passed in.
        num_classes: Size of the softmax output layer.
        pixel_scale: Factor applied to the inputs before ResNet preprocessing.
            ``resnet.preprocess_input`` expects pixel values in the 0-255
            range, while this notebook's images are divided by 255 when
            loaded, so the default of 255.0 restores that range. Pass 1.0 for
            images that already hold 0-255 values.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image batch to class
        probabilities.
    """
    base_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    # Freeze the backbone: only the new classification head is trainable.
    base_model.trainable = False

    inputs = tf.keras.Input(shape=input_shape)
    # Undo the [0, 1] normalisation so the ImageNet mean subtraction done by
    # preprocess_input operates on the value range it was defined for.
    x = inputs * pixel_scale
    x = tf.keras.applications.resnet.preprocess_input(x)
    # training=False runs the backbone in inference mode.
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)
    return model

In [None]:
# Build the dataset of the first training client and pull a single element from it.
first_train_client_id = train_data.client_ids[0]
example_dataset = train_data.create_tf_dataset_for_client(first_train_client_id)

example_element = next(iter(example_dataset))

# Show the label stored in that element as a NumPy value.
example_element['skin_disease'].numpy()

In [None]:
# Run the example client's dataset through the batching pipeline.
preprocessed_example_dataset = preprocess(example_dataset)

# Fetch the first batch and convert every tensor in it to a NumPy array.
first_example_batch = next(iter(preprocessed_example_dataset))
sample_batch = tf.nest.map_structure(lambda tensor: tensor.numpy(),
                                     first_example_batch)

In [None]:
# Display the element spec of the preprocessed example dataset; model_fn
# passes this same spec to TFF as the model's input_spec.
preprocessed_example_dataset.element_spec

In [None]:
from keras.metrics import Recall, Precision

def model_fn():
    """Build a fresh TFF model around a newly created Keras classifier.

    A new Keras model is constructed on every call. Its input shape is taken
    from the ``x`` entry of ``preprocessed_example_dataset.element_spec``
    (batch dimension dropped), so the network always matches the batches
    described by the ``input_spec`` handed to TFF.

    Returns:
        The object produced by ``tff.learning.models.from_keras_model`` with a
        sparse categorical cross-entropy loss and a sparse categorical
        accuracy metric.
    """
    input_spec = preprocessed_example_dataset.element_spec
    # create_keras_model defaults to (32, 32, 3); the preprocessed batches are
    # reshaped to 64x64x3, so the default would be rejected at the first
    # forward pass. Size the model from the spec instead.
    image_shape = tuple(input_spec['x'].shape[1:])
    keras_model = create_keras_model(input_shape=image_shape)
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=input_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
# A proximal term belongs to FedProx, not FedAvg: build_weighted_fed_avg does
# not accept `proximal_strength`, so the FedProx builder is used here.
# Each client optimises locally with Adam (learning rate 0.001).
iterative_process = tff.learning.algorithms.build_weighted_fed_prox(
    model_fn,
    proximal_strength=0.1,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

In [None]:
# Show the type signature of the process's `initialize` computation as a string.
str(iterative_process.initialize.type_signature)

In [None]:
# Create the initial state of the federated training process.
state = iterative_process.initialize()

In [None]:
NUM_ROUNDS = 10

# Start from the initial state and feed each round's resulting state into the
# next round. (Passing the untouched initial `state` every time would restart
# training from scratch on every round.)
train_state = state
for round_num in range(NUM_ROUNDS):
  result = iterative_process.next(train_state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

In [None]:
# Build the federated evaluation process from the same model_fn used for training.
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

In [None]:
# Extract the model weights held in the training state ...
model_weights = iterative_process.get_model_weights(train_state)

# ... and load them into a freshly initialised evaluation state.
evaluation_state = evaluation_process.set_model_weights(
    evaluation_process.initialize(),
    model_weights,
)

In [None]:
# Build one preprocessed dataset per held-out (test) client.
federated_test_data = make_federated_data(test_data, test_client_ids)

# Display how many test datasets there are, together with the first one.
len(federated_test_data), federated_test_data[0]

In [None]:
# Run one step of the evaluation process over the test clients' datasets.
evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)

In [None]:
# Show the metrics carried by the evaluation output as a string.
str(evaluation_output.metrics)