<a href="https://colab.research.google.com/github/Nuzhattttt/braintumor/blob/main/Preprocessing_and_FederatedLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)


In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
train_directory = '/content/gdrive/MyDrive/Amar dataset/dataset/Training'
test_directory = '/content/gdrive/MyDrive/Amar dataset/dataset/Testing'

In [5]:
image_size = (128, 128)
train_images = []
train_labels = []
test_images = []
test_labels = []

In [6]:
def load_and_preprocess_images(directory, images_list, labels_list):
    for class_name in os.listdir(directory):
        class_directory = os.path.join(directory, class_name)

        for filename in os.listdir(class_directory):
            if filename.endswith('.jpg') or filename.endswith('.png'):
                image_path = os.path.join(class_directory, filename)
                image = Image.open(image_path).convert("RGB")
                image = image.resize(image_size, Image.LANCZOS)
                images_list.append(image)
                labels_list.append(class_name)


In [7]:
import numpy as np
import random
import os
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
def create_clients(data_list, label_list, num_clients=2, initial='clients'):
    client_names = ['{}_{}'.format(initial, i+1) for i in range(num_clients)]
    data = list(zip(data_list, label_list))
    random.shuffle(data)
    size = len(data)//num_clients
    shards = [data[i:i + size] for i in range(0, size*num_clients, size)]
    assert(len(shards) == len(client_names))
    return {client_names[i] : shards[i] for i in range(len(client_names))}
def batch_data(data_shard, bs=32):
    data, label = zip(*data_shard)
    dataset = tf.data.Dataset.from_tensor_slices((list(data), list(label)))
    return dataset.shuffle(len(label)).batch(bs)
class SimpleMLP:
    @staticmethod
    def build(shape, classes):
        model = Sequential()
        model.add(Dense(500,input_shape=(shape,)))
        model.add(Activation("relu"))
        model.add(Dense(300))
        model.add(Activation("relu"))
        model.add(Dense(200))
        model.add(Activation("relu"))
        model.add(Dense(classes))
        model.add(Activation("sigmoid"))
        return model
class VGG16:
    @staticmethod
    def build(shape, classes):
        model = Sequential()
        model.add(Conv2D(input_shape=shape,filters=32,kernel_size=(3,3),padding="same", activation="relu"))
        model.add(Conv2D(filters=32,kernel_size=(3,3),padding="same", activation="relu"))
        model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
        model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))

        model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))


        model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
        model.add(Flatten())
        model.add(Dense(units=4096,activation="relu"))
        model.add(Dense(units=4096,activation="relu"))
        model.add(Dense(units=classes, activation="softmax"))
        return model
def weight_scalling_factor(clients_trn_data, client_name):
    client_names = list(clients_trn_data.keys())
    bs = list(clients_trn_data[client_name])[0][0].shape[0]
    global_count = sum([tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy() for client_name in client_names])*bs
    local_count = tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy()*bs
    return local_count/global_count
def scale_model_weights(weight, scalar):
    weight_final = []
    steps = len(weight)
    for i in range(steps):
        weight_final.append(scalar * weight[i])
    return weight_final
def sum_scaled_weights(scaled_weight_list):
    avg_grad = list()
    for grad_list_tuple in zip(*scaled_weight_list):
        layer_mean = tf.math.reduce_sum(grad_list_tuple, axis=0)
        avg_grad.append(layer_mean)
    return avg_grad
def test_model(X_test, Y_test,  model, comm_round):
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    predictions = model.predict(X_test)
    loss = cce(Y_test, predictions)
    acc = accuracy_score(tf.argmax(predictions, axis=1), tf.argmax(Y_test, axis=1))
    print('comm_round: {} | global_acc: {:.3%} | global_loss: {}'.format(comm_round, acc, loss))
    return acc, loss

In [8]:
load_and_preprocess_images(train_directory, train_images, train_labels)

In [9]:
load_and_preprocess_images(test_directory, test_images, test_labels)

In [10]:
print(train_images)
print(train_labels)
print(test_images)
print(test_labels)

[<PIL.Image.Image image mode=RGB size=128x128 at 0x7C37211EE1A0>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720CD5750>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C37520287F0>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C37C0C20460>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D244F0>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720CF3310>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D24670>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53160>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D532B0>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D533A0>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53280>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53340>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53250>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53310>, <PIL.Image.Image image mode=RGB size=128x128 at 0x7C3720D53670>, <PIL.Image.Image image m

In [None]:
###
# train_images = train_images[0:35] + train_images[1325:1350] + train_images[3000:3055] + train_images[4400:4455]
# train_labels = train_labels[0:35] + train_labels[1325:1350] + train_labels[3000:3055] + train_labels[4400:4455]

# test_images = test_images[425:470]
# test_labels = test_labels[425:470]

# print("X_train shape:", train_images.shape)
# print("y_train shape:", train_labels.shape)
# print("X_test shape:", test_images.shape)
# print("y_test shape:", test_labels.shape)


In [11]:
print(set(train_labels))
print(set(test_labels))

{'glioma', 'meningioma', 'notumor', 'pituitary'}
{'meningioma', 'notumor', 'glioma', 'pituitary'}


In [12]:
label_encoder = LabelEncoder()
train_encoded_labels = label_encoder.fit_transform(train_labels)
test_encoded_labels = label_encoder.transform(test_labels)

In [13]:
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [14]:
X_train = np.array([np.array(img) for img in train_images])
y_train = np.array(train_encoded_labels)

In [15]:
print(len(X_train))

5722


In [16]:
augmented_images = []
for image in X_train:
    augmented_image = datagen.random_transform(image)
    augmented_images.append(augmented_image)
X_train_augmented = np.array(augmented_images)

In [17]:
print(X_train_augmented.shape)

(5722, 128, 128, 3)


In [18]:
X_test = np.array([np.array(img) for img in test_images])
y_test = np.array(test_encoded_labels)


In [19]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


X_train shape: (5722, 128, 128, 3)
y_train shape: (5722,)
X_test shape: (1311, 128, 128, 3)
y_test shape: (1311,)


In [None]:
#print(X_train[0:125])


In [20]:
import numpy as np
import random
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
labels = list(set(train_encoded_labels.tolist()))
#lb = LabelBinarizer()
#label_list = lb.fit_transform(label_list)
n_values = np.max(train_encoded_labels) + 1
train_encoded_labels = np.eye(n_values)[train_encoded_labels]
X_train, X_test, y_train, y_test = train_test_split(X_train,
                                                    train_encoded_labels,
                                                    test_size=0.1,
                                                    random_state=42)
clients = create_clients(X_train, y_train, num_clients=2, initial='client')


In [21]:
print(n_values)

4


In [22]:
print(y_test)

[[0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 ...
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]


In [23]:
clients_batched = dict()
for (client_name, data) in clients.items():
    clients_batched[client_name] = batch_data(data)
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))
print(test_batched)

<_BatchDataset element_spec=(TensorSpec(shape=(None, 128, 128, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 4), dtype=tf.float64, name=None))>


In [24]:
print(clients_batched)

{'client_1': <_BatchDataset element_spec=(TensorSpec(shape=(None, 128, 128, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 4), dtype=tf.float64, name=None))>, 'client_2': <_BatchDataset element_spec=(TensorSpec(shape=(None, 128, 128, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 4), dtype=tf.float64, name=None))>}


In [25]:
len(labels)

4

In [26]:
from tensorflow.keras.optimizers.legacy import SGD
comms_round = 4
#create optimizer
lr = 0.01
loss='categorical_crossentropy'
metrics = ['accuracy']
# optimizer = Adam(learning_rate=lr,
#                 weight_decay=lr / comms_round,
#                 ema_momentum=0.9
#                )
optimizer = SGD(lr=lr,
                decay=lr / comms_round,
                momentum=0.9
               )


#initialize global model
#print(data_list.shape,labels)
# smlp_global = SimpleMLP()
# global_model = smlp_global.build(data_list[0].shape,len(labels))

smpl_vgg = VGG16()
global_model = smpl_vgg.build(X_train[0].shape,len(labels))

  super().__init__(name, **kwargs)


In [27]:
print(global_model.get_weights())

[array([[[[ 0.11345164, -0.12341982, -0.11334089,  0.02955613,
           0.05840789, -0.08360228,  0.05651131, -0.02052778,
          -0.0612593 , -0.03144493, -0.07382031, -0.06370775,
           0.00413634, -0.00014535, -0.12516253,  0.099554  ,
          -0.09838973,  0.11914329, -0.02948485,  0.1292804 ,
          -0.01576506,  0.04571989,  0.09977168, -0.12082287,
          -0.13191012, -0.04708726, -0.06158091,  0.02556726,
           0.0632433 ,  0.01044329,  0.02273922,  0.09539749],
         [-0.05347887, -0.08972397,  0.03687598,  0.0465111 ,
          -0.00778368,  0.10353741,  0.1076235 , -0.09994419,
           0.03397638, -0.08603316, -0.07725811, -0.06700841,
          -0.01147756,  0.1347623 ,  0.08042678,  0.092822  ,
           0.04482341,  0.03140251,  0.03873704, -0.08947988,
           0.12465276, -0.13285382,  0.1119321 , -0.05382128,
          -0.09781653, -0.02232344, -0.04913256, -0.11367498,
          -0.08752526,  0.01633438,  0.13428144, -0.10940751],
     

In [28]:
for comm_round in range(comms_round):
    global_weights = global_model.get_weights()
    scaled_local_weight_list = list()
    client_names= list(clients_batched.keys())
    random.shuffle(client_names)
    for client in tqdm(client_names , desc = 'Progress Bar'):
        #time.sleep(0.5)
        smpl_local = VGG16()
        local_model = smpl_vgg.build(X_train[0].shape,len(labels))
        smlp_local = SimpleMLP()
        local_model.compile(loss=loss,
                      optimizer=optimizer,
                      metrics=metrics)
        #print(local_model.summary())
        #print(clients_batched)
        local_model.set_weights(global_weights)
        local_model.fit(clients_batched[client], epochs=1, verbose=0)
        scaling_factor = weight_scalling_factor(clients_batched, client)
        scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
        scaled_local_weight_list.append(scaled_weights)
        K.clear_session()
    average_weights = sum_scaled_weights(scaled_local_weight_list)
    global_model.set_weights(average_weights)
    for(X_test, Y_test) in test_batched:
        global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)
        batch_size=32
SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(batch_size)
smlp_SGD = VGG16()
# SGD_model = smlp_SGD.build(X_train[0].shape,len(labels))
n_classes_for_testing = 4#min(n_classes, 2)
SGD_model = smpl_vgg.build(X_train[0].shape, n_classes_for_testing)
SGD_model.compile(loss=loss,
              optimizer=optimizer,
              metrics=metrics)
_ = SGD_model.fit(SGD_dataset, epochs=50, verbose=0)
for(X_test, Y_test) in test_batched:
        SGD_acc, SGD_loss = test_model(X_test, Y_test, SGD_model, 1)
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)


Progress Bar: 100%|██████████| 2/2 [00:37<00:00, 18.56s/it]


comm_round: 0 | global_acc: 25.480% | global_loss: 1.3865903615951538


Progress Bar: 100%|██████████| 2/2 [00:22<00:00, 11.13s/it]


comm_round: 1 | global_acc: 25.480% | global_loss: 1.387127161026001


Progress Bar: 100%|██████████| 2/2 [00:22<00:00, 11.05s/it]


comm_round: 2 | global_acc: 58.115% | global_loss: 1.1993025541305542


Progress Bar: 100%|██████████| 2/2 [00:22<00:00, 11.34s/it]


comm_round: 3 | global_acc: 72.949% | global_loss: 1.0758099555969238
comm_round: 1 | global_acc: 94.066% | global_loss: 0.8020989894866943
