In [1]:
import numpy as np
from medmnist import PneumoniaMNIST
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import losses

from keras.layers import Input, Conv2D, MaxPooling2D , UpSampling2D, add, Input
from keras.models import Model
from keras import regularizers

from skimage.io import imshow
import neptune
# run = neptune.init_run(project='momkeybomkey/Federated',
#                        api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJjNDdlN2ZhNy00ZmJmLTQ4YjMtYTk0YS1lNmViZmZjZWRhNzUifQ==')

In [14]:
import random

def _collate_fn(data):
        xs = []

        for x, _ in data:
            x = np.array(x).astype(np.float32) / 255.
            xs.append(x)

        return np.array(xs)

def shuffle_iterator(iterator):
    # iterator should have limited size
    index = list(iterator)
    total_size = len(index)
    i = 0
    random.shuffle(index)
    result = []

    while len(result) < total_size:
        result.append(index[i])
        i += 1
        
        if i >= total_size:
            i = 0
            random.shuffle(index)

    return result

def get_loader(dataset, random):
    total_size = len(dataset)
    print('Size', total_size)
    if random:
        index_generator = shuffle_iterator(range(total_size))
    else:
        index_generator = list(range(total_size))

    while True:
        data = []

        for _ in range(len(index_generator)):
            idx = index_generator.pop()
            data.append(dataset[idx])

        return _collate_fn(data)

In [15]:
train = PneumoniaMNIST(split="train", download=True, size=128)
test = PneumoniaMNIST(split="test", download=True, size=128)
val = PneumoniaMNIST(split="val", download=True, size=128)

train_load = np.array(get_loader(train, True))
test_load = np.array(get_loader(test, True))
val_load = np.array(get_loader(val, False))

val_noisy = (np.array(get_loader(val, True)) + 0.02 * tf.random.normal(val_load.shape))[:10]

Using downloaded and verified file: C:\Users\malth\.medmnist\pneumoniamnist_128.npz
Using downloaded and verified file: C:\Users\malth\.medmnist\pneumoniamnist_128.npz
Using downloaded and verified file: C:\Users\malth\.medmnist\pneumoniamnist_128.npz
Size 4708
Size 624
Size 524
Size 524


In [2]:
help(Conv2D)

Help on class Conv2D in module keras.src.layers.convolutional.conv2d:

class Conv2D(keras.src.layers.convolutional.base_conv.BaseConv)
 |  Conv2D(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), groups=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
 |
 |  2D convolution layer.
 |
 |  This layer creates a convolution kernel that is convolved with the layer
 |  input over a 2D spatial (or temporal) dimension (height and width) to
 |  produce a tensor of outputs. If `use_bias` is True, a bias vector is created
 |  and added to the outputs. Finally, if `activation` is not `None`, it is
 |  applied to the outputs as well.
 |
 |  Args:
 |      filters: int, the dimension of the output space (the number of filters
 |          in the convolution).
 |      kernel_size: i

In [16]:
def setup_model():
    n = 128
    chan = 1
    input_img = Input(shape=(n, n, chan))

    l1 = Conv2D(32, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(input_img)
    l2 = Conv2D(32, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l1)
    l3 = MaxPooling2D(padding='same')(l2)

    l4 = Conv2D(64, (3, 3),  padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l3)
    l5 = Conv2D(64, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l4)
    l6 = MaxPooling2D(padding='same')(l5)

    # Decoder

    l7 = UpSampling2D()(l6)
    l8 = Conv2D(64, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10),)(l7)
    l9 = Conv2D(64, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l8)
    l10 = add([l5, l9])

    l11 = UpSampling2D()(l10)
    l12 = Conv2D(32, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l11)
    l13 = Conv2D(32, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l12)
    l14 = add([l13, l2])

    # chan = 3, for RGB
    decoded = Conv2D(chan, (3, 3), padding='same', activation='relu', activity_regularizer=regularizers.l1(10e-10))(l14)

    return Model(input_img, decoded)

In [17]:
def generate_noise(data, noise_factor = 0.2):
    return data + noise_factor * tf.random.normal(shape=data.shape)

In [18]:
def mse(orig, res):
    return ((orig - res) ** 2).mean()

In [19]:
def split_data(data, n, noise_factor = 0.2):
    m = len(data)
    clean_split = []
    
    for i in range(n):
        clean_split.append(data[m * i // n: m * (i + 1) // n])

    noisy_split = [generate_noise(x, noise_factor) for x in clean_split]
    
    return clean_split, noisy_split

In [None]:
def neptune_log(epoch, autoencoder):
    print("Evaluation")
    run["evaluation/mse"].append(autoencoder.evaluate(val_noisy, val_load[:10]))
    run[f"images/reconstructed_{epoch + 1}"].upload(neptune.types.File.as_image(autoencoder.predict(val_noisy)[0]))
    print("")

In [21]:
def neptune_val_images(autoencoder):
    print("Final evaluation")
    decoded_imgs = autoencoder.predict(val_noisy)
    
    for i in range(1, 6):
        run[f"validation/original_{i}"].upload(neptune.types.File.as_image(val_load[i]))
        run[f"validation/noisy_{i}"].upload(neptune.types.File.as_image(val_noisy[i]))
        run[f"validation/reconstructed_{i}"].upload(neptune.types.File.as_image(decoded_imgs[i]))

In [None]:
# Limited data
# train_load = split_data(train_load, 5)
# test_load = split_data(test_load, 5)

# train_load = train_load[0][0]
# test_load = test_load[0][0]

In [22]:
def run_model(n, epochs):
    # Make overall model
    autoencoder = setup_model()
    autoencoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=losses.MeanSquaredError())

    # try:
    #     autoencoder.load_model("./Checkpoints/model.weights.h5")
    # except:
    #     pass

    # neptune_log(-1, autoencoder)
    run["images/original"].upload(neptune.types.File.as_image(val_load[0]))
    run["images/noisy"].upload(neptune.types.File.as_image(val_noisy[0]))

    # Make n models
    models = [setup_model() for _ in range(n)]
    for model in models:
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=losses.MeanSquaredError())
    

    # Split data
    train, train_noisy = split_data(train_load, n)
    test, test_noisy = split_data(test_load, n)

    # Get central weights
    primary_weights = autoencoder.get_weights()
    for model in models:
        model.set_weights(primary_weights)
    
    # Train the networks
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}")

        for i, model in enumerate(models):
            print(f"Encoder {i + 1}")
            model.fit(train_noisy[i], train[i], batch_size=32, epochs=1, shuffle=True, validation_data=(test_noisy[i], test[i]))
            run[f"evaluation/encoder_{i + 1}/mse"].append(model.evaluate(test_noisy[i], test[i]))

        weights = [model.get_weights() for model in models]
        weight_update = [0 for _ in primary_weights]

        for weight in weights:
            weight_update = [wu + (w - w0) / n for wu, w, w0 in zip(weight_update, weight, primary_weights)]

        primary_weights = [w0 + wu for w0, wu in zip(primary_weights, weight_update)]

        for model in models:
            model.set_weights(primary_weights)

        print(f"Epoch {epoch + 1} completed")
        print("")
        
        autoencoder.set_weights(primary_weights)
        neptune_log(epoch, autoencoder)
    
    neptune_val_images(autoencoder)

    autoencoder.save_weights("./Checkpoints/model_full.weights.h5")

    return autoencoder

In [23]:
autoencoder = run_model(n = 1, epochs = 200)

Epoch 1
Encoder 1
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 1s/step - loss: 0.0248 - val_loss: 0.0058
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 325ms/step - loss: 0.0059
Epoch 1 completed

Evaluation


ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 10
'y' sizes: 524


In [None]:
run.stop()