### Data Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation strategy: every generated sample receives a random
# combination of the transformations configured below.
augmentation_options = {
    # Geometric distortions
    "rotation_range": 90,        # random rotation, in degrees
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    # Random mirroring along both axes
    "horizontal_flip": True,
    "vertical_flip": True,
    # Colour perturbation (extra)
    "channel_shift_range": 0.1,
    # Pixels exposed by a transform are filled from the nearest valid pixel
    "fill_mode": "nearest",
}
augmentation = ImageDataGenerator(**augmentation_options)


In [None]:
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img, array_to_img
import numpy as np

def augment_class_images(input_dir, target_dir, target_count, augmentation):
    """Generate augmented images until a class holds ``target_count`` images.

    The images found in ``input_dir`` are used as sources in a round-robin
    fashion: each new sample is one random transformation of the next source
    image, so the generated images are spread evenly over all originals.

    Args:
        input_dir: Directory containing the original images of one class.
        target_dir: Directory the augmented images are written to. It is
            created if it does not exist. Files are named ``aug_<i>.jpg``;
            existing files with the same name are overwritten.
        target_count: Desired number of images for the class. The number of
            generated images is ``target_count`` minus the number of images
            found in ``input_dir``.
        augmentation: A configured ``ImageDataGenerator`` whose ``flow``
            method yields randomly transformed batches.

    Returns:
        None. Prints a message and returns early when no augmentation is
        needed or when ``input_dir`` contains no images.
    """
    # Only consider real image files: stray entries such as
    # ".ipynb_checkpoints" or ".DS_Store" would otherwise be counted and
    # then crash load_img. Sorting makes the source order reproducible.
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".ppm", ".tif", ".tiff")
    img_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(input_dir, name))
    )
    current_count = len(img_files)
    n_to_generate = target_count - current_count

    # If current count is already sufficient, no augmentation is needed
    if n_to_generate <= 0:
        print(f"No augmentation needed for {input_dir}")
        return

    # Without any source image there is nothing to transform (and the
    # round-robin index below would divide by zero).
    if current_count == 0:
        print(f"No source images found in {input_dir}; nothing to augment")
        return

    os.makedirs(target_dir, exist_ok=True)

    for i in range(n_to_generate):
        # Cycle through the originals so every image contributes samples
        img_file = img_files[i % current_count]
        img_path = os.path.join(input_dir, img_file)

        # Load the image and add a leading batch dimension for flow()
        image = load_img(img_path)
        image_array = img_to_array(image)
        image_array = np.expand_dims(image_array, axis=0)

        # flow() yields batches endlessly, so take exactly one augmented
        # sample per source image before moving on to the next one.
        batch = next(iter(augmentation.flow(image_array, batch_size=1)))

        # scale=True rescales the pixel values to the 0-255 range on save
        new_img = array_to_img(batch[0], scale=True)
        new_img.save(os.path.join(target_dir, f"aug_{i}.jpg"))


### Augment Training data

In [None]:
# Replace each "path" placeholder with the real input and output directories.
# Three separate entries are used because, in each client, one class already
# has a sufficient number of images while the remaining three classes are
# imbalanced. List the directories of those imbalanced classes here.
training_dirs = [
    ("path", "path"),
    ("path", "path"),
    ("path", "path"),
]

for train_input_dir, train_output_dir in training_dirs:
    augment_class_images(
        input_dir=train_input_dir,
        target_dir=train_output_dir,
        target_count=1157,
        augmentation=augmentation,
    )

### Augment Testing data

In [None]:
# Replace each "path" placeholder with the real input and output directories.
# Three separate entries are used because, in each client, one class already
# has a sufficient number of images while the remaining three classes are
# imbalanced. List the directories of those imbalanced classes here.
testing_dirs = [
    ("path", "path"),
    ("path", "path"),
    ("path", "path"),
]

for test_input_dir, test_output_dir in testing_dirs:
    augment_class_images(
        input_dir=test_input_dir,
        target_dir=test_output_dir,
        target_count=150,
        augmentation=augmentation,
    )

### Plot the data distribution of the augmented data

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Categories (tumor types) shown along the x-axis
categories = ["Glioma", "Meningioma", "No Tumor", "Pituitary"]
x = np.arange(len(categories))  # X positions

# Per-category sample counts after augmentation for each model,
# listed in the same order as `categories`
model_1_train = [1021, 1039, 1295, 1157]
model_1_test = [210, 210, 255, 150]

model_2_train = [1021, 1039, 1295, 1157]
model_2_test = [210, 210, 255, 150]

model_3_train = [1021, 1039, 1295, 1157]
model_3_test = [210, 210, 255, 150]

model_4_train = [1021, 1039, 1295, 1157]
model_4_test = [210, 210, 255, 150]

# Bar width
bar_width = 0.2

# One entry per model:
# (name, x-offset in bar widths, train counts, test counts, train color, test color)
model_series = [
    ("Model 1", -1.5, model_1_train, model_1_test, '#1f77b4', '#aec7e8'),
    ("Model 2", -0.5, model_2_train, model_2_test, '#9467bd', '#c5b0d5'),
    ("Model 3", 0.5, model_3_train, model_3_test, '#2ca02c', '#98df8a'),
    ("Model 4", 1.5, model_4_train, model_4_test, '#d62728', '#ff9896'),
]

# Creating the plot
fig, ax = plt.subplots(figsize=(16, 10))

# For every model draw the training bar and stack the testing bar on top
for model_name, offset, train_counts, test_counts, train_color, test_color in model_series:
    bar_positions = x + offset * bar_width
    ax.bar(bar_positions, train_counts, bar_width,
           label=f"{model_name} - Training", color=train_color)
    ax.bar(bar_positions, test_counts, bar_width, bottom=train_counts,
           label=f"{model_name} - Testing", color=test_color)

# Labels and title with increased font size
ax.set_xlabel("Tumor Type", fontsize=20)
ax.set_ylabel("Number of Samples", fontsize=20)
ax.set_title("Combined Sample Distribution for Models 1, 2, 3, and 4", fontsize=24)
ax.set_xticks(x)
ax.set_xticklabels(categories, fontsize=19)

# Fix the tick positions explicitly so the labels always match the real
# values; setting only the label texts would attach them to whatever
# automatic ticks matplotlib happened to choose.
ax.set_yticks(np.arange(0, 1601, 200))
ax.tick_params(axis="y", labelsize=19)

# Adjust legend to be at the bottom center
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.25),  # Place it outside the plot area
    ncol=4,  # Number of columns for legend
    fontsize=20,  # Adjust font size for legend items
    title="Training and Testing Models",
    title_fontsize=21  # Font size for legend title
)

# Adjust layout to ensure the legend fits within the figure
plt.tight_layout()

# Save the figure as a high-resolution PDF before showing it;
# bbox_inches="tight" keeps the legend below the axes from being cropped.
fig.savefig("IID_DATA_REPRESENTATION.pdf", format="pdf", dpi=900, bbox_inches="tight")

# Show the plot
plt.show()

