# Set up

In [None]:
import tensorflow as tf
import tensorflow.keras as tfk
import numpy as np
import random

import augmentation as augh
import plotter as plot
import folding as fold

# Emitted after the imports above, so seeing this message in the cell output
# means every import in this cell succeeded.
print("Libraries have been imported")

# Test set split

In [None]:
# Read the filtered dataset archive and carve it into two parts with
# fold.split_set (count=150, shuffle=False); the first returned part is kept
# as the test set and the second as the "free" set.
data_set = np.load("/kaggle/input/bloodcells-filtered/dataset_filtered.npz")
test_set, free_set = fold.split_set(data_set, count=150, shuffle=False)

# One line per set: dtype and shape of its 'images' and 'labels' arrays.
print(
    f"Data set: {data_set['images'].dtype}{data_set['images'].shape} - {data_set['labels'].dtype}{data_set['labels'].shape}",
    f"Test set: {test_set['images'].dtype}{test_set['images'].shape} - {test_set['labels'].dtype}{test_set['labels'].shape}",
    f"Free set: {free_set['images'].dtype}{free_set['images'].shape} - {free_set['labels'].dtype}{free_set['labels'].shape}",
    sep="\n",
)

# Store each part as its own .npz archive in the working directory; the
# mapping entries are unpacked so every key becomes a named array in the file.
np.savez("dataset_test.npz", **test_set)
np.savez("dataset_free.npz", **free_set)

# Free dataset import

In [None]:
# Open the "free" dataset archive that the rest of the notebook works on.
# NOTE(review): presumably this is the dataset_free.npz written by the split
# cell, re-uploaded as a Kaggle input — confirm.
data = np.load("/kaggle/input/bloodcells-free/dataset_free.npz")
# Alternative input (the full filtered dataset), left disabled:
# data = np.load("/kaggle/input/bloodcells-filtered/dataset_filtered.npz")

# Pull both arrays out of the archive once and report their dtype and shape.
images, labels = data['images'], data['labels']
print(
    f"images: {images.dtype}{images.shape}",
    f"labels: {labels.dtype}{labels.shape}",
    sep="\n",
)

# Example augmentation

In [None]:
# Draw one sample at random from the loaded images and pass it through
# augh.augment so the input and output can be compared.
image = random.choice(images)
augmented_image = augh.augment(image)

# Report dtype/shape and the min/max value of both the input and the output.
print(
    f"input image: {image.dtype}{image.shape}",
    f"output image: {augmented_image.dtype}{augmented_image.shape}",
    f"input image range: ({image.min()}, {image.max()})",
    f"output image range: ({augmented_image.min()}, {augmented_image.max()})",
    sep="\n",
)

# Visual comparison of the original and the augmented image.
plot.plot_image(image, augmented_image)

# Augment dataset

In [None]:
# Run the whole loaded dataset through augh.augment_set; the result is used
# below as a mapping with 'images' and 'labels' entries.
# NOTE(review): `top=5000` presumably sets a target/cap on the number of
# samples (per class?) — confirm against augmentation.augment_set.
augmented_data = augh.augment_set(data, top=5000)

# Export dataset

In [None]:
# Write the augmented dataset to an .npz archive in the working directory;
# the mapping is unpacked so each key becomes a named array in the file.
np.savez("dataset_evaluation.npz", **augmented_data)
# Alternative output file name, left disabled (presumably toggled by hand
# together with the alternative input in the import cell — confirm).
# np.savez("dataset_maximisation.npz", **augmented_data)

# Show dataset distribution

In [None]:
# Pull the augmented arrays out once so they can be inspected here and
# reused by later cells.
augmented_images, augmented_labels = augmented_data['images'], augmented_data['labels']

# Side-by-side dtype/shape report: original vs. augmented, images then labels.
print(
    f"original dataset images: {images.dtype}{images.shape}",
    f"augmented dataset images: {augmented_images.dtype}{augmented_images.shape}",
    f"original dataset labels: {labels.dtype}{labels.shape}",
    f"augmented dataset labels: {augmented_labels.dtype}{augmented_labels.shape}",
    sep="\n",
)

# Plot the label arrays of the original and the augmented dataset together.
plot.plot_distribution(data['labels'], augmented_data['labels'])

# Print some images

In [None]:
# Display sample images from the augmented dataset together with their labels.
# NOTE(review): `row=4` presumably sets the number of rows in the image grid —
# confirm against plotter.print_example_images.
plot.print_example_images(augmented_images, augmented_labels, row=4)

# Split dataset

In [None]:
# Split the augmented dataset into a validation part (first return value) and
# a training part (second return value), using fold.split_set's defaults.
# NOTE(review): this split runs on the already-augmented data; if augment_set
# derives several samples from one source image, variants of the same image
# can end up in both validation and training — confirm this is intended.
validation, training = fold.split_set(augmented_data)

# Show dataset distributions after split

In [None]:
# Unpack both splits into plain arrays for inspection.
val_images, val_labels = validation['images'], validation['labels']
train_images, train_labels = training['images'], training['labels']

# dtype/shape report: original vs. validation vs. training, images then labels.
print(
    f"original dataset images: {images.dtype}{images.shape}",
    f"validation dataset images: {val_images.dtype}{val_images.shape}",
    f"training dataset images: {train_images.dtype}{train_images.shape}",
    f"original dataset labels: {labels.dtype}{labels.shape}",
    f"validation dataset labels: {val_labels.dtype}{val_labels.shape}",
    f"training dataset labels: {train_labels.dtype}{train_labels.shape}",
    sep="\n",
)

# Plot the label arrays of all four sets together: original, augmented,
# validation and training.
plot.plot_distribution(
    data['labels'],
    augmented_data['labels'],
    validation['labels'],
    training['labels'],
)

# Print example images after split

In [None]:
# Show example images for each split, validation first and then training,
# each preceded by an upper-case heading line.
for split_heading, split_data in (("VALIDATION", validation), ("TRAINING", training)):
    print(split_heading)
    plot.print_example_images(split_data['images'], split_data['labels'])