In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# [SETTING WARNINGS]
# Install an 'ignore' filter for the base Warning category, which silences
# every warning class for the rest of the session.
import warnings
warnings.simplefilter('ignore', category=Warning)

# import modules and components
from FEXT.commons.utils.validation.images import ImageAnalysis
from FEXT.commons.utils.dataloader.serializer import DataSerializer
from FEXT.commons.utils.process.splitting import TrainValidationSplit
from FEXT.commons.constants import CONFIG, IMG_DATA_PATH

Select whether to normalize the images to the [0, 1] range, and whether to use only a fraction of the available data.

In [None]:
# Fraction of the available data to use; passed as `sample_size` when the
# image paths are collected.
data_fraction = 0.1
# Whether the images should be normalized to the [0, 1] range.
normalize_images = True

In [None]:
# Build the serializer from the project configuration, then request the image
# paths for IMG_DATA_PATH, using data_fraction as the sample size.
serializer = DataSerializer(CONFIG)
images_paths = serializer.get_images_path(
    IMG_DATA_PATH,
    sample_size=data_fraction,
)

# Evaluate image dataset

---

### Calculate images statistics
For every image in the dataset, we compute essential statistics such as average brightness, spread of pixel values (median, standard deviation, minimum, and maximum), and the range of pixel intensities. Additionally, the level of noise is estimated by comparing the original image with a slightly blurred version. All the gathered metrics, along with the image identifiers, are compiled into a structured table, which is then saved as a CSV file.

In [None]:
# Create the image analyzer; the same instance is reused by the pixel-intensity
# cell further down.
analyzer = ImageAnalysis()
# Run the statistics pass over the sampled image paths and keep the result.
# NOTE(review): the notebook text says the table is also saved as a CSV file —
# presumably done inside calculate_image_statistics; confirm.
image_statistics = analyzer.calculate_image_statistics(images_paths)

### Pixel intensity

The pixel intensity distribution of the entire image dataset is evaluated by plotting the histogram with the mean value of pixel intensity for all images.

In [None]:
# Pixel-intensity analysis over the same sampled image paths. As the last
# expression of the cell, any value it returns is displayed by the notebook.
analyzer.calculate_pixel_intensity(images_paths)

## Compare train and validation datasets

---

In [None]:
# Split the sampled image paths into train and validation subsets according to
# the project configuration.
splitter = TrainValidationSplit(images_paths, CONFIG)
train_data, validation_data = splitter.split_train_and_validation()

# Report the size of each subset, one line per subset.
print(
    f'Number of train samples: {len(train_data)}',
    f'Number of validation samples: {len(validation_data)}',
    sep='\n',
)

### Pixel intensity

The pixel intensity distribution is now used to compare the train and validation datasets by plotting overlapping histograms with the mean value of pixel intensity for all images.