### Computing mean and std of colors for the whole dataset (for normalization)

In [1]:
import os
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [10]:
from tqdm import tqdm

In [11]:
def calculate_mean_std(folder_path, target_size=(256, 256), batch_size=32):
    """Compute the per-channel mean and standard deviation of an image folder.

    Every sample of ``ImageFolder(folder_path)`` is resized with
    ``transforms.Resize(target_size)``, converted with ``transforms.ToTensor``
    and folded into running per-channel sums, so the dataset is processed one
    batch at a time instead of being loaded at once.

    Args:
        folder_path: Root directory handed to ``ImageFolder``.
        target_size: Size handed to ``transforms.Resize``.
        batch_size: Number of images loaded per iteration.

    Returns:
        A ``(mean, std)`` tuple of 1-D tensors holding one value per channel
        (three channels are assumed), in torch's default dtype. ``std`` is
        the population standard deviation, ``sqrt(E[x^2] - E[x]^2)``.

    Raises:
        ValueError: If iterating the dataset produced no pixels at all.
    """
    # Resize so the default collate function can stack images into batches.
    transform = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor()
    ])

    dataset = ImageFolder(folder_path, transform=transform)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Accumulate in float64: the sums run over every pixel of the dataset and
    # single-precision accumulators lose accuracy once the totals get large.
    channels_sum = torch.zeros(3, dtype=torch.float64)  # Assuming RGB images
    channels_squared_sum = torch.zeros(3, dtype=torch.float64)
    # Pixels per channel actually summed; taken from the batch shapes so the
    # divisor always matches the data, whatever form `target_size` has.
    num_pixels = 0

    for batch_images, _ in tqdm(data_loader):
        batch_images = batch_images.to(torch.float64)
        # Reduce over batch, height and width; keep the channel dimension.
        channels_sum += batch_images.sum(dim=[0, 2, 3])
        channels_squared_sum += (batch_images ** 2).sum(dim=[0, 2, 3])
        num_pixels += (
            batch_images.size(0) * batch_images.size(2) * batch_images.size(3)
        )

    if num_pixels == 0:
        raise ValueError(f"No image data found in {folder_path!r}")

    mean = channels_sum / num_pixels
    # Rounding can push the variance of a (near-)constant channel slightly
    # below zero, which would turn the square root into NaN.
    variance = torch.clamp(channels_squared_sum / num_pixels - mean ** 2, min=0.0)
    std = torch.sqrt(variance)

    # Hand the results back in the default dtype, as the original did.
    result_dtype = torch.get_default_dtype()
    return mean.to(result_dtype), std.to(result_dtype)

# Example usage: compute the per-channel statistics of the images under
# `folder_path` and print them.
# NOTE(review): the path below is an absolute, machine-specific location;
# point it at your own dataset root before running this cell.
folder_path = "/home/bart_grabek/Documents/DL/DL-2024-Diffusion/data"
mean, std = calculate_mean_std(folder_path)
print("Mean:", mean)
print("Standard Deviation:", std)
