In [3]:
import torch
import torchvision
import os
import numpy as np
from pathlib import Path
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Dataset root directory and the names of its split sub-folders
data_dir = "input/segmented_chest_xray"
phases = ['train', 'val', 'test']

# Preprocessing pipeline: resize each image to 1024x1024, then convert it to a tensor
transform = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor()
])

# Build one ImageFolder dataset per split, keyed by split name
image_datasets = {
    phase: datasets.ImageFolder(os.path.join(data_dir, phase), transform)
    for phase in phases
}

# Wrap every split in a shuffling DataLoader that yields batches of 16 images
batch_size = 16
dataloaders = {
    phase: DataLoader(image_datasets[phase], batch_size=batch_size, shuffle=True)
    for phase in phases
}

def _mean_std_from_sums(total, sq_total, count):
    """Return (mean, population std) from a running sum, sum of squares and count."""
    mean = total / count
    # E[x^2] - E[x]^2 can come out slightly negative through floating-point
    # rounding; a negative float raised to 0.5 would yield a complex number,
    # so clamp the variance at zero first.
    variance = max(sq_total / count - mean ** 2, 0.0)
    return mean, variance ** 0.5


def calculate_non_zero_mean_std(dataloader, device, report_every=1_000_000):
    """Compute the mean and standard deviation of all pixels greater than zero.

    Each batch yielded by ``dataloader`` is moved to ``device``. Values that
    are strictly greater than zero are pooled across every image and channel,
    and a single scalar mean and population standard deviation are returned.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` pairs where
            ``images`` is a tensor; the labels are ignored.
        device: Device each batch is moved to before it is reduced.
        report_every: Print the running statistics each time the number of
            processed non-zero pixels crosses another multiple of this value
            (at most once per batch). ``0`` or ``None`` disables the reports.

    Returns:
        ``(mean, std)`` as Python floats, or ``(nan, nan)`` if no pixel
        greater than zero was seen.
    """
    non_zero_sum = 0.0
    non_zero_sq_sum = 0.0
    non_zero_count = 0
    next_report = report_every

    for images, _ in dataloader:
        images = images.to(device)

        # Boolean-mask indexing already returns a flat 1-D tensor of the
        # selected values, so no reshape is required beforehand.
        non_zero_pixels = images[images > 0]

        batch_count = non_zero_pixels.numel()
        if batch_count == 0:  # nothing to accumulate from this batch
            continue

        non_zero_sum += non_zero_pixels.sum().item()
        non_zero_sq_sum += (non_zero_pixels ** 2).sum().item()
        non_zero_count += batch_count

        # Progress report: fire only when a new multiple of report_every has
        # been crossed, instead of on every batch after the first threshold.
        if report_every and non_zero_count >= next_report:
            intermediate_mean, intermediate_std = _mean_std_from_sums(
                non_zero_sum, non_zero_sq_sum, non_zero_count
            )
            print(f"Intermediate Mean: {intermediate_mean}, Intermediate Std: {intermediate_std}")
            next_report = (non_zero_count // report_every + 1) * report_every

    if non_zero_count == 0:  # no pixel greater than zero in the whole dataset
        return float('nan'), float('nan')

    return _mean_std_from_sums(non_zero_sum, non_zero_sq_sum, non_zero_count)

# Pick the compute device: the first CUDA GPU when available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Compute and print the statistics over the training split
mean, std = calculate_non_zero_mean_std(dataloaders['train'], device)
print(f"Mean: {mean}")
print(f"Std: {std}")


Intermediate Mean: 0.4381377257502554, Intermediate Std: 0.1741469354330572
Intermediate Mean: 0.44029111638073426, Intermediate Std: 0.1711763092106223
Intermediate Mean: 0.44374453692267846, Intermediate Std: 0.16534963303639882
Intermediate Mean: 0.446653725309989, Intermediate Std: 0.16704042995570387
Intermediate Mean: 0.44925216102414567, Intermediate Std: 0.1693197412654415
Intermediate Mean: 0.45160420040539806, Intermediate Std: 0.1690010244405088
Intermediate Mean: 0.453649818965771, Intermediate Std: 0.1686089477598742
Intermediate Mean: 0.4565148890199603, Intermediate Std: 0.16803475776868337
Intermediate Mean: 0.4578548257681673, Intermediate Std: 0.1686843470742016
Intermediate Mean: 0.45557129406160757, Intermediate Std: 0.17014769091499712
Intermediate Mean: 0.4578216377435688, Intermediate Std: 0.17029504337491744
Intermediate Mean: 0.45706153386503345, Intermediate Std: 0.16938884857956593
Intermediate Mean: 0.4564272040734348, Intermediate Std: 0.1684556455368441
In