In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd

def get_std(df, pixel_counts, mean):
    """Combine per-image statistics into a single dataset-level std.

    Uses the identity Var[X] = E[X^2] - (E[X])^2, where the dataset-wide
    E[X^2] is the pixel-count-weighted average of each image's second
    moment (mean_i^2 + std_i^2).

    Args:
        df: DataFrame with per-image ``'mean'`` and ``'std'`` columns.
        pixel_counts: Per-image pixel counts used as weights; must align
            with the rows of ``df``.
        mean: Dataset-level mean that is squared and subtracted. For the
            result to be the true pooled std this should be the
            pixel-weighted mean of ``df['mean']`` (caller's responsibility).

    Returns:
        The dataset-level standard deviation.
    """
    # Per-image second moment: E[X^2] = mean_i^2 + std_i^2
    per_image_ex2 = df['mean']**2 + df['std']**2

    # Dataset-wide E[X^2], weighted by the number of pixels per image
    dataset_ex2 = (per_image_ex2 * pixel_counts).sum() / pixel_counts.sum()

    # Dataset-wide variance
    dataset_var = dataset_ex2 - mean**2

    # Rounding can push a (near-)zero variance slightly below zero, which
    # would make sqrt return NaN; clamp to zero first.
    dataset_var = np.maximum(dataset_var, 0.0)

    # Dataset-wide standard deviation
    return np.sqrt(dataset_var)

def get_describe(imgs_dir_path, fname_list):
    """Collect size and grayscale intensity statistics for each image.

    Every file is opened, converted to 8-bit grayscale (mode ``'L'``) and
    scaled by 1/255 before its mean and standard deviation are computed.

    Args:
        imgs_dir_path: Directory containing the image files.
        fname_list: File names (relative to ``imgs_dir_path``) to process.

    Returns:
        A list with one dict per file, in input order, with the keys
        ``'width'``, ``'height'``, ``'mean'`` and ``'std'``.
    """
    records = []
    for fname in fname_list:
        img_path = os.path.join(imgs_dir_path, fname)

        # Open via a context manager so the file handle is released right
        # away instead of lingering until garbage collection; convert()
        # returns an independent, fully loaded copy.
        with Image.open(img_path) as src:
            img = src.convert('L')

        # Scale 8-bit intensities by 1/255 before computing statistics.
        arr = np.array(img, dtype=float) / 255
        width, height = img.size

        records.append({
            'width': width,
            'height': height,
            'mean': float(arr.mean()),
            'std': float(arr.std()),
        })

    return records

def save_imgs_df(cfg):
    """Write per-image statistics for the training set to ``meta.csv``.

    Image files are listed in the ``NORMAL`` and ``PNEUMONIA``
    sub-directories of ``cfg.data.train_dir``, described with
    ``get_describe``, and the combined records (NORMAL rows first) are
    saved without an index column to ``{cfg.data.root}/meta.csv``.

    Args:
        cfg: Configuration object exposing ``data.train_dir`` and
            ``data.root``.
    """
    class_dirs = (
        f'{cfg.data.train_dir}/NORMAL',
        f'{cfg.data.train_dir}/PNEUMONIA',
    )

    # List both directories before reading any image.
    listings = [get_image_list(class_dir) for class_dir in class_dirs]

    rows = []
    for class_dir, fnames in zip(class_dirs, listings):
        rows.extend(get_describe(class_dir, fnames))

    pd.DataFrame(rows).to_csv(f'{cfg.data.root}/meta.csv', index=False)


def get_image_list(dir_path):
    """Return the names of image files found directly inside a directory.

    An entry counts as an image when its lower-cased name ends with
    ``.jpg``, ``.png`` or ``.jpeg``. Names are returned with their
    original casing, in the order produced by ``os.listdir``.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        A list of matching entry names.
    """
    extensions = ('.jpg', '.png', '.jpeg')
    return [
        entry
        for entry in os.listdir(dir_path)
        if entry.lower().endswith(extensions)
    ]