In [None]:
# Check Image Size

from PIL import Image
from torchvision import datasets

# Dataset splits to scan; each one is handed to datasets.ImageFolder as a root.
root_dirs = ['train', 'test']

# Class folder names of the dataset (not referenced by the size check below).
classes = [
    'Chorionic_villi',
    'Decidual_tissue',
    'Hemorrhage',
    'Trophoblastic_tissue',
]

# Findings per split, keyed by split name:
# {'train': {'first_size': (W, H), 'inconsistent_images': [...]}, 'test': {...}}
results = {}

for root_dir in root_dirs:
    # Register the (still empty) record first so that a split which fails to
    # load is nevertheless present in `results`.
    current_result = {'first_size': None, 'inconsistent_images': []}
    results[root_dir] = current_result

    print(f"\n{root_dir.upper()} directory check start")

    try:
        dataset = datasets.ImageFolder(root_dir, transform=None)
        image_paths_and_labels = dataset.samples
    except FileNotFoundError:
        print(f"  No '{root_dir}' directory.")
        continue
    except Exception as e:
        print(f"  Error loading data from '{root_dir}': {e}")
        continue

    if len(image_paths_and_labels) == 0:
        print(f"  No images found in '{root_dir}'.")
        continue

    for image_path, _label in image_paths_and_labels:
        try:
            with Image.open(image_path) as img:
                current_size = img.size  # (width, height)

                # The first readable image defines the reference size.
                if current_result['first_size'] is None:
                    current_result['first_size'] = current_size
                    continue

                if current_size == current_result['first_size']:
                    continue

                # Anything that differs from the reference is recorded.
                current_result['inconsistent_images'].append({
                    'path': image_path,
                    'actual_size': current_size
                })
        except IOError:
            print(f"  Can't open '{image_path}'.")

# Report the outcome of the size check for every split.
for root_dir in root_dirs:
    result = results.get(root_dir)
    if not result:
        continue

    first_size = result['first_size']
    inconsistent_images = result['inconsistent_images']
    inconsistent_count = len(inconsistent_images)

    print(f"\n### {root_dir.upper()} directory")

    if first_size is None and inconsistent_count == 0:
        print("  [Error] No data.")
    elif inconsistent_count == 0:
        print(f"  Sizes of all data are {first_size}.")
    else:
        # The listed sizes only make sense relative to the reference size
        # (the size of the first readable image), so state it up front
        # together with how many images deviate from it.
        print(f"  Reference size: {first_size} | {inconsistent_count} image(s) differ")

        # Compact tally per deviating size, so the overall picture is visible
        # even when the per-file listing below is long.
        size_counts = {}
        for item in inconsistent_images:
            size = item['actual_size']
            size_counts[size] = size_counts.get(size, 0) + 1
        for size, count in sorted(size_counts.items()):
            print(f"  Size {size}: {count} image(s)")

        for item in inconsistent_images:
            print(f"  File: {item['path']} | Size: {item['actual_size']}")


TRAIN directory check start

TEST directory check start

### TRAIN directory
  File: train\Chorionic_villi\326.jpg | Size: (157, 224)
  File: train\Chorionic_villi\327.jpg | Size: (157, 224)
  File: train\Chorionic_villi\328.jpg | Size: (157, 224)
  File: train\Trophoblastic_tissue\427.jpg | Size: (205, 224)
  File: train\Trophoblastic_tissue\428.jpg | Size: (205, 224)
  File: train\Trophoblastic_tissue\429.jpg | Size: (205, 224)
  File: train\Trophoblastic_tissue\439.jpg | Size: (187, 224)
  File: train\Trophoblastic_tissue\440.jpg | Size: (187, 224)
  File: train\Trophoblastic_tissue\441.jpg | Size: (187, 224)
  File: train\Trophoblastic_tissue\47.jpg | Size: (191, 224)
  File: train\Trophoblastic_tissue\475.jpg | Size: (208, 224)
  File: train\Trophoblastic_tissue\476.jpg | Size: (208, 224)
  File: train\Trophoblastic_tissue\477.jpg | Size: (208, 224)
  File: train\Trophoblastic_tissue\48.jpg | Size: (191, 224)
  File: train\Trophoblastic_tissue\530.jpg | Size: (103, 224)
  File: t

In [None]:
# PCA for color augmentation

import numpy as np
import torch
from torchvision import datasets, transforms
from PIL import Image
from sklearn.decomposition import PCA

# Root directory passed to calculate_pca_for_rgb in the __main__ section below.
data_dir = 'train'

def calculate_pca_for_rgb(data_dir, img_size):
    """Compute the principal components of RGB pixel values in an image folder.

    Every image under ``data_dir`` is resized to ``img_size`` x ``img_size``
    and converted to 0-255 RGB values; each pixel is one 3-dimensional sample.
    Only running sums (pixel count, per-channel sum, 3x3 sum of outer
    products) are kept while iterating, so memory use does not grow with the
    number of images. The covariance matrix built from those sums uses the
    ``n - 1`` denominator and is then eigendecomposed.

    Args:
        data_dir: Root directory in the layout expected by
            ``datasets.ImageFolder``.
        img_size: Side length (pixels) every image is resized to.

    Returns:
        ``(eigenvalues, eigenvectors)`` as float32 tensors: eigenvalues has
        shape (3,) sorted in descending order, eigenvectors has shape (3, 3)
        with one principal axis per row, in matching order. Returns
        ``(None, None)`` when ``data_dir`` is missing or fewer than two
        pixels were read.
    """
    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor() # (0~1)
    ])

    try:
        dataset = datasets.ImageFolder(data_dir, transform=transform)
    except FileNotFoundError:
        print(f"  No \'{data_dir}\' directory.")
        return None, None

    # Running first and second moments of the pixel distribution.
    pixel_count = 0
    channel_sum = np.zeros(3, dtype=np.float64)
    outer_sum = np.zeros((3, 3), dtype=np.float64)

    for i in range(len(dataset)):
        img_tensor, _ = dataset[i]

        # (C, H, W) in 0~1 -> (3, H*W) in 0~255
        img_np = (img_tensor.numpy() * 255).astype(np.uint8)
        pixels = img_np.reshape(3, -1).astype(np.float64)

        pixel_count += pixels.shape[1]
        channel_sum += pixels.sum(axis=1)
        outer_sum += pixels @ pixels.T

    # A sample covariance needs at least two samples.
    if pixel_count < 2:
        print(f"  Not enough pixel data in '{data_dir}'.")
        return None, None

    mean = channel_sum / pixel_count
    covariance = (outer_sum - pixel_count * np.outer(mean, mean)) / (pixel_count - 1)

    # eigh returns ascending eigenvalues with eigenvectors as columns;
    # reorder to descending and put one eigenvector per row.
    raw_values, raw_vectors = np.linalg.eigh(covariance)
    order = np.argsort(raw_values)[::-1]
    # Round-off can push a near-zero eigenvalue slightly below zero.
    eigenvalues = np.clip(raw_values[order], 0.0, None)
    eigenvectors = raw_vectors[:, order].T

    # The sign of an eigenvector is arbitrary; fix it so that the entry with
    # the largest magnitude in each row is positive, giving stable output.
    dominant = np.argmax(np.abs(eigenvectors), axis=1)
    signs = np.sign(eigenvectors[np.arange(eigenvectors.shape[0]), dominant])
    signs[signs == 0] = 1.0
    eigenvectors = eigenvectors * signs[:, np.newaxis]

    eigenvalues_tensor = torch.from_numpy(np.ascontiguousarray(eigenvalues)).float()
    eigenvectors_tensor = torch.from_numpy(np.ascontiguousarray(eigenvectors)).float()

    return eigenvalues_tensor, eigenvectors_tensor

def _print_pca_constants(tag, size, eigvals, eigvecs):
    """Print one PCA result as paste-ready ``torch.tensor`` assignments."""
    print(f"\n--- {tag}. {size}x{size} PCA value ---")
    formatted_values = ', '.join(f"{v:.6f}" for v in eigvals)
    print(f"POC_EIGVALS_{size} = torch.tensor([", formatted_values, "])")
    print(f"POC_EIGVECS_{size} = torch.tensor([")
    for row in range(3):
        cells = ", ".join(f"{eigvecs[row, col]:.4f}" for col in range(3))
        print("    [" + cells + "],")
    print("])")


if __name__ == '__main__':

    # Run both passes first, then print, so the two reports appear together.
    # 224x224 (for resize)
    eigvals_224, eigvecs_224 = calculate_pca_for_rgb(data_dir, 224)
    # 256x256 (for crop)
    eigvals_256, eigvecs_256 = calculate_pca_for_rgb(data_dir, 256)

    pca_reports = (
        ('A', 224, eigvals_224, eigvecs_224),
        ('B', 256, eigvals_256, eigvecs_256),
    )
    for tag, size, eigvals, eigvecs in pca_reports:
        # A failed pass returns (None, None); skip its report.
        if eigvals is not None:
            _print_pca_constants(tag, size, eigvals, eigvecs)


--- A. 224x224 PCA value ---
POC_EIGVALS_224 = torch.tensor([ 3290.864258, 1239.206299, 538.111511 ])
POC_EIGVECS_224 = torch.tensor([
    [0.4459, 0.7635, 0.4671],
    [0.8872, -0.4461, -0.1177],
    [-0.1185, -0.4669, 0.8763],
])

--- B. 256x256 PCA value ---
POC_EIGVALS_256 = torch.tensor([ 3126.742920, 1230.723022, 530.931519 ])
POC_EIGVECS_256 = torch.tensor([
    [0.4332, 0.7737, 0.4623],
    [0.8945, -0.4318, -0.1156],
    [-0.1102, -0.4637, 0.8791],
])
