In [32]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2

# Target image width/height (pixels) and channel count
IMG_WIDTH, IMG_HEIGHT = 256, 256
IMG_CHANNELS = 3

# Data path configuration
TRAIN_PATH = '../data/'
# TEST_PATH = './page_data/test/'

class CustomDataset(Dataset):
    """Segmentation dataset that pairs each image with its mask by file name.

    Every regular file in ``image_dir`` is treated as one sample. The matching
    mask is looked up in ``mask_dir`` under the same file name with ``'org'``
    replaced by ``'seg'``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the masks.
        transform: Optional callable invoked as
            ``transform(image=image, mask=mask)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries (the albumentations calling
            convention). ``None`` disables augmentation.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order and also lists
        # sub-directories. Keep regular files only and sort them so that a
        # given index always refers to the same sample across runs.
        self.images = sorted(
            name for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, optionally augment, resize and tensorize sample ``idx``.

        Returns:
            A ``(image, mask)`` tuple. ``image`` is a float tensor laid out as
            (C, IMG_HEIGHT, IMG_WIDTH) holding the pixel values divided by 255,
            in the channel order produced by ``cv2.imread`` (BGR). ``mask`` is
            a long tensor of shape (IMG_HEIGHT, IMG_WIDTH) holding the raw
            grayscale mask values.

        Raises:
            OSError: If the image or its mask cannot be read.
        """
        file_name = self.images[idx]
        img_path = os.path.join(self.image_dir, file_name)
        mask_path = os.path.join(self.mask_dir, file_name.replace('org', 'seg'))

        # cv2.imread() signals failure by returning None instead of raising;
        # fail here with the offending path rather than later inside resize.
        image = cv2.imread(img_path)
        if image is None:
            raise OSError(f'Could not read image: {img_path}')
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise OSError(f'Could not read mask: {mask_path}')

        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # cv2.resize takes the target size as (width, height).
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        # Nearest-neighbour keeps the mask's original values; the default
        # bilinear interpolation would blend labels along region borders.
        mask = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT),
                          interpolation=cv2.INTER_NEAREST)

        # HxWxC array -> CxHxW float tensor, scaled by 1/255.
        image = torch.from_numpy(image).float().permute(2, 0, 1) / 255.0
        mask = torch.from_numpy(mask).long()

        return image, mask

# Optional augmentation applied jointly to an image and its mask.
# CustomDataset invokes it as transform(image=..., mask=...) and reads the
# 'image' and 'mask' entries of the result (albumentations-style). A
# torchvision transforms.Compose accepts only a single positional input, so
# passing one here raises TypeError on the first sample. Keep this as None
# until a pipeline with the keyword interface is supplied.
transform = None

# Initialize your dataset
train_dataset = CustomDataset(
    image_dir=TRAIN_PATH+'org/',
    mask_dir=TRAIN_PATH+'seg/',
    transform=transform
)

# Initialize DataLoader
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)


In [29]:
# Target image width/height (pixels) and channel count
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 3


# Data path configuration
TRAIN_PATH = '../data/'
# TEST_PATH = './page_data/test/'

# Suppress UserWarning messages coming from skimage (optional)
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Collect the image and mask file paths.
# glob returns paths in arbitrary order, so both lists are sorted to make
# index n of one list correspond to index n of the other.
# NOTE(review): assumes image and mask file names sort into the same order
# (e.g. they differ only by a fixed org/seg token) - confirm against the data.
train_imgs = sorted(glob.glob(TRAIN_PATH+'org/*.jpg'))
train_masks = sorted(glob.glob(TRAIN_PATH+'seg/*.jpg'))
# test_imgs = glob.glob(TEST_PATH+'org/*.jpg')
# test_masks = glob.glob(TEST_PATH+'seg/*.jpg')


# Every image needs exactly one mask; stop instead of building misaligned
# arrays (or hitting an IndexError halfway through the loop below).
num_of_train_imgs = len(train_imgs)
num_of_train_masks = len(train_masks)
if num_of_train_imgs != num_of_train_masks:
    raise ValueError('invalid datasets, please check train data')

# Pre-allocate the output arrays
# images
X_train = np.zeros((num_of_train_imgs, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
# masks
Y_train = np.zeros((num_of_train_masks, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
print('Getting and resizing train images and masks ... ')
sys.stdout.flush()

for n in range(num_of_train_imgs):
    # cv2.imread returns an 8-bit, 3-channel array (or None on failure).
    # The pixels are stored as-is: multiplying a uint8 array by 255 wraps
    # around modulo 256 and would corrupt every pixel value.
    img = cv2.imread(train_imgs[n])
    if img is None:
        raise OSError(f'Could not read image: {train_imgs[n]}')
    # Resize to the model input size; cv2.resize takes (width, height)
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)
    X_train[n] = img

    mask = cv2.imread(train_masks[n])
    if mask is None:
        raise OSError(f'Could not read mask: {train_masks[n]}')
    # Nearest-neighbour keeps the mask's original values; bicubic
    # interpolation would create blended/overshoot values at region borders.
    mask_resized = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_NEAREST)
    Y_train[n] = mask_resized


Getting and resizing train images and masks ... 
