In [1]:
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

from PIL import Image, ImageOps
import pandas as pd
import numpy as np

In [2]:
# Choose the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device:', device)

Device: cuda


In [3]:
# Read the CSV at ./data/hangeul_2350.csv into a DataFrame.
df = pd.read_csv('./data/hangeul_2350.csv')

In [4]:
# Display the loaded DataFrame to inspect its columns and rows.
df

Unnamed: 0,img_path,label
0,/data/1_syllable/00165717.png,훌
1,/data/1_syllable/00094922.png,흴
2,/data/1_syllable/00163366.png,뭡
3,/data/1_syllable/00189967.png,딪
4,/data/1_syllable/00135120.png,염
...,...,...
152467,/data/2_syllable/01188949.png,붓
152468,/data/2_syllable/00549013.png,뎄
152469,/data/2_syllable/00380048.png,웍
152470,/data/2_syllable/00859689.png,윈


In [5]:
# Transform pipeline applied to every image before it is returned by the dataset.
transform_train = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),  # resize to a fixed 224x224
        transforms.ToTensor(),               # PIL image -> torch tensor
    ]
)

In [6]:
class path_to_img(Dataset):
    """Dataset that loads grayscale images from file paths paired with labels.

    Args:
        img_path: Sequence of image paths (e.g. a list or a pandas Series).
            Each path is appended to '.' before opening, so '/data/x.png'
            is read from './data/x.png'.
        labels: Sequence of labels aligned position-by-position with
            ``img_path``.
        transform: Callable applied to the grayscale PIL image; its result is
            returned as the sample's image.
    """

    def __init__(self, img_path, labels, transform):
        # Keep references to the inputs; nothing is loaded until __getitem__.
        self.img_path = img_path
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the total number of samples."""
        return len(self.img_path)

    @staticmethod
    def _item_at(container, position):
        """Return the element at integer ``position`` of ``container``.

        pandas objects are indexed through ``.iloc`` so the lookup is always
        positional. Plain ``series[idx]`` is label-based and raises KeyError
        (or returns the wrong row) once the Series index is no longer
        0..n-1, e.g. after shuffling, filtering or a train/test split.
        """
        positional = getattr(container, 'iloc', None)
        if positional is None:
            return container[position]
        return positional[position]

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transformed_image, label)``."""
        path = '.' + self._item_at(self.img_path, idx)
        # The context manager closes the underlying file handle; grayscale()
        # produces an independent image that stays valid after the close.
        with Image.open(path) as raw:
            image = ImageOps.grayscale(raw)
        image = self.transform(image)
        return image, self._item_at(self.labels, idx)

In [7]:
# Select columns by name rather than by position, so the selection stays
# correct even if the CSV gains or reorders columns (e.g. a saved index column).
df_X = df['img_path']  # image file paths
df_y = df['label']     # label for each image

In [8]:
# Display the image-path Series.
df_X

0         /data/1_syllable/00165717.png
1         /data/1_syllable/00094922.png
2         /data/1_syllable/00163366.png
3         /data/1_syllable/00189967.png
4         /data/1_syllable/00135120.png
                      ...              
152467    /data/2_syllable/01188949.png
152468    /data/2_syllable/00549013.png
152469    /data/2_syllable/00380048.png
152470    /data/2_syllable/00859689.png
152471    /data/2_syllable/00499030.png
Name: img_path, Length: 152472, dtype: object

In [9]:
# Display the label Series.
df_y

0         훌
1         흴
2         뭡
3         딪
4         염
         ..
152467    붓
152468    뎄
152469    웍
152470    윈
152471    잰
Name: label, Length: 152472, dtype: object

In [10]:
# Build the dataset from the path/label Series and the transform pipeline,
# then report how many samples it contains.
dataset = path_to_img(
    img_path=df_X,
    labels=df_y,
    transform=transform_train,
)
print(len(dataset))

152472


In [11]:
# Fetch the first sample; the dataset returns an (image, label) pair.
img, label = dataset[0]

In [12]:
# Display the transformed image returned for sample 0.
img

tensor([[[0.7608, 0.8588, 0.9137,  ..., 0.8392, 0.8118, 0.7608],
         [0.8118, 0.9098, 0.9647,  ..., 0.9647, 0.9373, 0.8824],
         [0.8471, 0.9373, 0.9882,  ..., 0.9961, 0.9647, 0.9098],
         ...,
         [0.9059, 0.9647, 0.9882,  ..., 0.9725, 0.9059, 0.7725],
         [0.9020, 0.9569, 0.9843,  ..., 0.9843, 0.9176, 0.7882],
         [0.8824, 0.9490, 0.9882,  ..., 0.9922, 0.9255, 0.7961]]])

In [13]:
# Display the label returned for sample 0.
label

'훌'