In [42]:
import torch
from torchvision import transforms
import numpy as np

In [43]:
## 기본 틀
class Custom_Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each sample in ``X`` with its label in ``Y``.

    Args:
        X: Sized, indexable container of samples (e.g. ``np.ndarray``,
            ``torch.Tensor`` or ``list``), indexed along its first dimension.
        Y: Sized, indexable container of labels with the same length as ``X``.
        transform: Optional callable applied to the sample (never to the
            label) each time an item is fetched.

    Raises:
        ValueError: If ``X`` and ``Y`` do not hold the same number of items.
    """

    def __init__(self, X, Y, transform=None):
        # Fail fast on misaligned inputs rather than surfacing an IndexError
        # (or silently ignoring surplus labels) later during iteration.
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same length, got {len(X)} and {len(Y)}"
            )
        self.X = X
        self.Y = Y
        self.transform = transform

    def __len__(self):
        # Number of samples, i.e. the size of the first dimension. len() gives
        # the same value as shape[0] for arrays/tensors and also supports lists.
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for ``idx``, with ``transform`` applied to ``x`` if set."""
        x = self.X[idx]
        if self.transform is not None:
            x = self.transform(x)
        return x, self.Y[idx]

# 데이터 테스트

In [44]:
# Toy data: the integers -10..9 as a column vector, and their squares as labels.
X_data = np.arange(-10, 10).reshape(-1, 1)  # shape: (20,) -> (20, 1)
Y_data = X_data ** 2


def transform(x):
    """Shift a sample by +1.

    Defined as a named function rather than a lambda so that it has a proper
    ``__name__`` and can be pickled (needed when a DataLoader uses worker
    processes started with the spawn method).
    """
    return x + 1

In [45]:
# custom_DS wraps the complete dataset (every sample) before any splitting.
custom_DS = Custom_Dataset(X=X_data, Y=Y_data, transform=transform)

## train, val, test data split

In [46]:
# Split custom_DS into 10 train / 5 validation / 5 test samples.
# A seeded generator keeps the split identical across kernel restarts;
# without it the membership of each subset changes on every run.
train_DS, val_DS, test_DS = torch.utils.data.random_split(
    custom_DS, [10, 5, 5], generator=torch.Generator().manual_seed(42)
)

for subset in (train_DS, val_DS, test_DS):
    print(len(subset))

10
5
5


## train ratio

In [47]:
# Fraction of the full dataset assigned to the training split.
TRAIN_RATIO = 0.8

In [48]:
VAL_RATIO = 0.1  # fraction of the dataset held out for validation

n_total = len(custom_DS)
NoT = int(n_total * TRAIN_RATIO)  # number of training samples
NoV = int(n_total * VAL_RATIO)    # number of validation samples
# The test split takes whatever remains, so the three sizes always sum to
# n_total even when int() truncates the two products above.
NoTes = n_total - NoT - NoV

# Pass absolute counts derived from the ratios; the seeded generator keeps the
# split reproducible across kernel restarts.
train_DS, val_DS, test_DS = torch.utils.data.random_split(
    custom_DS, [NoT, NoV, NoTes], generator=torch.Generator().manual_seed(42)
)

for subset in (train_DS, val_DS, test_DS):
    print(len(subset))

16
2
2


## 데이터로더

In [55]:
# Number of samples per mini-batch, passed as batch_size to each DataLoader.
BATCH_SIZE = 8

In [56]:
# Only the training loader reshuffles its samples. Validation and test loaders
# keep a fixed order: shuffling brings no benefit for evaluation and would make
# its per-batch output differ from run to run.
train_DL = torch.utils.data.DataLoader(train_DS, batch_size=BATCH_SIZE, shuffle=True)
val_DL = torch.utils.data.DataLoader(val_DS, batch_size=BATCH_SIZE, shuffle=False)
test_DL = torch.utils.data.DataLoader(test_DS, batch_size=BATCH_SIZE, shuffle=False)

In [57]:
# Print every batch of each loader in turn, with a dashed rule between splits.
# The transform was +1, so the printed x is already shifted, while y is the
# square of the original (pre-transform) value, i.e. y == (x - 1) ** 2.
for position, loader in enumerate((train_DL, val_DL, test_DL)):
    if position > 0:
        print("-" * 50)
    for x_batch, y_batch in loader:
        flat_x = x_batch.reshape(-1)
        flat_y = y_batch.reshape(-1)
        print(f"x_batch = {flat_x}, \n"
              f"y_batch = {flat_y}")

x_batch = tensor([ 5,  8,  1,  6, -6,  2, -9, -3]), 
y_batch = tensor([ 16,  49,   0,  25,  49,   1, 100,  16])
x_batch = tensor([-2,  7, 10,  4,  0, -7, -4, -1]), 
y_batch = tensor([ 9, 36, 81,  9,  1, 64, 25,  4])
--------------------------------------------------
x_batch = tensor([-5,  3]), 
y_batch = tensor([36,  4])
--------------------------------------------------
x_batch = tensor([-8,  9]), 
y_batch = tensor([81, 64])
