In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [2]:
file = '../data/iris.csv'

In [3]:
# 데이터 셋 만들기
irisDF = pd.read_csv(file)
feature = irisDF.iloc[:, :-1]
target = irisDF.iloc[:, -1]

In [4]:
from sklearn.preprocessing import LabelEncoder

target = LabelEncoder().fit_transform(target)

In [5]:
featureNP = np.array(feature)
targetNP = np.array(target)

In [6]:
featureT = torch.FloatTensor(featureNP)
targetT = torch.FloatTensor(targetNP)

In [7]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [8]:
class CustomDataset(Dataset) :
    def __init__(self, x, y) -> None :
        super().__init__()
        self.x = torch.FloatTensor(x)
        self.y = torch.LongTensor(y)
        self.classes = np.unique(self.y).tolist()
        self.nclasses = len(self.classes)
        self.len=len(self.x)

    def __len__(self) :
        return self.len
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]



In [9]:
irisDS = CustomDataset(featureNP, targetNP)
print(irisDS[0])

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))


데이터로더의 역할
- Pytorch 데이터 로딩 유틸리티 Class
- 데이터셋에서 지정된 크기만큼 랜덤하게 인덱스 추출하는 Generator

#### DataLoader
- batch_size, shuffle, drop_last, sampler

In [10]:
# 로딩된 데이터 확인 함수
def print_batch_data(loader, epochs, batch_size = 1, shuffle = False, drop_last = False, sampler=None) :
    print(f'[설정값] batch_size : {batch_size}, shuffle : {shuffle}, drop_last : {drop_last}, sampler : {sampler}')
    
    for ep in range(epochs) :
        print(f'[{ep}] EPOCHS=======batch : {len(loader)}개')
        for (feature, label) in loader:
            print(feature.shape, label.shape, label)



In [11]:
# # 로딩된 데이터 확인 함수
# def print_batch_data(loader,epochs, batch_size = 1, shuffle = False, drop_last = False, sampler = None) :
#     print(f'[설정값] batchsize = {batch_size}, shuffle: {shuffle}, drop_last = {drop_last}, sampler = {sampler}')
#     for ep in range(epochs):
#         print(f'[{ep}] EPOCHS===================Batch {len(loader)}===')
#         for (feature,label) in loader:
#             print(feature.shape, label.shape, label)

In [12]:
#### [기본 사용] batch_size=1, shuffle=False, drop_last=False, sampler=None
loader = DataLoader(irisDS)
print_batch_data(loader=loader, epochs=2, batch_size=22)
# for (feature,label) in loader :
#     print(feature.shape, label.shape)

[설정값] batch_size : 22, shuffle : False, drop_last : False, sampler : None
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]

In [13]:
#### [기본 사용] batch_size=1, shuffle=False, drop_last=False, sampler=None
loader = DataLoader(irisDS, 22, drop_last=True)
print_batch_data(loader=loader, epochs=2, batch_size=22, drop_last=True, shuffle=True)

[설정값] batch_size : 22, shuffle : True, drop_last : True, sampler : None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([

In [14]:
for (feature, label) in loader :
    print(feature.shape, label.shape, label, label.bincount())
    break

torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) tensor([22])


In [15]:
label.bincount()

tensor([22])