In [1]:
# 모듈 로딩
import torch
import torch.nn as nn
import torch.optim as optim 
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd

In [4]:
data_file = '../DATA/Iris.csv'

In [5]:
irisDF = pd.read_csv(data_file)

In [6]:
irisDF.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [44]:
# 사용자 정의 DataSet 클래스
class DLDataset(Dataset):

    # 초기화 콜백 함수(callback function)
    def __init__(self, x_data, y_data):
        super().__init__()
        x_data = x_data.values if isinstance(x_data, pd.DataFrame) else x_data
        y_data = y_data.values if isinstance(y_data, pd.DataFrame) else y_data
        self.feature = torch.tensor(x_data)
        self.target = torch.tensor(y_data)
        self.classes = np.unique(self.target).tolist()
        self.nclasses = len(self.classes)
        self.len = len(self.feature)

        # ndarray => tensor
        # self.feature = torch.FloatTensor(x_data)
        # self.target = torch.FloatTensor(y_data)
    
    # 데이터셋의 개수 체크 콜백 함수(callback function)
    def __len__(self):
        return self.target.shape[0]
    
    # 특정 인덱스 데이터 + 라벨 반환 콜백 함수(callback function)
    def __getitem__(self, index):
        return self.feature[index], self.target[index]

In [45]:
# 피처와 라벨 분리
featureDF = irisDF[irisDF.columns[:-1]]
targetSR = irisDF[irisDF.columns[-1]]

print(f'featureDF => {featureDF.shape}, {featureDF.ndim}D')
print(f'targerSR => {targetSR.shape}, {targetSR.ndim}D')

featureDF => (150, 4), 2D
targerSR => (150,), 1D


In [46]:
# 타겟이 object이므로 정수로 변환
from sklearn.preprocessing import LabelEncoder

targetNP = LabelEncoder().fit_transform(targetSR)
print(targetNP.shape, targetNP.ndim)

(150,) 1


In [47]:
# 데이터셋 생성 => DF, NP
irisDS = DLDataset(featureDF, targetNP)
irisDS[0]

(tensor([5.1000, 3.5000, 1.4000, 0.2000], dtype=torch.float64),
 tensor(0, dtype=torch.int32))

#### DataLoader
- batch_size, shuffle, drop_last, sampler

In [49]:
# 로딩된 데이터 확인 함수
def print_batch_data(loader, epochs, batch_size = 1, shuffle = False, drop_last = False, sampler = None) -> None:
    print(f'[설정값] batch_size : {batch_size}, shuffle : {shuffle}, drop_last : {drop_last}, sampler : {sampler}')

    for ep in range(epochs):
        print(f'[{ep + 1} EPOCHS] ============ batch : {len(loader)}개')
        for (feature, label) in loader:
            print(feature.shape, label.shape, label)

In [50]:
# [기본사용] batch_size = 1, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(irisDS)
print_batch_data(loader, epochs = 2)

[설정값] batch_size : 1, shuffle : False, drop_last : False, sampler : None
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
torch.Size([1, 4]) torch.Size([1]) tensor([0], dtype=torch.int32)
tor

In [51]:
# [기본사용] batch_size = 22, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(irisDS, 22)
print_batch_data(loader, epochs = 2, batch_size = 22)

[설정값] batch_size : 22, shuffle : False, drop_last : False, sampler : None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       dtype=torch.int32)
torch.Size([18, 4]) torch.Size([18]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

In [52]:
# [기본사용] batch_size = 22, shuffle = True, drop_last = True, sampler = None
loader = DataLoader(irisDS, 22, shuffle = True, drop_last = True)
print_batch_data(loader, epochs = 2, batch_size = 22, shuffle = True, drop_last = True)

[설정값] batch_size : 22, shuffle : True, drop_last : True, sampler : None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 1, 2, 0, 2, 1, 2, 2, 1, 1, 1, 2, 1, 0, 0, 0, 2, 0, 1, 1, 0, 1],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([0, 2, 2, 1, 0, 0, 1, 2, 2, 2, 0, 2, 0, 2, 2, 2, 1, 2, 1, 1, 1, 2],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 0, 1, 2, 1, 0, 0, 1, 1, 0, 2, 0, 0, 1, 1, 0, 0, 2, 0, 2, 2],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([0, 1, 1, 1, 0, 0, 0, 1, 2, 1, 1, 0, 0, 1, 1, 2, 0, 2, 1, 2, 1, 2],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([1, 2, 0, 2, 0, 0, 1, 1, 1, 0, 2, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 2],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1, 2, 0, 0, 0, 0, 0, 2, 2, 2, 1, 0],
       dtype=torch.int32)
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 2, 2, 0, 1, 0, 2, 2, 0, 1

In [53]:
for (feature, label) in loader:
    print(feature.shape, label.shape, label)
    break

torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 0, 2, 2, 2, 0, 2, 0, 0, 1, 2, 1, 1, 2, 1, 1, 1, 0, 0, 1, 0],
       dtype=torch.int32)


In [54]:
# 클래스별 데이터 개수
label.bincount()

tensor([7, 9, 6])