In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [5]:
class IrisDataset(Dataset):
    """Map-style dataset that keeps features and class labels as tensors.

    Features are stored in ``self.feature`` as a float32 tensor and labels in
    ``self.target`` as an int64 tensor. Indexing yields one
    ``(feature, target)`` pair.
    """

    def __init__(self, X_data, y_data) -> None:
        """Convert the inputs to tensors.

        Args:
            X_data: Feature values. Any array-like NumPy can convert to
                float32 (ndarray, pandas DataFrame/Series, nested list, ...).
            y_data: Class labels, one per sample. Any array-like NumPy can
                convert to int64.

        Raises:
            ValueError: If X_data and y_data hold a different number of
                samples (or if a value cannot be converted to the target
                dtype).
        """
        super().__init__()
        # np.asarray accepts ndarrays, pandas objects and plain sequences alike
        # and only copies when a dtype conversion is actually required.
        X_data = np.asarray(X_data, dtype=np.float32)
        y_data = np.asarray(y_data, dtype=np.int64)
        # Compare only the leading (sample) dimension; a mismatch would
        # otherwise surface much later as an IndexError during iteration.
        if X_data.shape[:1] != y_data.shape[:1]:
            raise ValueError(
                f"X_data and y_data must have the same number of samples, "
                f"got {X_data.shape[:1]} and {y_data.shape[:1]}"
            )
        self.feature = torch.FloatTensor(X_data)
        self.target = torch.LongTensor(y_data)

    def __len__(self):
        """Return the number of samples (length of the label tensor)."""
        return len(self.target)

    def __getitem__(self, idx):
        """Return the ``(feature, target)`` pair stored at ``idx``."""
        return self.feature[idx], self.target[idx]

    def getInOut(self):
        """Return ``(n_features, n_classes)``.

        ``n_features`` is the size of the second feature dimension, so the
        feature tensor must be at least 2-D. ``n_classes`` counts the distinct
        label values actually present in ``self.target``.
        """
        return self.feature.shape[1], len(torch.unique(self.target))


In [13]:
# The CSV is read twice: once for the four numeric feature columns and once
# for the label column, which is kept as strings. skiprows=1 skips the first
# line of the file (presumably the header row).
filename = "../data/iris.csv"
irisNP = np.loadtxt(filename, usecols=(0, 1, 2, 3), skiprows=1, delimiter=",")
labelNP = np.loadtxt(filename, usecols=(4,), skiprows=1, delimiter=",", dtype=str)


In [19]:
# Encode species names as integer class ids:
# "setosa" -> 0, "versicolor" -> 1, every other name -> 2.
# The dtype guard keeps this cell idempotent: once labelNP already holds
# integers, comparing it with the species strings again would no longer match
# and a second run would corrupt every label.
if not np.issubdtype(labelNP.dtype, np.integer):
    labelNP = np.where(labelNP == "setosa", 0, np.where(labelNP == "versicolor", 1, 2))


In [20]:
# Wrap the feature matrix and the encoded labels in the tensor-backed dataset.
irisDS = IrisDataset(X_data=irisNP, y_data=labelNP)


In [22]:
# Display (number of feature columns, number of distinct labels) of the dataset.
irisDS.getInOut()


(4, 3)

In [24]:
# Display the shape of the full feature tensor held by the dataset.
irisDS.feature.shape


torch.Size([150, 4])

#### DataLoader

-   Main `DataLoader` options: `batch_size`, `shuffle`, `drop_last`, `sampler`


In [37]:
### Helper for inspecting the batches a loader yields
def print_batch_data(
    loader, epochs, batch_size=1, shuffle=False, drop_last=False, sampler=None
):
    """Print every batch produced by ``loader`` for ``epochs`` passes.

    Args:
        loader: Sized iterable yielding ``(feature, label)`` pairs whose
            elements expose ``.shape`` (e.g. a DataLoader).
        epochs: Number of full passes to make over ``loader``.
        batch_size, shuffle, drop_last, sampler: Only echoed in the header
            line; they are NOT applied to ``loader`` and must be kept in sync
            with how the loader was built by the caller.
    """
    header = f"[설정값] batch_size: {batch_size}, shuffle: {shuffle}, drop_last: {drop_last}, sampler: {sampler}"
    print(header)

    for epoch_idx in range(epochs):
        n_batches = len(loader)
        print(f"[{epoch_idx} EPOCHS]========== batch : {n_batches}개")
        for batch in loader:
            features, labels = batch
            print(features.shape, labels.shape, labels)


In [38]:
#### ===> [Default usage] batch_size=1, shuffle=False, drop_last=False, sampler=None
# No options are passed, so the loader yields one sample per batch in order.
loader = DataLoader(dataset=irisDS)
print_batch_data(loader, epochs=2)


[설정값] batch_size: 1, shuffle: False, drop_last: False, sampler: None
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) ten

In [39]:
#### ===> [Setting] batch_size=22, shuffle=False, drop_last=False, sampler=None
# 150 samples do not divide evenly by 22, so the final batch of each epoch is
# a smaller remainder batch.
loader = DataLoader(dataset=irisDS, batch_size=22)
print_batch_data(loader, epochs=2, batch_size=22)


[설정값] batch_size: 22, shuffle: False, drop_last: False, sampler: None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([18, 4]) torch.Size([18]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0

In [40]:
#### ===> [Setting] batch_size=22, shuffle=False, drop_last=True, sampler=None
# drop_last=True discards the incomplete remainder batch, so every batch has
# exactly 22 samples.
loader = DataLoader(dataset=irisDS, batch_size=22, drop_last=True)
print_batch_data(loader, epochs=2, batch_size=22, drop_last=True)


[설정값] batch_size: 22, shuffle: False, drop_last: True, sampler: None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]

In [42]:
#### ===> [Setting] batch_size=22, shuffle=True, drop_last=True, sampler=None
# A dedicated, seeded generator drives the shuffle so that a fresh
# "Restart Kernel -> Run All" reproduces the same batch order. The data is
# still reshuffled on every epoch because the generator state advances.
loader = DataLoader(
    irisDS,
    batch_size=22,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator().manual_seed(42),
)
print_batch_data(loader, 2, 22, shuffle=True, drop_last=True)


[설정값] batch_size: 22, shuffle: True, drop_last: True, sampler: None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 2, 2, 1, 2, 0, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 2, 0, 0, 2, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 2, 1, 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 0, 2, 1, 2, 2, 1, 0, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 1, 2, 2, 0, 2, 0, 0, 1, 1, 0, 0, 2, 2, 2, 0, 0, 2, 1, 1, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 2, 0, 2, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 1, 0, 0, 2, 2, 0, 1, 1, 1, 0, 0, 2, 1, 0, 2, 2, 1, 1, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 1, 1, 1, 0, 2, 1, 2, 1, 2, 0, 0, 1, 2, 2, 1, 1, 1, 2, 0, 0, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 2, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0, 2, 2, 0, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 0, 2, 1, 1, 1, 0, 1, 2, 0, 0, 0, 2, 0, 2, 2, 1, 0, 2, 1])
torch.Size([22, 4]) torch.Size([22])

In [76]:
# Per-batch class histogram for the current `loader`.
# minlength pins the histogram to one slot per class. Without it, bincount
# stops at the largest label present in the batch, so a batch that lacks the
# highest class would print a shorter, misaligned count vector.
n_classes = loader.dataset.getInOut()[1]
for feature, label in loader:
    print(feature.shape, label.shape, label.bincount(minlength=n_classes))


torch.Size([22, 4]) torch.Size([22]) tensor([9, 7, 6])
torch.Size([22, 4]) torch.Size([22]) tensor([7, 7, 8])
torch.Size([22, 4]) torch.Size([22]) tensor([8, 9, 5])
torch.Size([22, 4]) torch.Size([22]) tensor([8, 7, 7])
torch.Size([22, 4]) torch.Size([22]) tensor([7, 6, 9])
torch.Size([22, 4]) torch.Size([22]) tensor([7, 6, 9])


In [77]:
# Print the built-in reference documentation for DataLoader (long output).
help(DataLoader)


Help on class DataLoader in module torch.utils.data.dataloader:

class DataLoader(typing.Generic)
 |  DataLoader(*args, **kwds)
 |  
 |  Data loader combines a dataset and a sampler, and provides an iterable over the given dataset.
 |  
 |  The :class:`~torch.utils.data.DataLoader` supports both map-style and
 |  iterable-style datasets with single- or multi-process loading, customizing
 |  loading order and optional automatic batching (collation) and memory pinning.
 |  
 |  See :py:mod:`torch.utils.data` documentation page for more details.
 |  
 |  Args:
 |      dataset (Dataset): dataset from which to load the data.
 |      batch_size (int, optional): how many samples per batch to load
 |          (default: ``1``).
 |      shuffle (bool, optional): set to ``True`` to have the data reshuffled
 |          at every epoch (default: ``False``).
 |      sampler (Sampler or Iterable, optional): defines the strategy to draw
 |          samples from the dataset. Can be any ``Iterable`` with