In [2]:
import torch
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Dataset
import numpy as np

1. 파이토치 제공 데이터셋

In [3]:
# Preprocessing pipeline: resize each image with Resize(8), then convert it to a tensor.
transf = tr.Compose([
    tr.Resize(8),
    tr.ToTensor(),
])

In [5]:
# CIFAR-10 train and test splits, downloaded into ./data when not already present.
# The transform pipeline must be able to accept PIL images.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transf,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transf,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


0it [00:00, ?it/s]

Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [12]:
# Serve both splits in shuffled batches of 50, loading with 2 worker processes.
trainloader = DataLoader(
    trainset,
    batch_size=50,
    shuffle=True,
    num_workers=2,
)
testloader = DataLoader(
    testset,
    batch_size=50,
    shuffle=True,
    num_workers=2,
)

In [17]:
# Number of batches the loader yields per pass over the training set.
len(trainloader)

1000

In [18]:
# Create an iterator over the loader and pull a single batch from it.
# The built-in next() is used instead of dataiter.next(): that method was a
# Python-2 style alias that newer PyTorch DataLoader iterators no longer have.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [19]:
# Shape of the image batch as a torch.Size.
images.shape

torch.Size([50, 3, 8, 8])

In [21]:
# Shape of the label batch as a torch.Size.
labels.shape

torch.Size([50])

2. 클래스별 폴더로 정리된 이미지 데이터 이용

In [24]:
# For images already sorted into one sub-folder per class,
# e.g. ./class/tiger and ./class/lion.
# The ./class directory has to exist, otherwise this cell fails with FileNotFoundError.
transf = tr.Compose([
    tr.Resize(16),
    tr.ToTensor(),
])
trainset = torchvision.datasets.ImageFolder(root='./class', transform=transf)
trainloader = DataLoader(
    trainset,
    batch_size=10,
    shuffle=False,
    num_workers=2,
)

FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: './class'

3. 개인 데이터 사용

In [26]:
# Synthetic stand-in for a private dataset: 20 random 32x32 images with 3 channels
# in channels-last layout (N, H, W, C), integer values in [0, 256),
# plus one binary label (0 or 1) per image.
train_images = np.random.randint(0, 256, size=(20, 32, 32, 3))
train_labels = np.random.randint(0, 2, size=(20, 1))

In [29]:
class TensorData(Dataset):
    """Dataset wrapping in-memory channels-last images and their labels.

    Args:
        x_data: array-like of shape (N, H, W, C). Stored as a float tensor
            permuted to channels-first (N, C, H, W).
        y_data: array-like of integer labels with N entries along the first
            axis. Stored as a long tensor.
    """

    def __init__(self, x_data, y_data):
        self.x_data = torch.FloatTensor(x_data)
        # Reorder (N, H, W, C) -> (N, C, H, W).
        self.x_data = self.x_data.permute(0, 3, 1, 2)
        self.y_data = torch.LongTensor(y_data)
        # The sample count is taken from the label tensor.
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        """Return the (image, label) pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [30]:
# Wrap the random arrays in the dataset and serve them in shuffled batches of 10.
train_data = TensorData(train_images, train_labels)
train_loader = DataLoader(
    train_data,
    batch_size=10,
    shuffle=True,
)

In [33]:
# Shape of the first sample's image tensor.
train_data[0][0].shape

torch.Size([3, 32, 32])

In [34]:
# Pull one batch from the custom-dataset loader.
# The built-in next() is used instead of dataiter.next(): that method was a
# Python-2 style alias that newer PyTorch DataLoader iterators no longer have.
dataiter = iter(train_loader)
images, label = next(dataiter)

In [35]:
# Shape of the image batch as a torch.Size.
images.shape

torch.Size([10, 3, 32, 32])

In [39]:
##### Template worth remembering
class MyDataset(Dataset):
    """Dataset over paired inputs and labels with an optional per-sample transform.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its ``len`` is the dataset size.
        transform: optional callable applied to each ``(input, label)`` pair.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        pair = (self.x_data[index], self.y_data[index])
        # The transform receives the whole (input, label) pair, not just the input.
        return self.transform(pair) if self.transform else pair

# The torchvision transform modules expect PIL input by default, so this custom
# callable converts array samples to tensors itself.
class ToTensor:
    """Convert an ``(image, label)`` sample into a float image tensor and a long label tensor."""

    def __call__(self, sample):
        image, target = sample
        # Channels-last (H, W, C) -> channels-first (C, H, W).
        image_tensor = torch.FloatTensor(image).permute(2, 0, 1)
        label_tensor = torch.LongTensor(target)
        return image_tensor, label_tensor

class LinearTensor:
    """Apply the affine map ``slope * inputs + bias`` to a sample's inputs.

    The label part of the ``(inputs, labels)`` sample is passed through unchanged.
    """

    def __init__(self, slope=1, bias=0):
        self.slope, self.bias = slope, bias

    def __call__(self, sample):
        values, target = sample
        scaled = self.slope * values
        return scaled + self.bias, target

In [40]:
# Chain the custom sample-level transforms: tensor conversion, then 2 * x + 5.
trans = tr.Compose([
    ToTensor(),
    LinearTensor(2, 5),
])
ds1 = MyDataset(train_images, train_labels, transform=trans)
train_loader1 = DataLoader(
    ds1,
    batch_size=10,
    shuffle=True,
)

In [42]:
# Inspect the types of the first transformed sample.
first_data = ds1[0]
features, labels = first_data[0], first_data[1]
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [43]:
# Pull one batch from the loader built on the custom transforms.
# The built-in next() is used instead of dataiter1.next(): that method was a
# Python-2 style alias that newer PyTorch DataLoader iterators no longer have.
dataiter1 = iter(train_loader1)
images1, label1 = next(dataiter1)

In [44]:
# First image of the batch drawn from train_loader1
# (dataset pipeline: ToTensor, then LinearTensor(2, 5)).
images1[0]

tensor([[[271., 503., 437.,  ...,  45., 461., 367.],
         [ 43., 275., 285.,  ..., 123., 421., 419.],
         [ 99., 177., 399.,  ..., 269., 283., 451.],
         ...,
         [399., 419., 163.,  ...,  73., 207.,  47.],
         [319., 405., 339.,  ..., 169., 473., 299.],
         [401.,  17., 171.,  ...,  11., 331.,  57.]],

        [[467., 407., 455.,  ..., 223., 507.,  31.],
         [151., 203., 133.,  ..., 229.,  47., 249.],
         [499., 249., 147.,  ..., 379., 381., 229.],
         ...,
         [105., 321., 423.,  ..., 113., 173.,  69.],
         [129., 343.,  69.,  ..., 407., 217.,  41.],
         [233., 481., 147.,  ...,  45., 435.,  47.]],

        [[347., 345.,  83.,  ..., 313., 317., 367.],
         [ 87., 493., 271.,  ..., 321., 251., 121.],
         [185., 219., 339.,  ...,  17., 259., 259.],
         ...,
         [377., 117., 309.,  ..., 349.,  99., 435.],
         [463., 403., 295.,  ..., 453., 471., 429.],
         [ 17., 115., 457.,  ..., 409., 111., 387.]]]

In [70]:
##### Template worth remembering
# NOTE: this repeats the MyDataset definition from the earlier cell; running it
# rebinds the name to an equivalent class.
class MyDataset(Dataset):
    """Dataset over paired inputs and labels with an optional per-sample transform.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its ``len`` is the dataset size.
        transform: optional callable applied to each ``(input, label)`` pair.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        pair = (self.x_data[index], self.y_data[index])
        # The transform receives the whole (input, label) pair, not just the input.
        return self.transform(pair) if self.transform else pair

# The torchvision transform modules expect PIL input by default. To use them on
# array data, put tr.ToPILImage() first in the tr.Compose pipeline.

class MyTransform:
    """Turn an ``(image, label)`` sample into a normalized image tensor and a float label tensor.

    The image is expected in channels-last layout (H, W, C). Integer images are
    treated as 0-255 pixel values; floating-point images are handed to
    ``ToPILImage`` as float tensors, as before.

    Returns:
        ``(image, labels)`` where ``image`` is the output of
        ToPILImage -> ToTensor -> Normalize(mean 0.5, std 0.5 per channel)
        and ``labels`` is a ``torch.FloatTensor`` built from the input labels.
    """

    def __init__(self):
        # Build the pipeline once instead of on every __call__.
        # A tr.Resize(128) step could be inserted after ToPILImage if needed.
        self.pipeline = tr.Compose([
            tr.ToPILImage(),
            tr.ToTensor(),
            tr.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __call__(self, sample):
        inputs, labels = sample
        pixels = torch.as_tensor(inputs)
        if pixels.is_floating_point():
            pixels = pixels.float()
        else:
            # ToPILImage multiplies float tensors by 255 before converting to
            # bytes, which corrupts values that are already in the 0-255 range.
            # Passing uint8 keeps integer pixel values as they are.
            pixels = pixels.to(torch.uint8)
        # Channels-last (H, W, C) -> channels-first (C, H, W).
        pixels = pixels.permute(2, 0, 1)
        labels = torch.FloatTensor(labels)

        return self.pipeline(pixels), labels

In [71]:
# Dataset using the torchvision-based MyTransform, served in shuffled batches of 10.
ds2 = MyDataset(train_images, train_labels, transform=MyTransform())
train_loader2 = DataLoader(
    ds2,
    batch_size=10,
    shuffle=True,
)

In [72]:
# Inspect the types of the first sample produced through MyTransform.
first_data = ds2[0]
features, labels = first_data[0], first_data[1]
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [73]:
# Pull one batch from the MyTransform-based loader.
# The built-in next() is used instead of dataiter2.next(): that method was a
# Python-2 style alias that newer PyTorch DataLoader iterators no longer have.
dataiter2 = iter(train_loader2)
images2, label2 = next(dataiter2)

In [74]:
# Shape of the image batch as a torch.Size.
images2.shape

torch.Size([10, 3, 32, 32])

In [83]:
# Raw (untransformed) integer pixel values of the first image, channels-first (C, H, W).
x = torch.tensor(train_images)[0].permute(2, 0, 1)
x

tensor([[[176,  47,  89,  ..., 197, 212,  11],
         [145,  61,  47,  ..., 167,  13, 134],
         [ 89,  63,   3,  ...,  21, 182, 132],
         ...,
         [236, 140, 191,  ..., 131, 132, 111],
         [ 42, 236,   6,  ..., 217, 204, 205],
         [234, 154,  15,  ...,  36,  66, 156]],

        [[190,  67, 137,  ..., 175, 138,   7],
         [248, 119, 252,  ..., 153, 132, 199],
         [  3, 116, 119,  ..., 130, 227,  47],
         ...,
         [ 12, 209,  48,  ..., 208, 252,  55],
         [170, 121, 161,  ...,  21, 250,  97],
         [252,  73, 176,  ...,  42, 199,  89]],

        [[ 45, 129, 206,  ..., 145,  93,  79],
         [  4,  37,  39,  ..., 231,  11,   5],
         [ 56, 126, 185,  ...,  40, 128, 214],
         ...,
         [129,  24, 209,  ...,  94,   5,  38],
         [ 68, 251, 162,  ..., 137, 151,  22],
         [178, 225, 137,  ...,  49, 157,  27]]], dtype=torch.int32)