In [1]:
import torch
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Dataset
import numpy as np

# 1. 파이토치 제공 데이터 사용

In [2]:
# Preprocessing pipeline applied to every image:
#   1. Resize(8)  - shrink the image (the recorded sample size is 3 x 8 x 8)
#   2. ToTensor() - convert the image to a tensor
# Original author's note: usable only when the input is a PIL image.
transf = tr.Compose(
    [
        tr.Resize(8),
        tr.ToTensor(),
    ]
)

In [3]:
# CIFAR-10 train and test splits, stored under ./data (download=True) and
# passed through the `transf` preprocessing pipeline.
trainset, testset = (
    torchvision.datasets.CIFAR10(
        root='./data',
        train=is_train,
        download=True,
        transform=transf,
    )
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Size of the first training image; the channel count comes first.
trainset[0][0].size()

torch.Size([3, 8, 8])

In [5]:
# Batched loaders over both splits: 50 samples per batch, shuffled, 2 workers.
# NOTE(review): the test loader is shuffled too; evaluation normally does not
# need shuffling - confirm this is intended.
trainloader, testloader = (
    DataLoader(split, batch_size=50, shuffle=True, num_workers=2)
    for split in (trainset, testset)
)

In [6]:
# The 50,000 training samples are cut into batches of batch_size (50),
# so this is the number of batches.
len(trainloader)

1000

In [7]:
# Fetch a single batch from the training loader.
# The built-in next() is used because DataLoader iterators in recent PyTorch
# releases no longer provide a `.next()` method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [8]:
images.size() # batch size, channels, image size

torch.Size([50, 3, 8, 8])

# 2. 같은 클래스 별 폴더 이미지 데이터 이용

In [None]:
# Case where the images are already sorted into one folder per class,
# e.g. ./class/dogs and ./class/cats.
# NOTE: this cell rebinds `transf`, `trainset` and `trainloader` from section 1.
transf = tr.Compose(
    [
        tr.Resize(16),
        tr.ToTensor(),
    ]
)
# ImageFolder assigns a label to each sub-folder of `root`.
trainset = torchvision.datasets.ImageFolder(
    root='./class',
    transform=transf,
)
trainloader = DataLoader(
    trainset,
    batch_size=10,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Size of the image tensor of the first sample in `trainset`.
trainset[0][0].size()

# 3. 개인 데이터 사용 (2 types)

In [9]:
# Stand-in for user-supplied data: 20 random 32x32 3-channel images with
# integer values in 0..255, plus one binary label per image.
# A seeded generator keeps the arrays identical across Restart & Run All.
#import preprocessing

rng = np.random.default_rng(0)
train_images = rng.integers(256, size=(20, 32, 32, 3))
train_labels = rng.integers(2, size=(20, 1))

# Placeholder for a real preprocessing step, e.g.:
#train_images, train_labels = preprocessing(train_images, train_labels)

print(train_images.shape, train_labels.shape)

(20, 32, 32, 3) (20, 1)


In [10]:
class TensorData(Dataset):
    """Dataset over in-memory images and labels, converted to tensors up front.

    Args:
        x_data: 4-D image data; assumed to be laid out as
            (count, height, width, channels) - TODO confirm against callers.
        y_data: integer labels with one entry per image along the first axis.
    """

    def __init__(self, x_data, y_data):
        images = torch.FloatTensor(x_data)
        # Axis order (0, 3, 1, 2): the last axis moves right behind the sample
        # axis, giving a channel-first layout for channel-last input.
        self.x_data = images.permute(0, 3, 1, 2)
        self.y_data = torch.LongTensor(y_data)
        self.len = self.y_data.shape[0]

    def __len__(self):
        """Number of samples (length of the label tensor's first axis)."""
        return self.len

    def __getitem__(self, index):
        """Return the (image, label) tensors stored at ``index``."""
        return self.x_data[index], self.y_data[index]

In [11]:
# Wrap the raw arrays in the tensor dataset and batch them 10 at a time, shuffled.
train_data = TensorData(
    train_images,
    train_labels,
)
train_loader = DataLoader(
    train_data,
    batch_size=10,
    shuffle=True,
)

In [12]:
# Size of the first sample's image tensor; channels come first after the permute in TensorData.
train_data[0][0].size()

torch.Size([3, 32, 32])

In [13]:
# Fetch a single batch from the custom-data loader.
# The built-in next() is used because DataLoader iterators in recent PyTorch
# releases no longer provide a `.next()` method.
dataiter = iter(train_loader)
images, labels = next(dataiter)

In [14]:
images.size() # batch size, channels, image size

torch.Size([10, 3, 32, 32])

##### class 별로 안 나눠져 있는 경우

from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Skeleton of a custom Dataset: the three methods a subclass fills in."""

    def __init__(self):
        # Store the data (and any transform) here.
        pass

    def __getitem__(self, index):
        # Return the sample at `index`.
        raise NotImplementedError

    def __len__(self):
        # Return the number of samples.
        raise NotImplementedError

In [15]:
class MyDataset(Dataset):
    """Index-aligned (input, label) pairs with an optional per-sample transform.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its length is the dataset size.
        transform: optional callable that receives the ``(input, label)`` tuple
            and returns the transformed sample.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __len__(self):
        """Number of samples, taken from ``y_data`` at construction time."""
        return self.len

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        pair = (self.x_data[index], self.y_data[index])
        # Without a transform the raw pair is handed back in its original type.
        return self.transform(pair) if self.transform else pair
    
class ToTensor:
    """Callable transform that turns an ``(inputs, labels)`` sample into tensors."""

    def __call__(self, sample):
        """Return ``(float tensor, long tensor)`` built from the sample.

        The inputs are permuted with axis order (2, 0, 1), i.e. the last axis
        of a 3-D input is moved to the front.
        """
        raw_inputs, raw_labels = sample
        as_float = torch.FloatTensor(raw_inputs).permute(2, 0, 1)
        as_long = torch.LongTensor(raw_labels)
        return as_float, as_long

class LinearTensor:
    """Callable transform applying ``slope * inputs + bias`` to a sample's inputs.

    Args:
        slope: multiplier applied to the inputs (default 1).
        bias: offset added after scaling (default 0).
    """

    def __init__(self, slope=1, bias=0):
        self.slope, self.bias = slope, bias

    def __call__(self, sample):
        """Return ``(slope * inputs + bias, labels)``; labels pass through unchanged."""
        features, target = sample
        return self.slope * features + self.bias, target

In [17]:
# Look at the first raw image: with no transform applied it is still a NumPy array.
features = first_data = train_images[0]
print(type(features))

<class 'numpy.ndarray'>


In [18]:
# Chain the two custom transforms (tensor conversion, then 2*x + 5) and batch
# the transformed dataset 10 samples at a time, shuffled.
trans = tr.Compose(
    [
        ToTensor(),
        LinearTensor(slope=2, bias=5),
    ]
)
ds1 = MyDataset(
    train_images,
    train_labels,
    transform=trans,
)
train_loader1 = DataLoader(
    ds1,
    batch_size=10,
    shuffle=True,
)

In [19]:
# After the transforms, both parts of the first sample are tensors.
first_data = ds1[0]
features, label = first_data
# Report the type of the `label` unpacked above, not the `labels` batch left
# over from an earlier cell.
print(type(features), type(label))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [20]:
# Fetch a single batch from the transformed loader.
# The built-in next() is used because DataLoader iterators in recent PyTorch
# releases no longer provide a `.next()` method.
dataiter1 = iter(train_loader1)
images1, labels1 = next(dataiter1)

In [21]:
# Display the fetched batch of images from train_loader1.
# NOTE(review): this prints the whole tensor; a slice or .size() would keep the output short.
images1

tensor([[[[ 95., 429., 153.,  ...,  47.,  25., 247.],
          [377., 339.,  39.,  ..., 311., 187., 419.],
          [ 93., 341., 147.,  ..., 269., 103., 407.],
          ...,
          [ 83.,  35., 139.,  ..., 461., 305., 183.],
          [163., 213., 499.,  ...,  31.,   9., 451.],
          [ 69., 473., 267.,  ..., 355., 425., 475.]],

         [[503., 287., 365.,  ...,  93.,  61., 209.],
          [181.,  67., 191.,  ..., 171., 191., 315.],
          [399., 285., 363.,  ..., 141.,  25., 305.],
          ...,
          [431.,  65.,  63.,  ..., 195., 125., 171.],
          [ 63., 301., 377.,  ..., 397., 195., 107.],
          [379., 439., 141.,  ..., 231., 265., 287.]],

         [[143.,  83.,  73.,  ..., 397., 257., 229.],
          [367., 179., 141.,  ..., 229., 279., 479.],
          [203., 127., 211.,  ..., 203., 199., 335.],
          ...,
          [499.,  45., 277.,  ..., 117., 495.,  67.],
          [489., 195., 325.,  ...,  33., 355., 483.],
          [497., 479., 205.,  ...

# 제공하는 Transforms 사용

In [22]:
class MyDataset(Dataset):
    """Index-aligned (input, label) pairs with an optional per-sample transform.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its length is the dataset size.
        transform: optional callable that receives the ``(input, label)`` tuple
            and returns the transformed sample.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __len__(self):
        """Number of samples, taken from ``y_data`` at construction time."""
        return self.len

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        pair = (self.x_data[index], self.y_data[index])
        # Without a transform the raw pair is handed back in its original type.
        return self.transform(pair) if self.transform else pair

class MyTransform:
    """Callable ``(inputs, labels)`` transform built on torchvision's stock transforms.

    The image goes through ToPILImage -> Resize(128) -> ToTensor ->
    Normalize(mean=0.5, std=0.5 per channel); the labels become a LongTensor.

    Assumes ``inputs`` is a channel-last (H, W, 3) array of integer pixel
    values in 0..255 - TODO confirm for data other than the notebook's
    random images.
    """

    def __init__(self):
        # Built once here instead of on every sample.
        self.pipeline = tr.Compose([
            tr.ToPILImage(),
            tr.Resize(128),
            tr.ToTensor(),
            # Normalize takes (mean, std); a third positional argument is the
            # `inplace` flag, so no extra tuple is passed.
            tr.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __call__(self, sample):
        """Return ``(normalized image tensor, label tensor)`` for one sample."""
        inputs, labels = sample
        # ToPILImage rescales floating-point tensors by 255, which would
        # corrupt values already in 0..255, so the pixels are kept as uint8.
        inputs = torch.as_tensor(inputs, dtype=torch.uint8)
        inputs = inputs.permute(2, 0, 1)
        labels = torch.LongTensor(labels)

        return self.pipeline(inputs), labels

In [23]:
# Dataset that runs every sample through MyTransform, batched 10 at a time, shuffled.
ds2 = MyDataset(
    train_images,
    train_labels,
    transform=MyTransform(),
)
train_loader2 = DataLoader(
    ds2,
    batch_size=10,
    shuffle=True,
)

In [24]:
# After MyTransform, both parts of the first sample are tensors.
first_data = ds2[0]
features, label = first_data
# Report the type of the `label` unpacked above, not the `labels` batch left
# over from an earlier cell.
print(type(features), type(label))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [25]:
# Fetch a single batch from the MyTransform loader.
# The built-in next() is used because DataLoader iterators in recent PyTorch
# releases no longer provide a `.next()` method.
dataiter2 = iter(train_loader2)
images2, labels2 = next(dataiter2)

In [26]:
# Size of the fetched batch from train_loader2 (MyTransform resizes with Resize(128)).
images2.size()

torch.Size([10, 3, 128, 128])

In [27]:
# Display the fetched batch of images from train_loader2.
# NOTE(review): this prints the whole tensor; a slice or .size() would keep the output short.
images2

tensor([[[[ 0.4667,  0.4667,  0.4431,  ..., -0.1216, -0.1216, -0.1216],
          [ 0.4667,  0.4667,  0.4431,  ..., -0.1216, -0.1216, -0.1216],
          [ 0.5137,  0.5137,  0.4902,  ..., -0.1373, -0.1529, -0.1529],
          ...,
          [-0.0353, -0.0353, -0.1373,  ..., -0.4196, -0.4510, -0.4510],
          [-0.0510, -0.0510, -0.1608,  ..., -0.3647, -0.3882, -0.3882],
          [-0.0510, -0.0510, -0.1608,  ..., -0.3647, -0.3882, -0.3882]],

         [[-0.8510, -0.8510, -0.8431,  ..., -0.1608, -0.1137, -0.1137],
          [-0.8510, -0.8510, -0.8431,  ..., -0.1608, -0.1137, -0.1137],
          [-0.6392, -0.6392, -0.6392,  ..., -0.1294, -0.0902, -0.0902],
          ...,
          [ 0.1922,  0.1922,  0.0588,  ..., -0.4588, -0.5765, -0.5765],
          [ 0.1059,  0.1059, -0.0196,  ..., -0.5137, -0.6392, -0.6392],
          [ 0.1059,  0.1059, -0.0196,  ..., -0.5137, -0.6392, -0.6392]],

         [[ 0.8196,  0.8196,  0.6235,  ...,  0.1137,  0.1373,  0.1373],
          [ 0.8196,  0.8196,  