<a href="https://colab.research.google.com/github/CatalystM47/Deep_Learning/blob/main/PyTorch_Transform_ex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. 데이터 불러오기

In [49]:
import torch
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Dataset
import numpy as np

In [50]:
# Compose runs the listed transforms in order: resize first, then convert to a tensor.
# Original author's note: feed PIL images into this pipeline; passing NumPy arrays
# raised an error for them (likely from Resize -- TODO confirm).
# Other transforms that can be added the same way: Pad, Grayscale, RandomCrop, Normalize
transf = tr.Compose([
    tr.Resize(8),
    tr.ToTensor(),
])

In [51]:
# CIFAR10 train/test splits, downloaded into ./data if missing; `transf` is applied to every image.
trainset = torchvision.datasets.CIFAR10(
    root = './data',
    train = True,
    download = True,
    transform = transf,
)
testset = torchvision.datasets.CIFAR10(
    root = './data',
    train = False,
    download = True,
    transform = transf,
)

Files already downloaded and verified
Files already downloaded and verified


In [52]:
# Size of the first training image after `transf` has been applied.
trainset[0][0].size()

torch.Size([3, 8, 8])

In [53]:
# Batch the datasets 50 samples at a time using 2 worker processes.
# Training batches are reshuffled every epoch; the test loader keeps a fixed
# order so that evaluation runs are reproducible and comparable.
trainloader = DataLoader(trainset, batch_size = 50, shuffle = True, num_workers = 2)
testloader = DataLoader(testset, batch_size = 50, shuffle = False, num_workers = 2)

In [54]:
# Number of batches the training loader yields per epoch.
len(trainloader)

1000

In [55]:
# Pull a single batch from the training loader.
# Use the built-in next(): DataLoader iterators follow the standard iterator
# protocol, while the old `.next()` method is not available on newer PyTorch releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [56]:
images.size() # PyTorch networks take input ordered as [batch size, channels, image size, image size].

torch.Size([50, 3, 8, 8])

2. 같은 클래스 별 폴더 이미지 데이터 이용

In [57]:
# ./class/tiger ./class/lion 으로 만약 레이블링을 해두었을 경우 사용하기.
# 다만, 다른 작업과 공용으로 사용하거나, 폴더가 아닌 SQL 같은곳에서 넘어오는 경우 폴더 라벨링 힘듦.

#transf = tr.Compose([tr.Resize(16), tr.ToTensor()])
#trainset = torchvision.datasets.ImageFolder(root = './class', transform = transf) # 트랜스폼으로 전처리 작업 가능
#trainloader = DataLoader(trainset, batch_size = 10, shuffle = False, num_workers = 2)
#print(len(trainloader))

3. 개인 데이터 사용 (2 types)

In [58]:
# import preprocessing

# Pretend the data arrived from elsewhere as NumPy arrays: 20 arrays of shape (32, 32, 3)
# holding integers in [0, 256).
train_images = np.random.randint(0, 256, size = (20, 32, 32, 3))
# One label (0 or 1) per image, stored as a column vector.
train_labels = np.random.randint(0, 2, size = (20, 1))

# Any preprocessing step would go here, e.g.:
# train_images, train_labels = preprocessing(train_images, train_labels)

print(train_images.shape, train_labels.shape)

(20, 32, 32, 3) (20, 1)


In [59]:
# 외부 데이터 받아서
# 1. 텐서로 변경
# 2. permute함수로 순서 변경
# 3. 데이터 개수 산출
# 4. getitem으로 튜플 형식으로 리턴

class TensorData(Dataset):
    """Dataset over in-memory arrays that are converted to tensors up front.

    Args:
        x_data: image data indexed as (sample, height, width, channel).
        y_data: labels; the first dimension is the number of samples.

    Indexing returns an ``(image, label)`` tuple where the image is a float
    tensor laid out as (channel, height, width) and the label is a long tensor.
    """

    def __init__(self, x_data, y_data):
        # Convert to float and move the channel axis forward:
        # (N, H, W, C) -> (N, C, H, W), the layout PyTorch layers expect.
        self.x_data = torch.FloatTensor(x_data).permute(0, 3, 1, 2)
        self.y_data = torch.LongTensor(y_data)
        # The dataset length is the number of labels.
        self.len = self.y_data.size(0)

    def __getitem__(self, index):
        features = self.x_data[index]
        target = self.y_data[index]
        return features, target

    def __len__(self):
        return self.len

In [60]:
# Wrap the NumPy arrays in the custom dataset, then serve them in shuffled batches of 10.
train_data = TensorData(x_data = train_images, y_data = train_labels)
train_loader = DataLoader(dataset = train_data, batch_size = 10, shuffle = True)

In [61]:
# Size of the first image stored in the custom dataset.
train_data[0][0].size()

torch.Size([3, 32, 32])

In [62]:
# Pull a single batch from the custom-dataset loader.
# Use the built-in next(): DataLoader iterators follow the standard iterator
# protocol, while the old `.next()` method is not available on newer PyTorch releases.
dataiter = iter(train_loader)
images, labels = next(dataiter)

In [63]:
# Size of the batch of images drawn from train_loader.
images.size()

torch.Size([10, 3, 32, 32])

In [64]:
##### Form #####

#from torch.utils.data import Dataset

#class MyDataset(Dataset):
#    def __init__(self):

#    def __getitem__(self, index):

#    def __len__(self):

In [65]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Dataset that pairs inputs with labels and optionally transforms each pair.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its length is the dataset length.
        transform: optional callable that receives the ``(input, label)`` tuple
            and returns the transformed sample.
    """

    def __init__(self, x_data, y_data, transform = None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __getitem__(self, index):
        pair = (self.x_data[index], self.y_data[index])
        # The transform works on the whole (input, label) pair, not just the input.
        return self.transform(pair) if self.transform else pair

    def __len__(self):
        return self.len

class ToTensor:
    """Sample transform that turns an ``(inputs, labels)`` pair into tensors.

    The inputs become a float tensor whose last axis is moved to the front
    (permute(2, 0, 1), i.e. channel-last -> channel-first for an image);
    the labels become a long tensor.
    """

    def __call__(self, sample):
        image, target = sample
        image_chw = torch.FloatTensor(image).permute(2, 0, 1)
        return image_chw, torch.LongTensor(target)

class LinearTensor:
    """Sample transform that applies ``slope * inputs + bias`` to the inputs.

    Args:
        slope: multiplicative factor (default 1).
        bias: additive offset (default 0).

    The labels of the sample are passed through untouched.
    """

    def __init__(self, slope = 1, bias = 0):
        self.slope, self.bias = slope, bias

    def __call__(self, sample):
        features, target = sample
        scaled = self.slope * features
        return scaled + self.bias, target

In [66]:
# Chain the notebook's own sample-level transforms: ToTensor() here is the class
# defined in this notebook, not tr.ToTensor.
# Original author's note: tr.ToTensor can only be used on PIL-image input (TODO confirm).
trans = tr.Compose([
    ToTensor(),
    LinearTensor(2, 5),
])
ds1 = MyDataset(x_data = train_images, y_data = train_labels, transform = trans)
train_loader1 = DataLoader(dataset = ds1, batch_size = 10, shuffle = True)

In [67]:
# Display the first sample of ds1 after `trans` has been applied.
ds1[0]

(tensor([[[441., 149., 515.,  ..., 505., 193., 335.],
          [ 83.,  35., 391.,  ..., 513., 467., 229.],
          [305., 175., 389.,  ...,  83.,   5., 151.],
          ...,
          [313., 103., 463.,  ...,  69., 121., 395.],
          [117., 479., 389.,  ..., 307., 499., 327.],
          [413.,  63., 391.,  ..., 339., 165., 175.]],
 
         [[123., 157., 111.,  ...,  49., 375., 441.],
          [353., 155., 401.,  ..., 143., 211., 501.],
          [191., 429., 139.,  ..., 251.,  71., 401.],
          ...,
          [117., 253., 447.,  ..., 377., 263., 453.],
          [387., 105., 131.,  ..., 239.,  29., 331.],
          [ 53., 261., 151.,  ..., 267., 157., 201.]],
 
         [[219.,  49., 279.,  ..., 423., 269., 155.],
          [469., 319., 511.,  ..., 345.,  71., 409.],
          [365., 105., 471.,  ..., 341.,  69., 257.],
          ...,
          [ 57.,  15., 349.,  ..., 127.,  81., 255.],
          [457.,  43., 505.,  ..., 159., 249., 195.],
          [ 27., 125., 515.,  .

In [68]:
# Unpack the first transformed sample and check the types of its two parts.
first_data = ds1[0]
features, labels = first_data
print(type(features), type(labels)) 
# The NumPy inputs have been converted to tensors (numpy -> tensor).

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [69]:
# Pull a single batch from train_loader1.
# Use the built-in next(): DataLoader iterators follow the standard iterator
# protocol, while the old `.next()` method is not available on newer PyTorch releases.
dataiter1 = iter(train_loader1)
images1, labels1 = next(dataiter1)

In [70]:
# Display the batch of images drawn from train_loader1.
images1

tensor([[[[173., 427., 139.,  ...,  85., 141., 417.],
          [163., 355., 247.,  ..., 203., 265., 137.],
          [  5.,  31., 397.,  ..., 501., 253., 245.],
          ...,
          [183., 135., 297.,  ..., 223., 209., 507.],
          [179., 295., 393.,  ..., 367., 233., 157.],
          [507., 329., 501.,  ...,  77., 271., 195.]],

         [[357., 401., 251.,  ..., 499., 353., 191.],
          [235., 433., 303.,  ..., 303., 259., 423.],
          [167., 429., 273.,  ..., 323.,  43., 275.],
          ...,
          [221., 435., 279.,  ..., 497.,  17.,  71.],
          [353., 375.,  67.,  ...,  19., 485.,  25.],
          [479., 119., 363.,  ..., 291., 331., 257.]],

         [[403., 201., 437.,  ..., 259., 365.,  73.],
          [ 45., 335., 319.,  ..., 371., 311., 231.],
          [329.,  69., 123.,  ..., 315.,  17., 113.],
          ...,
          [461., 185., 377.,  ..., 185., 439., 439.],
          [ 23., 421.,  73.,  ..., 189., 135., 133.],
          [253., 483., 373.,  ...

In [71]:
class MyDataset(Dataset):
    """Pairs inputs with labels and optionally applies a transform to each pair.

    Args:
        x_data: indexable collection of inputs.
        y_data: indexable collection of labels; its length is the dataset length.
        transform: optional callable taking the ``(input, label)`` tuple and
            returning the transformed sample.
    """

    def __init__(self, x_data, y_data, transform = None):
        self.transform = transform
        self.x_data = x_data
        self.y_data = y_data
        self.len = len(y_data)

    def __getitem__(self, index):
        sample = (self.x_data[index], self.y_data[index])
        # Without a transform the raw pair is returned unchanged.
        if not self.transform:
            return sample
        return self.transform(sample)

    def __len__(self):
        return self.len

class MyTransform:
    """Sample transform: channel-last image -> resized, normalized channel-first tensor.

    The image goes through ToPILImage -> Resize -> ToTensor -> Normalize.

    Args:
        size: size passed to ``tr.Resize`` (default 128).
        mean: per-channel mean passed to ``tr.Normalize`` (default 0.5 each).
        std: per-channel std passed to ``tr.Normalize`` (default 0.5 each).

    Calling the instance with an ``(inputs, labels)`` sample returns
    ``(image_tensor, labels)`` where ``labels`` is a float tensor.
    """

    def __init__(self, size = 128, mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5)):
        # Build the torchvision pipeline once instead of rebuilding it for every sample.
        self.transf = tr.Compose([tr.ToPILImage(), tr.Resize(size), tr.ToTensor(), tr.Normalize(mean, std)])

    def __call__(self, sample):
        inputs, labels = sample
        pixels = torch.as_tensor(inputs)
        if pixels.is_floating_point():
            # Float images are handed to ToPILImage as float32; it scales them by 255,
            # so they are assumed to already lie in [0, 1] -- TODO confirm for your data.
            pixels = pixels.float()
        else:
            # Integer images (assumed 0..255) must stay integer: ToPILImage multiplies
            # float tensors by 255 before casting to bytes, which would wrap the values
            # around if 0..255 data were converted to float first.
            pixels = pixels.to(torch.uint8)
        # Channel-last (H, W, C) -> channel-first (C, H, W), as ToPILImage expects for tensors.
        pixels = pixels.permute(2, 0, 1)
        # Labels are kept as floats, matching the original behaviour of this transform.
        labels = torch.FloatTensor(labels)

        final_output = self.transf(pixels)

        return final_output, labels

In [72]:
# Same arrays as before, now passed through MyTransform, served in shuffled batches of 10.
ds2 = MyDataset(x_data = train_images, y_data = train_labels, transform = MyTransform())
train_loader2 = DataLoader(dataset = ds2, batch_size = 10, shuffle = True)

In [73]:
# Unpack the first sample of ds2 and check the types of its two parts.
first_data = ds2[0]
features, labels = first_data
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [74]:
# Pull a single batch from train_loader2.
# Use the built-in next(): DataLoader iterators follow the standard iterator
# protocol, while the old `.next()` method is not available on newer PyTorch releases.
dataiter2 = iter(train_loader2)
images2, labels2 = next(dataiter2)

In [75]:
images2.size() # batch, number of channels, image size

torch.Size([10, 3, 128, 128])

In [76]:
# Display the batch of images drawn from train_loader2.
images2

tensor([[[[ 0.5137,  0.5137,  0.5608,  ...,  0.9451,  0.9686,  0.9686],
          [ 0.5137,  0.5137,  0.5608,  ...,  0.9451,  0.9686,  0.9686],
          [ 0.3882,  0.3882,  0.4353,  ...,  0.8510,  0.8667,  0.8667],
          ...,
          [ 0.1451,  0.1451,  0.0980,  ...,  0.0353,  0.0039,  0.0039],
          [ 0.0745,  0.0745,  0.0196,  ..., -0.0667, -0.1294, -0.1294],
          [ 0.0745,  0.0745,  0.0196,  ..., -0.0667, -0.1294, -0.1294]],

         [[ 0.3804,  0.3804,  0.4039,  ...,  0.1608,  0.1294,  0.1294],
          [ 0.3804,  0.3804,  0.4039,  ...,  0.1608,  0.1294,  0.1294],
          [ 0.2863,  0.2863,  0.3176,  ...,  0.1137,  0.0824,  0.0824],
          ...,
          [ 0.6078,  0.6078,  0.5922,  ..., -0.4667, -0.5765, -0.5765],
          [ 0.6706,  0.6706,  0.6706,  ..., -0.6627, -0.7961, -0.7961],
          [ 0.6706,  0.6706,  0.6706,  ..., -0.6627, -0.7961, -0.7961]],

         [[-0.2157, -0.2157, -0.0745,  ...,  0.8039,  0.7882,  0.7882],
          [-0.2157, -0.2157, -