<a href="https://colab.research.google.com/github/HAMA-DL-dev/VML-internship/blob/master/Custom_Data_Loader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import gzip
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


In [2]:
def load_custom_mnist(image_root, label_root, bsize, shuffle):
    """Return a DataLoader over an MNISTCustomDataset.

    Args:
        image_root: Path handed to MNISTCustomDataset as the image file.
        label_root: Path handed to MNISTCustomDataset as the label file.
        bsize: Forwarded to DataLoader as ``batch_size``.
        shuffle: Forwarded to DataLoader as ``shuffle``.

    Returns:
        A ``torch.utils.data.DataLoader`` wrapping the freshly built dataset.
    """
    return DataLoader(
        MNISTCustomDataset(image_root, label_root),
        batch_size=bsize,
        shuffle=shuffle,
    )

In [3]:
class MNISTCustomDataset(Dataset):
    """Map-style dataset backed by a pair of gzip-compressed IDX files.

    The image file is read as a 16-byte header (magic number, image count,
    rows, columns; each a big-endian int32) followed by one unsigned byte per
    pixel.  The label file is read as an 8-byte header (magic number, label
    count) followed by one unsigned byte per label.  The magic numbers are
    stored but not validated.

    After construction:
        images: float32 tensor of shape (num_images, rows * columns) with
            pixel values divided by 255, i.e. in the range [0, 1].
        labels: int64 tensor of shape (num_labels,).
    """

    def __init__(self, image_data_root, label_data_root):
        """Load both files eagerly into memory.

        Args:
            image_data_root: Path of the gzip-compressed image file.
            label_data_root: Path of the gzip-compressed label file.
        """
        # image
        self.image_data_root = image_data_root
        self.image_magic_number = 0
        self.num_images = 0
        self.image_rows = 0
        self.image_columns = 0
        self.images = np.empty(0)
        # label
        self.label_data_root = label_data_root
        self.label_magic_number = 0
        self.num_labels = 0
        self.labels = np.empty(0)

        self.image_init_dataset()  # image data
        self.label_init_dataset()  # label data

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        return self.images[idx], self.labels[idx]

    def __len__(self):
        """Return the number of images announced by the image file header."""
        return self.num_images

    @staticmethod
    def _read_header(stream, count):
        """Read ``count`` big-endian int32 header fields as Python ints.

        Raises:
            ValueError: If the stream ends before the header is complete.
        """
        raw = stream.read(4 * count)
        if len(raw) != 4 * count:
            raise ValueError("IDX header is truncated")
        # The header is stored big-endian, so the dtype states the byte order
        # explicitly; converting to int avoids fixed-width NumPy scalars
        # leaking into later size arithmetic and into __len__.
        return [int(value) for value in np.frombuffer(raw, dtype=">i4")]

    def image_init_dataset(self):
        """Read the image file into ``self.images``.

        Fills ``image_magic_number``, ``num_images``, ``image_rows`` and
        ``image_columns`` from the header, then stores the pixels as a float32
        tensor with one flattened image per row, scaled to [0, 1].
        """
        # The context manager guarantees the gzip handle is closed even if
        # reading fails part-way.
        with gzip.open(self.image_data_root, 'rb') as image_file:
            (self.image_magic_number,
             self.num_images,
             self.image_rows,
             self.image_columns) = self._read_header(image_file, 4)

            pixels_per_image = self.image_rows * self.image_columns
            buffer = image_file.read(self.num_images * pixels_per_image)

        images = np.frombuffer(buffer, dtype=np.uint8).astype(np.float32)

        # Flatten every image to one row; the row width comes from the header
        # rather than a hard-coded 784 so other image sizes load as well.
        images = images.reshape(self.num_images, pixels_per_image)

        # 255 is the maximum value of an unsigned byte, so this maps pixel
        # values into [0, 1].
        images = images / 255

        # Convert to a tensor so the data can be fed to the network.
        self.images = torch.tensor(images)

    def label_init_dataset(self):
        """Read the label file into ``self.labels``.

        Fills ``label_magic_number`` and ``num_labels`` from the header, then
        stores the labels as an int64 tensor.
        """
        with gzip.open(self.label_data_root, 'rb') as label_file:
            self.label_magic_number, self.num_labels = self._read_header(label_file, 2)
            buffer = label_file.read(self.num_labels)

        # astype makes a writable int64 copy of the read-only buffer view.
        labels = np.frombuffer(buffer, dtype=np.uint8).astype(np.int64)
        self.labels = torch.tensor(labels, dtype=torch.long)


In [4]:
# Training split: batches of 10, reshuffled by the DataLoader.
trainloader = load_custom_mnist(
    image_root='/content/drive/My Drive/Colab Notebooks/train-images-idx3-ubyte.gz',
    label_root='/content/drive/My Drive/Colab Notebooks/train-labels-idx1-ubyte.gz',
    bsize=10,
    shuffle=True,
)
# Test split: batches of 10, kept in file order.
testloader = load_custom_mnist(
    image_root='/content/drive/My Drive/Colab Notebooks/t10k-images-idx3-ubyte.gz',
    label_root='/content/drive/My Drive/Colab Notebooks/t10k-labels-idx1-ubyte.gz',
    bsize=10,
    shuffle=False,
)




In [5]:
import torch.nn as nn

class NeuralNet(nn.Module):
    """Three-layer fully connected network: 784 -> 100 -> 50 -> 10.

    A sigmoid follows each of the first two linear layers; the output of the
    last linear layer is returned without an activation.
    """

    def __init__(self):
        super().__init__()

        # Layers are created in the order they are applied in forward().
        self.linear1 = nn.Linear(784, 100)
        self.linear2 = nn.Linear(100, 50)
        self.linear3 = nn.Linear(50, 10)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of 784-feature inputs to 10 output scores per sample."""
        hidden1 = self.sigmoid(self.linear1(x))
        hidden2 = self.sigmoid(self.linear2(hidden1))
        return self.linear3(hidden2)

# Module-level model instance used by the training cell.
net = NeuralNet()

In [6]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Number of mini-batches between two loss reports.
log_interval = 2000

for epoch in range(2):

    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # Tensors take part in autograd directly, so the deprecated
        # Variable wrapper is not needed.
        inputs, labels = data
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float, so the running sum is a
        # number rather than a tensor.
        running_loss += loss.item()
        if i % log_interval == log_interval - 1:
            # Report the mean loss over the last log_interval mini-batches.
            print("[%d %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0

[1  2000] loss: 2.300
[1  4000] loss: 2.280
[1  6000] loss: 2.201
[2  2000] loss: 1.866
[2  4000] loss: 1.384
[2  6000] loss: 1.069
