In [1]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed(777)
    
# download
mnist_train = dsets.MNIST(root = "MNIST_data/", train = True, transform=transforms.ToTensor(), download= True)
mnist_test = dsets.MNIST(root = "MNIST_data/", train = False, transform=transforms.ToTensor(), download= True)

In [28]:
# hyper parmeter
batch_size = 100
learning_rate = 0.001
training_epoch = 10

In [3]:
#Dataloader
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=mnist_train,
                          batch_size= batch_size,
                          shuffle= True,
                          drop_last= True
)

test_loader = DataLoader(dataset=mnist_test,
                         batch_size=batch_size,
                        shuffle=False,
                        drop_last=False
                        )

In [8]:
# 간단 wegiht initalization
import torch.nn as nn

net = nn.Linear(28*28,256)
print(net.weight.data)

nn.init.xavier_uniform_(net.weight)
print(net.weight.data)

tensor([[ 0.0185, -0.0049, -0.0295,  ..., -0.0090, -0.0308, -0.0336],
        [-0.0322,  0.0167,  0.0160,  ...,  0.0318,  0.0269, -0.0138],
        [ 0.0090,  0.0111,  0.0108,  ...,  0.0267, -0.0091,  0.0140],
        ...,
        [ 0.0086, -0.0235, -0.0190,  ...,  0.0244, -0.0341,  0.0254],
        [ 0.0162,  0.0298, -0.0347,  ...,  0.0017,  0.0320,  0.0342],
        [-0.0012,  0.0350, -0.0184,  ...,  0.0091, -0.0125,  0.0024]])
tensor([[ 6.8028e-02, -2.2149e-02, -1.7397e-02,  ...,  5.4531e-02,
         -1.0376e-02,  9.1727e-03],
        [ 3.0390e-02,  5.6982e-02,  6.9683e-02,  ..., -5.9019e-02,
         -1.6356e-02, -6.0323e-02],
        [-1.3146e-03, -1.2332e-02, -4.6129e-02,  ..., -6.4880e-05,
          6.3648e-02,  3.6229e-02],
        ...,
        [ 1.6134e-02, -1.7151e-03, -1.9428e-02,  ...,  3.9911e-02,
          4.2307e-02, -1.7196e-02],
        [-7.6853e-03, -5.1876e-02,  2.9235e-02,  ...,  2.4711e-02,
         -6.9202e-02,  5.6174e-02],
        [ 1.3312e-02, -6.9033e-02, -4.

In [40]:
# model 만들기
# wegiht initalization 추가
import torch.nn as nn

class MnistClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Linear(28*28, 256, bias= True),
            nn.ReLU(),
            
            
            nn.Linear(256,256, bias=True),
            nn.ReLU(),
            
            nn.Linear(256,10,bias=True)
            
        )
        
    
    def forward(self,x):
        return self.layer(x)
      
model = MnistClassifier().to(device)

### Weight 초기화
https://towardsdatascience.com/batch-normalization-and-dropout-in-neural-networks-explained-with-pytorch-47d7a8459bcd

In [41]:
def weights_init_uniform(m):
    classname = m.__class__.__name__
    # for every Linear layer in a model..
    if classname.find('Linear') != -1:
        # apply a uniform distribution to the weights and a bias=0
        #m.weight.data.uniform_(0.0, 1.0)
        #m.bias.data.fill_(0)
        torch.nn.init.xavier_uniform_(m.weight.data)

# wegiht초기화는 처음만 시행하는 것 같아서 layer에서 따로 빼고, 함수를 만들어 주었다.        
#model.apply(weights_init_uniform)


In [42]:
# cost & optim

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [43]:
iteration = len(train_loader)
iteration

600

In [44]:
iteration = len(train_loader)

for epoch in range(training_epoch):
    loss = 0
    for batch_idx, sample in enumerate(train_loader):
        X_train, Y_train = sample
        X_train = X_train.view(-1, 28*28).to(device)
        Y_train = Y_train.to(device)
        
        # test
        hypothesis = model(X_train)
        cost = criterion(hypothesis, Y_train)
        
        #backward
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        #calculate
        loss += cost.item() / iteration
        
    print('Epoch: {:04d} cost : {:.4f}'.format(epoch+1, loss))
        

Epoch: 0001 cost : 0.3017
Epoch: 0002 cost : 0.1123
Epoch: 0003 cost : 0.0757
Epoch: 0004 cost : 0.0547
Epoch: 0005 cost : 0.0413
Epoch: 0006 cost : 0.0316
Epoch: 0007 cost : 0.0247
Epoch: 0008 cost : 0.0232
Epoch: 0009 cost : 0.0178
Epoch: 0010 cost : 0.0158


In [50]:
# test
import random

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    #pred
    pred = model(X_test)
    correct_pred = torch.argmax(pred, 1) == Y_test
    acc = correct_pred.float().mean()
    print('Accuracy:', acc.item())
    
    # Get one
    r = random.randint(0, len(mnist_test) -1)
    X_single_data = mnist_test.test_data[r:r+1].view(-1,28*28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r+1].to(device)
    
    print("Label:",Y_single_data)
    single_pred = model(X_single_data)
    print("pred", torch.argmax(single_pred,1).item())
    


Accuracy: 0.9736999869346619
Label: tensor([8])
pred 8


# Dropout 사용하기
https://towardsdatascience.com/batch-normalization-and-dropout-in-neural-networks-explained-with-pytorch-47d7a8459bcd

In [74]:
class Model_Dropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Linear(28*28, 256),
            nn.ReLU(),
#             nn.Dropout(0.5),
            
            
            nn.Linear(256,256),
#             nn.Dropout(0.5),
            nn.ReLU(),
            
            
            nn.Linear(256,10),
#             nn.Dropout(0.5)
        )
        
        
    def forward(self,x):
        return self.layer(x)
    
model = Model_Dropout().to(device)
#torch.nn.init.xavier_uniform_()
weights_init_uniform(model)

In [75]:
# cost & optim

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [76]:
model.train()
for epoch in range(training_epoch):
    loss = 0
    for batch_idx, sample in enumerate(train_loader):
        X_train, Y_train = sample
        X_train = X_train.view(-1, 28*28).to(device)
        Y_train = Y_train.to(device)
        
        # test
        hypothesis = model(X_train)
        cost = criterion(hypothesis, Y_train)
        
        #backward
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        #calculate
        loss += cost.item() / iteration
        
    print('Epoch: {:04d} cost : {:.4f}'.format(epoch+1, loss))

Epoch: 0001 cost : 0.3055
Epoch: 0002 cost : 0.1131
Epoch: 0003 cost : 0.0714
Epoch: 0004 cost : 0.0535
Epoch: 0005 cost : 0.0406
Epoch: 0006 cost : 0.0301
Epoch: 0007 cost : 0.0235
Epoch: 0008 cost : 0.0197
Epoch: 0009 cost : 0.0188
Epoch: 0010 cost : 0.0155


In [77]:
#test
with torch.no_grad():
    model.eval() # dropout 사용안함
    
    X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1).float() == Y_test
    accuracy = correct_prediction.float().mean()
    
    print("Accuracy:",accuracy.item())
    
    
    
    



Accuracy: 0.9769999980926514


# Batch_Normalization
- 사용이유: Internal Covatiate Shift(Layer마다 입력의 분포가 달라짐 --> 배치단위로 Normalize를 진행하였음)
- 주의사항: Test와 같은 경우, Train에서 사용되는 Batchnorm의 평균,분산,감마,베타 값을 그대로 이용하므로 반드시 model.test() 써줘야함.

In [78]:
class Model_Dropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(
            # layer1
            nn.Linear(28*28, 256),
            nn.BatchNorm1d(256), # n
            nn.ReLU(),
            nn.Dropout(0.5),
            
            #layer2
            nn.Linear(256,10),
        )
        
        
    def forward(self,x):
        return self.layer(x)
    
model = Model_Dropout().to(device)
#torch.nn.init.xavier_uniform_()
weights_init_uniform(model)

In [79]:
# cost & optim
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [81]:
model.train() #dropout, batchnorm이 원활히 적용.

iteration = len(train_loader)
for epoch in range(training_epoch):
    loss = 0
    for batch_idx, sample in enumerate(train_loader):
        X,Y = sample
        X_train = X.view(-1,28*28).float().to(device)
        Y_train = Y.to(device)
        
        hypothesis = model(X_train)
        cost = criterion(hypothesis, Y_train)
        
        #backward
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        #cal
        loss += cost.item() /iteration
        
    print("Epoch: {:04d} Cost: {:.4f}".format(epoch+1, loss))
    
    #test
    with torch.no_grad():
        model.eval()
        acc = 0
        for idx, (X,Y) in enumerate(test_loader):
            X = X.view(-1,28*28).float().to(device)
            Y = Y.to(device)

            pred = model(X)
            correct_pred = torch.argmax(pred,1) == Y
            acc += correct_pred.float().mean()
            
        print("Acc : {:.4f}".format(acc/len(test_loader)))   

Epoch: 0001 Cost: 0.3294
Acc : 0.9571
Epoch: 0002 Cost: 0.1165
Acc : 0.9705
Epoch: 0003 Cost: 0.0717
Acc : 0.9776
Epoch: 0004 Cost: 0.0495
Acc : 0.9762
Epoch: 0005 Cost: 0.0363
Acc : 0.9741
Epoch: 0006 Cost: 0.0270
Acc : 0.9794
Epoch: 0007 Cost: 0.0214
Acc : 0.9785
Epoch: 0008 Cost: 0.0174
Acc : 0.9794
Epoch: 0009 Cost: 0.0141
Acc : 0.9786
Epoch: 0010 Cost: 0.0130
Acc : 0.9803
