In [1]:
import torch
import torch.nn as nn
import torch.optim as optim


import torchvision
import torchvision.datasets as dsets # 데이터셋 (Mnist, Cifar 등)
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader # Custom data 및 Dataloader


In [2]:
#step2) device
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed_all(777)

#step3) hyper-parameter
learning_rate = 0.001
batch_size = 100
training_epoch = 10

In [6]:
# step4) Dataset
# Dataset download
mnist_train = dsets.MNIST(root = "MNIST_data/", train = True, transform=transforms.ToTensor(), download = True)
mnist_test = dsets.MNIST(root = "MNIST_data/", train = False, transform=transforms.ToTensor(), download = True)

#fashion_train = dsets.FashionMNIST(root = "Fashion_data", train = True, transform=transforms.ToTensor(), download= True)
#fashion_test = dsets.FashionMNIST(root = "Fashion_data", train = False, transform=transforms.ToTensor(), download= True)
train_loader = DataLoader(dataset= mnist_train, batch_size = 100, shuffle = True, drop_last= True)
test_loader = DataLoader(dataset = mnist_test, batch_size = batch_size, shuffle = False, drop_last = False)



In [72]:
Batch1_mean = []
Batch1_std = []
Batch2_mean = []
Batch2_std = []

class CNN(nn.Module):
    def __init__(self,num_classes):
        super(CNN,self).__init__()
        
        # 1*28*28
        self.layer1 = nn.Sequential(
            nn.Conv2d(1,128,3, padding= 1), 
        )
        self.Batch1 = nn.BatchNorm2d(128)
        
        self.layer2= nn.Sequential(
            nn.Conv2d(128,56,3, padding= 1), 
        )
        self.Batch2 = nn.BatchNorm2d(56)
        self.fc = nn.Linear(43904,num_classes)
                
    def forward(self,x):
        # 첫번째 layer
        out = self.layer1(x)
        out = self.Batch1(out)
        Batch1_mean.append(torch.mean(out, dim = [0,2,3]).tolist())
        Batch1_std.append(torch.std(out, dim = [0,2,3]).tolist())
        
        # 두번째 layer
        out = self.layer2(out)
        out = self.Batch2(out)
        Batch2_mean.append(torch.mean(out, dim = [0,2,3]).tolist())
        Batch2_std.append(torch.std(out, dim = [0,2,3]).tolist())
        
        # FC layer
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
    
    def initalize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode = "fan_out", nonlinearity= "relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)

In [88]:
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    # Use `autograd` to determine whether the current mode is training mode or
    # prediction mode
    if not autograd.is_training():
        # If it is prediction mode, directly use the mean and variance
        # obtained by moving average
        X_hat = (X - moving_mean) / np.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # When using a fully-connected layer, calculate the mean and
            # variance on the feature dimension
            mean = X.mean(axis=0)
            var = ((X - mean)**2).mean(axis=0)
        else:
            # When using a two-dimensional convolutional layer, calculate the
            # mean and variance on the channel dimension (axis=1). Here we
            # need to maintain the shape of `X`, so that the broadcasting
            # operation can be carried out later
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean)**2).mean(axis=(0, 2, 3), keepdims=True)
        # In training mode, the current mean and variance are used for the
        # standardization
        X_hat = (X - mean) / np.sqrt(var + eps)
        # Update the mean and variance using moving average
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # Scale and shift
    return Y, moving_mean, moving_var

In [74]:
# step5) model 호출하기
model = CNN(10).to(device)
model.initalize_weights()

for X,Y in train_loader:
    X = X.to(device)
    Y = Y.to(device)
    break

torch.Size([100, 10])

In [75]:
#step6) loss & optim
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)



In [76]:
#step7) train
iteration = len(train_loader)
model.train()

for epoch in range(training_epoch):
    loss = 0
    correct = 0
    for sample in train_loader:
        optimizer.zero_grad()
        
        X,Y = sample
        X = X.to(device)
        Y = Y.to(device)
        Y = torch.squeeze(Y) # 채널 맞추기
        Y = Y.long() # type 맞추기
        
        # forward & backward & optim
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        #calculate
        loss += cost.item()
        correct += (torch.argmax(hypothesis, dim = 1) == Y).float().sum()
    
    loss /= iteration
    acc = correct / (batch_size * iteration)
    print("[Epoch {:04d}, loss = {:.4f}, acc = {:.2f}]%".format(epoch, loss, acc*100))
    
    # 그래프 그리기 및 weight 저장하기.
    #loss_tracker(loss_plt, torch.FloatTensor([loss]), torch.FloatTensor([epoch]))
    torch.save(model.state_dict(), "./hw5_weight/%d_%d.pth"%(epoch+1,int(acc*100)))
    
    

[Epoch 0000, loss = 1.3458, acc = 85.93]%


KeyboardInterrupt: 

In [87]:
np.array(Batch1_mean).shape

(609, 128)

In [77]:
Batch1_mean
Batch1_std
Batch2_mean
Batch2_std

[[1.000004768371582,
  1.000004529953003,
  1.000006079673767,
  1.0000048875808716,
  1.0000052452087402,
  1.0000059604644775,
  1.0000044107437134,
  1.0000041723251343,
  1.0000059604644775,
  1.0000051259994507,
  1.0000020265579224,
  1.0000038146972656,
  1.0000038146972656,
  1.0000038146972656,
  1.0000053644180298,
  1.0000051259994507,
  1.0000046491622925,
  1.000003695487976,
  1.0000050067901611,
  1.000003457069397,
  1.0000042915344238,
  1.0000046491622925,
  1.0000054836273193,
  1.0000056028366089,
  1.0000048875808716,
  1.0000054836273193,
  1.000005841255188,
  1.0000056028366089,
  1.0000054836273193,
  1.0000040531158447,
  1.0000040531158447,
  1.0000046491622925,
  1.000003695487976,
  1.0000033378601074,
  1.0000052452087402,
  1.0000053644180298,
  1.0000040531158447,
  1.0000025033950806,
  1.0000050067901611,
  1.000005841255188,
  1.0000054836273193,
  1.000005841255188,
  1.0000051259994507,
  1.0000050067901611,
  1.000003695487976,
  1.000004768371582,

In [82]:
import numpy as np
np.array(Batch1_mean).shape

(609, 128)

In [83]:
np.array(Batch1_std).shape

(609, 128)

In [84]:
Batch1_mean

[[5.4554217143731876e-08,
  7.105649046934559e-08,
  1.0782282799937093e-07,
  1.1359018259327058e-07,
  7.75103643491093e-08,
  -6.111756878368624e-09,
  9.690848656873641e-08,
  -4.3725023601837165e-08,
  -9.129926326068016e-08,
  8.248628802220992e-08,
  -1.1696081436696204e-08,
  -5.37403543887649e-08,
  3.078382349031017e-08,
  8.719992194983206e-08,
  -1.4311576812531257e-08,
  1.3648627783524603e-09,
  6.786870621056096e-09,
  -4.223186778062882e-08,
  -5.3470841976377415e-08,
  2.58983696710402e-08,
  -9.076023488319152e-09,
  3.964013117752074e-09,
  -8.342141200046171e-08,
  -5.916749401535526e-08,
  1.33829152204612e-08,
  1.522579395896173e-08,
  5.1742762963158384e-08,
  6.738974178688295e-08,
  -1.2575515739854382e-08,
  -4.058741964740875e-08,
  1.3866518600025302e-07,
  -4.3449809083995206e-08,
  9.642191400871525e-08,
  -8.20331749196157e-08,
  6.665988649956489e-09,
  6.72362343578925e-09,
  5.271380665305969e-09,
  1.0976985720390076e-07,
  1.489735979021134e-08,
  -