In [4]:
# step1) 모듈 불러오기
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.utils.data import DataLoader


In [2]:
# step2) device
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed_all(777)
print(device)

cpu


In [3]:
#stpe3) hyper parameter
batch_size = 100
learning_rate = 0.001
training_epochs = 10

In [30]:
#step4) Dataset(transform) & Dataloader

trans = transforms.Compose([
#     transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307), (0.3081))
])



# 4-1) dataset 가져오기
mnist_train = dsets.MNIST(root = "../data/MNIST_data/", train= True, transform = trans, download= True)
mnist_test = dsets.MNIST(root = "../data/MNIST_data/", train= False, transform = trans, download= True)

# 4-2) loader 만들기
train_loader = DataLoader(dataset= mnist_train, batch_size= batch_size, shuffle= True, drop_last= True)
test_loader = DataLoader(dataset= mnist_test, batch_size= batch_size, shuffle= False, drop_last= False)


In [31]:
def get_mean_std(loader):
    mu = 0
    mu_square = 0
    std = 0
    for sample in loader:
        X,Y = sample

        mu += torch.mean(X, dim = [0,2,3])
        mu_square += torch.mean(X**2, dim = [0,2,3])
        std += torch.std(X, dim = [0,2,3])

    mu /= len(loader)
    mu_square /= len(loader)
    std = (mu_square - mu**2)**0.5 # E[X**2] - {E[X]}**2
    return mu, std

get_mean_std(train_loader)

(tensor([-0.0001]), tensor([1.0000]))

In [61]:
#step5) 모델링
# 간단한 conv + depthwise separable conv + fc1 + fc2

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # 1*28*28
        self.conv_layer1 = nn.Sequential(
            nn.Conv2d(1,128,3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        # 128*14*14
        self.depthwise = nn.Sequential(
            nn.Conv2d(128, 128*3, 3, groups= 128, padding=1),
            nn.BatchNorm2d(128*3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            
        )
        
        # 384*7*7
        self.pointwise = nn.Sequential(
            nn.Conv2d(384,512, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
        )
        
        # 512*3*3
        self.fc1 = nn.Sequential(
            nn.Linear(512*3*3, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Dropout(0.2), # dropout 추가
            
        )
        
        # 100
        self.fc2 = nn.Linear(100,10)
        
        
    def forward(self,x):
        out = self.conv_layer1(x)
        
        out = self.depthwise(out)
        
        out = self.pointwise(out)
        
        out = out.view(out.size(0), -1)
        
        out = self.fc1(out)
        
        out = self.fc2(out)
        
        return out
    
    def weight_initalzie(self):
        for m in self.modules():
            
            if isinstance(m, nn.Conv2d):
                print("here1")
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m,nn.BatchNorm2d):
                print("here2")

                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias, 0)
            if isinstance(m,nn.Linear):
                print("here3")

                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.BatchNorm1d):
                print("here4")

                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.weight,0)
                
                
    
model = MyModel().to(device)

# init
model.weight_initalzie()

test_data = torch.Tensor(2,1,28,28)
model(test_data)

here1
here2
here1
here2
here1
here2
here3
here4
here3


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<AddmmBackward>)

In [63]:
# step6) loss & optim & lr_scheduler
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)


In [None]:
# step7) train

iteration = len(train_loader)
model.train()

for epoch in range(training_epochs):
    
    lr_sche.step()
    loss = 0
    correct = 0
    
    for X,Y in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        Y = Y.to(device)
        
        # forward, backward, optim
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        #calculate
        loss += cost.item()
        correct += (torch.argmax(hypothesis, dim = 1) == Y).float().sum()
    
    loss /= iteration
    acc = correct / (batch_size * iteration)
    
    print("[Epoch {:04d}], loss = {:.4f}, acc = {:.2f}, lr = {:.4f}".format(epoch+1, loss, acc*100, learning_rate))
    

[Epoch 0001], loss = 2.3016, acc = 11.10, lr = 0.0010
[Epoch 0002], loss = 2.3013, acc = 11.24, lr = 0.0010


In [None]:
#step8) test

test_iteration = len(test_loader)
with torch.no_grad():
    model.eval()
    loss = 0
    correct = 0
    
    for sample in test_loader:
        X_test, Y_test = sample
        X_test = X_test.to(device)
        Y_test = Y_test.to(device)
        
        # test
        hypothesis = model(X_test)
        cost = criterion(hypothesis,Y_test)
        
        # calculate
        loss += cost.item()
        correct += (torch.argmax(hypothesis, 1) == Y_test).float().sum()
        
    loss /= test_iteration
    acc = correct / (batch_size * test_iteration)
    
    print("[Test] loss = {:.4f}, Acc = {:.2f}".format(loss,acc*100))
        
        
