In [1]:
#step1) 라이브러리
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import DataLoader


In [2]:
# step2) Device
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device == "cuda":
    torch.cuda.manual_seed_all(777)


In [3]:
# step3) parameter setting

learning_rate = 0.1
training_epochs = 10
batch_size = 100


In [43]:
# step4) Dataset & DataLoader

# trans=transforms.Compose([
#                                transforms.Resize((224,224)),
#                                transforms.CenterCrop((224,224)),
#                                transforms.ToTensor(),
#                                transforms.Lambda(lambda x: x.repeat(3,1,1)),
#                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
#                                transforms.RandomHorizontalFlip(p=1),
#                                transforms.RandomRotation((90,90),expand=False, center=None, fill=0, resample=None),
                               
#                               ])

trans = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor()
    
])


# Dataset
mnist_train = dsets.MNIST(root = "MNIST_data/", train= True, transform= trans)
mnist_test = dsets.MNIST(root = "MNIST_data/", train= False, transform= trans)

# DataLoader
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle= True, drop_last= True)
test_loader = DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle= False, drop_last= False)



In [11]:
# step5) model

# make_bottleneck
def make_layers(cfg):
    in_channel = 32 #첫번째 convolution을 통과하고 난 뒤.
    layers = []
    
    for v in cfg:
        t, out_channel, n, stride = v
        # layer 만들기
        for i in range(n):
            
            # The first layer of each sequence has a stride s and all others use stride 1
            if i != 0:
                stride = 1
                
            conv_pointwise1 = nn.Conv2d (in_channel,
                                        in_channel*t,
                                        kernel_size= 1,
                                        stride = 1,
                                        padding = 0)

            conv_depthwise = nn.Conv2d(in_channel*t, 
                                       in_channel*t, 
                                       kernel_size=3, 
                                       groups= in_channel*t,
                                       stride = stride,
                                       padding = 1)

            conv_pointwise2 = nn.Conv2d (in_channel*t,
                                        out_channel,
                                        kernel_size= 1,
                                        stride = 1,
                                        padding = 0)

            #layer 추가하기
            layers.append([conv_pointwise1, nn.BatchNorm2d(in_channel*t) ,nn.ReLU6(),
                           conv_depthwise, nn.BatchNorm2d(in_channel*t),nn.ReLU6(),
                           conv_pointwise2, nn.BatchNorm2d(out_channel)])

            # 다음번 layer를 위해 값 change
            in_channel = out_channel
        
    return layers

# model
class Mobilenet(nn.Module):
    def __init__(self, features, stride_list, num_classes = 10):
        super(Mobilenet,self).__init__()
        
        # conv2d 첫번째
        self.layer1 = nn.Sequential(
            nn.Conv2d(1,32, kernel_size= 3, stride=2, padding = 1),
            nn.BatchNorm2d(32),
        )
        
        
        # bottle_neck
        self.bottle_necks = []
        for layer in features:
            self.bottle_necks.append(nn.Sequential(*layer))
        
        # conv2d 마지막
        self.layer2 = nn.Sequential(
                nn.Conv2d(320,1280, kernel_size=1, stride = 1, padding= 0),
                nn.BatchNorm2d(1280),
                nn.AvgPool2d(7,7),
        )
        
        # clasifier
        self.fc = nn.Linear(1280, num_classes)
        
    
        
    def forward(self, x):
        
        # layer1
        out = self.layer1(x)
        print(out.shape)
        
        #bottle_neck
        for i in range(len(self.bottle_necks)):
            new = self.bottle_necks[i](out) # 
            if stride_list[i] == 1: # skip connection 연결해주기
                # 채널이 다르다면, 
                flag = True
                if out.shape[1] != new.shape[1]:
                    # 방법1) DownSample
                    #out = Downsample(out.shape[1], new.shape[1])(out) # 채널 맞춰주기
    
                    # 방법2) Skip
                    flag = False
        
                
                    
                #채널이 같거나, Downsample이 이뤄졌다면,
                if flag:
                    new += out
            
            out = new
            print(out.shape)

        
        # layer2
        out = self.layer2(out)
        print(out.shape)
        
        # FC
        out =out.view(out.size(0), -1)
        print(out.shape)
        
        out = self.fc(out)
        print(out.shape)


        return out
    
    
# Residual 과정에서 channel이 안맞는 문제를 해결
class Downsample(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(Downsample,self).__init__()
        self.layer = nn.Sequential(
                    nn.Conv2d(in_channel, out_channel, kernel_size= 1),
                    nn.BatchNorm2d(out_channel),
                    )
        
    def forward(self,x):
        out = self.layer(x)
        return out
    




In [12]:
# bottle_neck을 담고있는 리스트를 만들어봅시다.
# 논문에서 나온 순서를 지켜봅시다.

cfg = {'MobileNetV2': [(1,16,1,1),
                       (6,24,2,2),
                       (6,32,3,2),
                       (6,64,4,2),
                       (6,96,3,1),
                       (6,160,3,2),
                       (6,320,1,1)]}

bottleneck = make_layers(cfg["MobileNetV2"])
stride_list = [1,
               2,1,
               2,1,1,
               2,1,1,1,
               1,1,1,
               2,1,1,
               1]

# output과 stride, expansion factor
model = Mobilenet(bottleneck, stride_list, num_classes= 10).to(device)

# layer가 잘 연결되었는지, test
target = torch.Tensor(2,1,224,224).to(device)
model(target).shape

torch.Size([2, 32, 112, 112])
torch.Size([2, 16, 112, 112])
torch.Size([2, 24, 56, 56])
torch.Size([2, 24, 56, 56])
torch.Size([2, 32, 28, 28])
torch.Size([2, 32, 28, 28])
torch.Size([2, 32, 28, 28])
torch.Size([2, 64, 14, 14])
torch.Size([2, 64, 14, 14])
torch.Size([2, 64, 14, 14])
torch.Size([2, 64, 14, 14])
torch.Size([2, 96, 14, 14])
torch.Size([2, 96, 14, 14])
torch.Size([2, 96, 14, 14])
torch.Size([2, 160, 7, 7])
torch.Size([2, 160, 7, 7])
torch.Size([2, 160, 7, 7])
torch.Size([2, 320, 7, 7])
torch.Size([2, 1280, 1, 1])
torch.Size([2, 1280])
torch.Size([2, 10])


torch.Size([2, 10])

In [15]:
# step6) optim & loss

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
#lr_scheduler.StepLR


TypeError: __init__() missing 1 required positional argument: 'step_size'

In [50]:
# step7) train
model.train()
iteration = len(train_loader)

for epoch in range(training_epochs):
    loss = 0.0
    acc_correct = 0.0
    
    for idx, sample in enumerate(train_loader):
        optimizer.zero_grad()
        X,Y = sample
        X = X.to(device)
        Y = Y.to(device)
        
        # forward, backward, optimizer
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        #calculate
        loss += cost.item()
        correct = (torch.argmax(hypothesis, dim = 1) == Y).float()
        acc_correct += correct.sum()
    
    acc_correct /= (batch_size * iteration)
    loss /= iteration
    print("[Epoch: {:04d}], loss : {:.2f}, acc: {:.2f}%".format(epoch, loss, acc_correct*100))
    

KeyboardInterrupt: 

In [None]:
# step8) test
with torch.no_grad():
    model.eval()
    accuracy = 0
    loss = 0
    
    for idx, sample in enumerate(test_loader):
        X, Y = sample
        X = X.to(device)
        Y = Y.to(device)
        
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        
        loss += cost.item()
        accuracy += (torch.argmax(hypothesis, dim = 1) == Y).sum().float()
        
    # calculate
    loss /= len(test_loader)
    accuracy /= (batch_size * len(test_loader))
    
    print("[Test] Loss = {:.4f}, Acc = {:.4f}%".format(loss, accuracy*100))

# 더해야할점
1) 시각화
2) weight_initalization
3) learning rate 조정