### Import Libraries

In [14]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch import nn
from torchinfo import summary

### Dataset

In [15]:
# Image to Tensor, Normalization
transform = transforms.Compose([
    transforms.ToTensor(),  
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  
])

# Download CIFAR-100 Dataset
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

classes = trainset.classes

Files already downloaded and verified
Files already downloaded and verified


In [16]:
class InvertedBottleneck(nn.Module):
    def __init__(self, in_channels, out_channels, t, stride = 1):
        """
        Define Inverted Bottleneck block 

        Parameters:
        - in_channels (int) : 입력 채널 수
        - out_channels (int) : 출력 채널 수
        t (int): 확장 비율 (expand ratio)
        - stride (int, optional): stride값 (default = 1)
        """
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride

        # Expansion layer (Channel 수 확장)
        expand = nn.Sequential(
            nn.Conv2d(in_channels, in_channels * t, 1, bias = False),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace = True),
        )

        # Depthwise convolution layer
        depthwise = nn.Sequential(
            nn.Conv2d(in_channels * t, in_channels * t, 3, stride = stride, padding = 1, groups = in_channels * t, bias = False),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace = True),
        )

        # Pointwise convolution layer
        pointwise = nn.Sequential(
            nn.Conv2d(in_channels * t, out_channels, 1, bias = False),
            nn.BatchNorm2d(out_channels),
        )
        
        # Residual connection
        residual_list = []
        if t > 1:
            residual_list += [expand]
        residual_list += [depthwise, pointwise]
        self.residual = nn.Sequential(*residual_list)
    


    def forward(self, x):
        """
        Define forward propagation

        Parameters:
        - x: input tensor

        Returns:
        - out: output tensor
        """
        if self.stride == 1 and self.in_channels == self.out_channels:
            # skip connection을 적용하는 경우 -> input과 output channel이 동일하고, stride=1인 경우
            out = self.residual(x) + x
        else:
            # skip connection을 적용하지 않는 경우
            out = self.residual(x)
    
        return out

In [17]:
class MobileNetV2(nn.Module):
    def __init__(self, n_classes = 1000):
        super().__init__()

        self.first_conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride = 2, padding = 1, bias = False),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace = True)
        )

        self.bottlenecks = nn.Sequential(
            self.make_stage(32, 16, t = 1, n = 1),
            self.make_stage(16, 24, t = 6, n = 2, stride = 2),
            self.make_stage(24, 32, t = 6, n = 3, stride = 2),
            self.make_stage(32, 64, t = 6, n = 4, stride = 2),
            self.make_stage(64, 96, t = 6, n = 3),
            self.make_stage(96, 160, t = 6, n = 3, stride = 2),
            self.make_stage(160, 320, t = 6, n = 1)
        )

        self.last_conv = nn.Sequential(
            nn.Conv2d(320, 1280, 1, bias = False),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace = True)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
        	nn.Dropout(0.2), # 채널 축으로 놓여있는 feature 들을 일부 가려보면서 학습
            nn.Linear(1280, n_classes)
        )
    
    def forward(self, x):
        x = self.first_conv(x)
        x = self.bottlenecks(x)
        x = self.last_conv(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1) # (N, C, 1, 1) -> (N, C)
        x = self.fc(x)
        return x
    
    def make_stage(self, in_channels, out_channels, t, n, stride = 1):
        layers = [InvertedBottleneck(in_channels, out_channels, t, stride)]
        in_channels = out_channels
        for _ in range(n-1):
            layers.append(InvertedBottleneck(in_channels, out_channels, t))
        
        return nn.Sequential(*layers)

In [18]:
model = MobileNetV2()
summary(model, input_size = (2, 3, 224, 224), device = 'cuda' if torch.cuda.is_available() else cpu)

Layer (type:depth-idx)                             Output Shape              Param #
MobileNetV2                                        [2, 1000]                 --
├─Sequential: 1-1                                  [2, 32, 112, 112]         --
│    └─Conv2d: 2-1                                 [2, 32, 112, 112]         864
│    └─BatchNorm2d: 2-2                            [2, 32, 112, 112]         64
│    └─ReLU6: 2-3                                  [2, 32, 112, 112]         --
├─Sequential: 1-2                                  [2, 320, 7, 7]            --
│    └─Sequential: 2-4                             [2, 16, 112, 112]         --
│    │    └─InvertedBottleneck: 3-1                [2, 16, 112, 112]         896
│    └─Sequential: 2-5                             [2, 24, 56, 56]           --
│    │    └─InvertedBottleneck: 3-2                [2, 24, 56, 56]           5,136
│    │    └─InvertedBottleneck: 3-3                [2, 24, 56, 56]           8,832
│    └─Sequential: 2-6     

In [23]:
model = MobileNetV2(n_classes=100) 
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)
print(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

cuda:0


In [24]:
def train_model(model, trainloader, criterion, optimizer, num_epochs=50):
    model.train()
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f'Epoch {epoch+1}, Loss: {running_loss / len(trainloader)}')

train_model(model, trainloader, criterion, optimizer)

Epoch 1, Loss: 4.241464905116869
Epoch 2, Loss: 3.8254292169800195
Epoch 3, Loss: 3.5838198585583427
Epoch 4, Loss: 3.399807040648692
Epoch 5, Loss: 3.2567111092150363
Epoch 6, Loss: 3.1653423937385345
Epoch 7, Loss: 3.017401352867751
Epoch 8, Loss: 2.9079995673635732
Epoch 9, Loss: 2.818799265815169
Epoch 10, Loss: 2.709724346999927
Epoch 11, Loss: 2.6243818559305137
Epoch 12, Loss: 2.5314308731147395
Epoch 13, Loss: 2.4511972792313226
Epoch 14, Loss: 2.4094053349836404
Epoch 15, Loss: 2.330988227101543
Epoch 16, Loss: 2.262169106842002
Epoch 17, Loss: 2.219592074146661
Epoch 18, Loss: 2.213916571701274
Epoch 19, Loss: 2.167724436811169
Epoch 20, Loss: 2.0593560580402386
Epoch 21, Loss: 1.9882515912775494
Epoch 22, Loss: 1.9553162725380315
Epoch 23, Loss: 1.9335244188223348
Epoch 24, Loss: 1.8776875659632866
Epoch 25, Loss: 1.8301710507753866
Epoch 26, Loss: 1.7765883277444279
Epoch 27, Loss: 1.739388688141123
Epoch 28, Loss: 1.7019983123025626


In [None]:
def evaluate_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Accuracy: {100 * correct / total}%')

evaluate_model(model, testloader)

Accuracy: 30.53%
