<a href="https://colab.research.google.com/github/Soosembly/ResearchPaper/blob/main/%08GoogLeNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- GoogLeNet은 주어진 하드웨어 자원을 최대한 효율적으로 이용하면서 학습 능력은 극대화할 수 있는 깊고 넓은 신경망.
- 깊고 넓은 신경망을 위해 Inception 모듈을 추가함.
- 인셉션 모듈은 특징을 효율적으로 추출하기 위해 1x1, 3x3, 5x5의 합성곱 연산을 각각 수행함.
- 3x3 최대 풀링은 입력과 출력의 높이와 너비가 같아야 하므로, 풀링 연산에서는 드물게 '패딩을 추가해야 함'.
- 결과적으로 GoogLeNet에 적용된 해결 방법은 sparse connectivity(희소 연결)임.
- CNN은 합성곱, 풀링, 완전연결층들이 서로 dense(밀집, 정교하고 빽빽)하게 연결되어 있는데,
- 희소 연결이라 함은 빽빽하게 연결된 신경망 대신 correlation이 높은 노드끼리만 연결하는 방법을 말함.
- 이것으로 연산량이 적어지며 과적합도 해결할 수 있음
- 대용량 데이터 학습을 해야할 때, 심층 신경망의 아키텍처에서 계층이 넓고(뉴런이 많고) 깊으면(계층이 많으면) 인식률은 좋아지지만, 과적합이나 vanishing gradient problem(기울기 소멸 문제)를 비롯한 학습 시간 지연과 연산 속도 등의 문제가 있는데, 특히 합성곱 신경망에서 이러한 문제들이 자주 나타남
- 하지만 GoogLeNet(혹은 인셉션)으로 이러한 문제를 해결할 수 있다고 생각하면 됨.

### 인셉션 모듈의 4가지 연산
- 1x1 합성곱
- 1x1 합성곱 + 3x3 합성곱
- 1x1 합성곱 + 5x5 합성곱
- 3x3 max pooling + 1x1 합성곱(convolution)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms


#01.Define Convolution Blocks

In [2]:
# Convolutional blocks
def conv_1(in_dim, out_dim):
    """Build the plain 1x1 convolution branch.

    Args:
        in_dim: Number of input channels.
        out_dim: Number of output channels.

    Returns:
        An ``nn.Sequential`` holding a 1x1, stride-1 ``nn.Conv2d`` followed
        by ``nn.ReLU``. Height and width are unchanged.
    """
    pointwise = nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=1)
    return nn.Sequential(pointwise, nn.ReLU())

def conv_1_3(in_dim, mid_dim, out_dim):
    """Build the 1x1 -> 3x3 convolution branch.

    Args:
        in_dim: Number of input channels.
        mid_dim: Channels produced by the 1x1 convolution and fed to the 3x3.
        out_dim: Number of output channels of the 3x3 convolution.

    Returns:
        An ``nn.Sequential`` of Conv2d(1x1) -> ReLU -> Conv2d(3x3) -> ReLU.
        The 3x3 convolution uses ``padding=1`` with stride 1, so height and
        width are unchanged.
    """
    layers = [
        nn.Conv2d(in_dim, mid_dim, kernel_size=1, stride=1),
        nn.ReLU(),
        nn.Conv2d(mid_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def conv_1_5(in_dim, mid_dim, out_dim):
    """Build the 1x1 -> 5x5 convolution branch.

    Args:
        in_dim: Number of input channels.
        mid_dim: Channels produced by the 1x1 convolution and fed to the 5x5.
        out_dim: Number of output channels of the 5x5 convolution.

    Returns:
        An ``nn.Sequential`` of Conv2d(1x1) -> ReLU -> Conv2d(5x5) -> ReLU.
        The 5x5 convolution uses ``padding=2`` with stride 1, so height and
        width are unchanged.
    """
    layers = [
        nn.Conv2d(in_dim, mid_dim, kernel_size=1, stride=1),
        nn.ReLU(),
        nn.Conv2d(mid_dim, out_dim, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def max_3_1(in_dim, out_dim):
    """Build the 3x3 max-pool -> 1x1 convolution branch.

    Args:
        in_dim: Number of input channels (pooling leaves this unchanged).
        out_dim: Number of output channels of the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` of MaxPool2d(3x3) -> Conv2d(1x1) -> ReLU. The
        pooling uses stride 1 with ``padding=1``, so height and width are
        unchanged.
    """
    layers = [
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=1),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

#02. Define Inception Module

In [3]:
class inception_module(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    The branches are a 1x1 convolution, a 1x1 -> 3x3 convolution, a
    1x1 -> 5x5 convolution, and a 3x3 max-pool -> 1x1 convolution. Each is
    applied to the same input and the results are joined on dimension 1, so
    the output has ``out_dim_1 + out_dim_3 + out_dim_5 + pool_dim`` channels.

    Args:
        in_dim: Number of input channels.
        out_dim_1: Output channels of the 1x1 branch.
        mid_dim_3: Channels after the 1x1 reduction in the 3x3 branch.
        out_dim_3: Output channels of the 3x3 branch.
        mid_dim_5: Channels after the 1x1 reduction in the 5x5 branch.
        out_dim_5: Output channels of the 5x5 branch.
        pool_dim: Output channels of the pooling branch.
    """

    def __init__(self, in_dim, out_dim_1, mid_dim_3, out_dim_3,
                 mid_dim_5, out_dim_5, pool_dim):
        super().__init__()
        # Branch 1: 1x1 convolution
        self.conv_1 = conv_1(in_dim, out_dim_1)
        # Branch 2: 1x1 convolution -> 3x3 convolution
        self.conv_1_3 = conv_1_3(in_dim, mid_dim_3, out_dim_3)
        # Branch 3: 1x1 convolution -> 5x5 convolution
        self.conv_1_5 = conv_1_5(in_dim, mid_dim_5, out_dim_5)
        # Branch 4: 3x3 max pooling -> 1x1 convolution
        self.max_3_1 = max_3_1(in_dim, pool_dim)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate along dimension 1."""
        branches = (self.conv_1, self.conv_1_3, self.conv_1_5, self.max_3_1)
        return torch.cat([branch(x) for branch in branches], dim=1)

#03. Define GoogLeNet

In [4]:
class GoogLeNet(nn.Module):
    """GoogLeNet-style image classifier built from stacked inception modules.

    The network is a two-convolution stem (``layer_1``), three groups of
    inception modules (``layer_2`` .. ``layer_4``), global average pooling,
    dropout, and one linear classifier. The stem expects 3-channel input.

    Args:
        base_dim: Output channels of the first stem convolution; the second
            stem convolution produces ``base_dim * 3`` channels. The inception
            widths are fixed constants and do not scale with ``base_dim``.
        num_classes: Number of output logits.
    """

    def __init__(self, base_dim, num_classes=2):
        super().__init__()
        # NOTE(review): the stem has no activation after either convolution,
        # unlike the inception branches. Left as-is because inserting ReLUs
        # would shift the nn.Sequential indices used as state_dict keys.
        self.layer_1 = nn.Sequential(
            nn.Conv2d(3, base_dim, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(base_dim, base_dim * 3, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        self.layer_2 = nn.Sequential(
            inception_module(base_dim * 3, 64, 96, 128, 16, 32, 32),
            # The module above always emits 64 + 128 + 32 + 32 = 256 channels,
            # independent of base_dim. The previous `base_dim * 4` matched
            # that only when base_dim == 64.
            inception_module(256, 128, 128, 192, 32, 96, 64),
            nn.MaxPool2d(3, 2, 1),
        )

        self.layer_3 = nn.Sequential(
            inception_module(480, 192, 96, 208, 16, 48, 64),
            inception_module(512, 160, 112, 224, 24, 64, 64),
            inception_module(512, 128, 128, 256, 24, 64, 64),
            inception_module(512, 112, 144, 288, 32, 64, 64),
            inception_module(528, 256, 160, 320, 32, 128, 128),
            nn.MaxPool2d(3, 2, 1),
        )

        self.layer_4 = nn.Sequential(
            inception_module(832, 256, 160, 320, 32, 128, 128),
            inception_module(832, 384, 192, 384, 48, 128, 128),
            # Kernel 1 / stride 1 average pooling is an identity operation;
            # kept so the module layout stays unchanged.
            nn.AvgPool2d(1, 1),
        )

        # Global average pooling and dropout applied before the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)

        # Fully connected classifier; its input width is measured with a
        # probe pass, so avgpool must already be defined at this point.
        self.fc_input_size = self.calculate_fc_input(base_dim)
        self.fc_layer = nn.Linear(self.fc_input_size, num_classes)

    def calculate_fc_input(self, base_dim):
        """Return the flattened feature width that ``fc_layer`` receives.

        A zero tensor is pushed through the same stages ``forward`` applies
        before flattening, including ``avgpool``, so the result matches what
        the classifier actually sees rather than the raw ``layer_4`` output.

        Args:
            base_dim: Unused; kept for backward compatibility.

        Returns:
            Number of features per sample after pooling, as an ``int``.
        """
        # A shape probe needs no autograd graph.
        with torch.no_grad():
            # Probe at a 3x32x32 input (CIFAR-10 image size is assumed).
            mock_tensor = torch.zeros(1, 3, 32, 32)
            stages = (
                self.layer_1,
                self.layer_2,
                self.layer_3,
                self.layer_4,
                self.avgpool,
            )
            for stage in stages:
                mock_tensor = stage(mock_tensor)
        return mock_tensor.size(1) * mock_tensor.size(2) * mock_tensor.size(3)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Input batch; the stem convolution requires 3 channels.

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        out = self.layer_1(x)
        out = self.layer_2(out)
        out = self.layer_3(out)
        out = self.layer_4(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)  # flatten to (batch, features)
        out = self.dropout(out)
        out = self.fc_layer(out)
        return out

In [5]:
# Training hyperparameters
num_epochs = 100       # number of passes over the training set
batch_size = 100       # samples per mini-batch for both data loaders
learning_rate = 2e-4   # learning rate handed to the optimizer

In [6]:
# Load CIFAR-10 and define preprocessing: convert images to tensors, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split: downloaded into ./data if missing, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29802814.58it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [7]:
# Pick the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Instantiate the 10-class model and move its parameters to that device.
net = GoogLeNet(base_dim=64, num_classes=10)
net = net.to(device)

# Loss function and optimizer: cross-entropy on the logits, Adam over all
# model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

#04.TRAIN

In [8]:
# Put the model in training mode explicitly. A fresh module already is, but
# the evaluation cell calls net.eval(); re-running this cell afterwards would
# otherwise train with dropout disabled.
net.train()

for epoch in range(num_epochs):
    running_loss = 0.0  # loss accumulated since the last log line
    for i, (inputs, labels) in enumerate(trainloader):
        # Move the mini-batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # log the mean loss every 100 mini-batches
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')



Epoch 1, Batch 100, Loss: 2.217
Epoch 1, Batch 200, Loss: 2.035
Epoch 1, Batch 300, Loss: 1.950
Epoch 1, Batch 400, Loss: 1.887
Epoch 1, Batch 500, Loss: 1.826
Epoch 2, Batch 100, Loss: 1.763
Epoch 2, Batch 200, Loss: 1.706
Epoch 2, Batch 300, Loss: 1.656
Epoch 2, Batch 400, Loss: 1.610
Epoch 2, Batch 500, Loss: 1.588
Epoch 3, Batch 100, Loss: 1.533
Epoch 3, Batch 200, Loss: 1.523
Epoch 3, Batch 300, Loss: 1.458
Epoch 3, Batch 400, Loss: 1.456
Epoch 3, Batch 500, Loss: 1.448
Epoch 4, Batch 100, Loss: 1.379
Epoch 4, Batch 200, Loss: 1.336
Epoch 4, Batch 300, Loss: 1.324
Epoch 4, Batch 400, Loss: 1.310
Epoch 4, Batch 500, Loss: 1.307
Epoch 5, Batch 100, Loss: 1.247
Epoch 5, Batch 200, Loss: 1.238
Epoch 5, Batch 300, Loss: 1.224
Epoch 5, Batch 400, Loss: 1.201
Epoch 5, Batch 500, Loss: 1.178
Epoch 6, Batch 100, Loss: 1.134
Epoch 6, Batch 200, Loss: 1.118
Epoch 6, Batch 300, Loss: 1.141
Epoch 6, Batch 400, Loss: 1.111
Epoch 6, Batch 500, Loss: 1.086
Epoch 7, Batch 100, Loss: 1.034
Epoch 7,

#05.TEST



In [9]:
# Evaluation loop: count how many test samples get the correct top-1 class.
net.eval()  # switch dropout to inference behaviour
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # Index of the largest logit per row is the predicted class.
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')


Accuracy of the network on the 10000 test images: 72.77%
