## 모델 레이어 구현

In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import torch
import torch.nn as nn
import numpy as np
from torchinfo import summary

In [3]:
# Mount Google Drive at /content/drive (Colab only); the dataset, normalization
# statistics and checkpoints used below are read from / written to paths under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [5]:
import torch.nn as nn

class ConvBlock(nn.Module):
    """3x3 convolution -> batch norm -> ReLU, optionally followed by 2x2 max pooling.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
        pool: When True, a stride-2 ``MaxPool2d`` is appended to downsample
            the feature map by a factor of two.
    """

    def __init__(self, in_channels, out_channels, pool=False):
        super().__init__()
        # kernel 3 / stride 1 / padding 1 keeps the spatial size unchanged.
        stages = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        # Only pooled blocks shrink the feature map.
        stages += [nn.MaxPool2d(kernel_size=2, stride=2)] if pool else []
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the conv/BN/ReLU (and optional pooling) pipeline to ``x``."""
        out = self.block(x)
        return out


In [6]:
class VGG(nn.Module):
    """VGG-16 / VGG-19 style classifier assembled from ``ConvBlock`` units.

    Five convolutional stages (64, 128, 256, 512, 512 channels), each ending in
    a 2x2 max pool, are followed by adaptive average pooling to 7x7 and a
    three-layer fully connected classifier with dropout.  The first two stages
    always contain two conv blocks; the last three contain three (``'vgg16'``)
    or four (``'vgg19'``).

    Args:
        block: Architecture name, ``'vgg16'`` or ``'vgg19'``.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``block`` is not one of the supported names.
    """

    # Conv blocks per stage for the three deepest stages, keyed by architecture name.
    _DEEP_STAGE_DEPTH = {'vgg16': 3, 'vgg19': 4}

    def __init__(self, block, num_classes=1000):
        super(VGG, self).__init__()

        # Reject unknown names up front.  Previously any other value left `num`
        # unassigned and construction failed later with an UnboundLocalError.
        if block not in self._DEEP_STAGE_DEPTH:
            raise ValueError(
                f"Unsupported architecture {block!r}; "
                f"expected one of {sorted(self._DEEP_STAGE_DEPTH)}"
            )
        num = self._DEEP_STAGE_DEPTH[block]

        # Stage 1: 3 -> 64 channels
        self.conv1 = ConvBlock(3, 64)
        self.conv2 = ConvBlock(64, 64, pool=True)

        # Stage 2: 64 -> 128 channels
        self.conv3 = ConvBlock(64, 128)
        self.conv4 = ConvBlock(128, 128, pool=True)

        # Stages 3-5: one channel-changing block followed by `num - 1` more blocks,
        # the last of which pools.
        self.conv5 = ConvBlock(128, 256)
        self.conv6 = self._make_layers(256, num)

        self.conv7 = ConvBlock(256, 512)
        self.conv8 = self._make_layers(512, num)

        self.conv9 = ConvBlock(512, 512)
        self.conv10 = self._make_layers(512, num)

        # Adaptive average pooling fixes the feature map at 7x7 regardless of
        # the spatial size coming out of the conv stages.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # Fully connected classifier head
        self.classifier = nn.Sequential(
            nn.Linear(7*7*512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, num_classes),
        )
        self.initialize_weights()

    def _make_layers(self, in_ch, num):
        """Build the tail of a deep stage: ``num - 1`` channel-preserving blocks.

        The stage's first block (which changes the channel count) is created by
        the caller, so this returns ``num - 2`` plain blocks followed by one
        pooled block, all with ``in_ch`` input and output channels.
        """
        layers = []
        for _ in range(num - 2):
            layers.append(ConvBlock(in_ch, in_ch))
        layers.append(ConvBlock(in_ch, in_ch, pool=True))
        return nn.Sequential(*layers)

    def initialize_weights(self):
        """Initialize every ``nn.Linear`` with N(0, 0.01) weights and zero bias.

        Convolution and batch-norm layers keep PyTorch's default initialization.
        """
        for m in self.modules():
            if isinstance(m, nn.Linear):
                # Draw weights from a normal distribution
                nn.init.normal_(m.weight, mean=0, std=1e-2)
                # Zero the bias
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [7]:
# Build a 10-class VGG-16 and create one random 1x3x224x224 input to trace shapes.
model = VGG('vgg16',num_classes=10).to(device)
features = torch.randn(1, 3, 224, 224).to(device)

# Print per-layer input/output shapes and parameter counts.
summary(model, input_data=features, col_names=("input_size", "output_size", "num_params"), device=device.type)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
VGG                                      [1, 3, 224, 224]          [1, 10]                   --
├─ConvBlock: 1-1                         [1, 3, 224, 224]          [1, 64, 224, 224]         --
│    └─Sequential: 2-1                   [1, 3, 224, 224]          [1, 64, 224, 224]         --
│    │    └─Conv2d: 3-1                  [1, 3, 224, 224]          [1, 64, 224, 224]         1,792
│    │    └─BatchNorm2d: 3-2             [1, 64, 224, 224]         [1, 64, 224, 224]         128
│    │    └─ReLU: 3-3                    [1, 64, 224, 224]         [1, 64, 224, 224]         --
├─ConvBlock: 1-2                         [1, 64, 224, 224]         [1, 64, 112, 112]         --
│    └─Sequential: 2-2                   [1, 64, 224, 224]         [1, 64, 112, 112]         --
│    │    └─Conv2d: 3-4                  [1, 64, 224, 224]         [1, 64, 224, 224]         36,928
│    │    └─BatchNorm2d: 3-

## 데이터셋

원래 논문에서는 ILSVRC 데이터셋을 사용하지만, 이 노트북은 교육용이므로 ResNet 실습에서 사용했던 CIFAR-10을 사용한다.

In [8]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
import json

In [9]:
# Directory on Drive where CIFAR-10 is downloaded / cached.
path = '/content/drive/MyDrive/CIFAR10'
# makedirs(..., exist_ok=True) also creates missing parent directories and is a
# no-op when the directory already exists, so no separate existence check is needed.
os.makedirs(path, exist_ok=True)

In [10]:
# Location of the JSON file holding the per-channel normalization statistics
file_path = '/content/drive/MyDrive/resnet/mean_std.json'

# Read the statistics from disk
with open(file_path, 'r') as f:
    data = json.load(f)

# Unpack the mean and standard deviation, then display them
mean, std = data['mean'], data['std']
mean,std

([0.491399884223938, 0.48215845227241516, 0.4465309679508209],
 [0.2023009955883026, 0.19941280782222748, 0.20096160471439362])

In [11]:
# Dataset preprocessing: both splits use the same resize -> tensor -> normalize pipeline
def _make_transform():
    """Return a fresh preprocessing pipeline."""
    return transforms.Compose([
        transforms.Resize((224, 224)),    # resize to 224x224
        transforms.ToTensor(),
        transforms.Normalize(mean, std),  # normalize with the precomputed mean and std
    ])


transform_train = _make_transform()
transform_test = _make_transform()

# CIFAR-10 datasets and loaders (only the training loader shuffles)
loader_kwargs = dict(batch_size=4, num_workers=2)

trainset = datasets.CIFAR10(root=path, train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, shuffle=True, **loader_kwargs)

testset = datasets.CIFAR10(root=path, train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, shuffle=False, **loader_kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [12]:
# Helper that prints information about a data loader
def print_dataloader_info(dataloader, loader_name):
    """Print the batch index, image size, and label size/dtype of the first batch.

    Only the first batch yielded by ``dataloader`` is inspected; if it yields
    nothing, just the header line is printed.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` pairs.
        loader_name: Label used in the header line.
    """
    print(f"{loader_name} 정보:")
    # zip with range(1) stops after exactly one batch without fetching a second.
    for batch_idx, (images, labels) in zip(range(1), dataloader):
        print(f"배치 인덱스: {batch_idx}")
        print(f"이미지 크기: {images.size()}")
        labels_are_tensor = isinstance(labels, torch.Tensor)
        if not labels_are_tensor:
            # Non-tensor labels (e.g. a list): report length and element type.
            print(f"라벨 크기: {len(labels)}")
            print(f'라벨의 데이터타입 : {type(labels[0])}')
            continue
        print(f"라벨 크기: {labels.size()}")
        print(f'라벨의 데이터타입 : {labels[0].dtype}')

# Print first-batch information for the training loader
print_dataloader_info(trainloader, "Train Loader")
print("\n")
# Print first-batch information for the test loader
print_dataloader_info(testloader, "Test Loader")

Train Loader 정보:
배치 인덱스: 0
이미지 크기: torch.Size([4, 3, 224, 224])
라벨 크기: torch.Size([4])
라벨의 데이터타입 : torch.int64


Test Loader 정보:
배치 인덱스: 0
이미지 크기: torch.Size([4, 3, 224, 224])
라벨 크기: torch.Size([4])
라벨의 데이터타입 : torch.int64


In [13]:
# Pull one batch from the training loader and display its first image tensor and label.
img,label = next(iter(trainloader))
img[0],label[0]

(tensor([[[-0.4324, -0.4324, -0.4324,  ...,  1.0602,  1.0602,  1.0602],
          [-0.4324, -0.4324, -0.4324,  ...,  1.0602,  1.0602,  1.0602],
          [-0.4324, -0.4324, -0.4324,  ...,  1.0602,  1.0602,  1.0602],
          ...,
          [-0.9558, -0.9558, -0.9558,  ..., -1.5567, -1.5567, -1.5567],
          [-0.9558, -0.9558, -0.9558,  ..., -1.5567, -1.5567, -1.5567],
          [-0.9558, -0.9558, -0.9558,  ..., -1.5567, -1.5567, -1.5567]],
 
         [[-0.7660, -0.7660, -0.7660,  ...,  1.5742,  1.5742,  1.5742],
          [-0.7660, -0.7660, -0.7660,  ...,  1.5742,  1.5742,  1.5742],
          [-0.7660, -0.7660, -0.7660,  ...,  1.5742,  1.5742,  1.5742],
          ...,
          [-1.1986, -1.1986, -1.1986,  ..., -1.7689, -1.7689, -1.7689],
          [-1.1986, -1.1986, -1.1986,  ..., -1.7689, -1.7689, -1.7689],
          [-1.1986, -1.1986, -1.1986,  ..., -1.7689, -1.7689, -1.7689]],
 
         [[-1.2853, -1.2853, -1.2853,  ...,  2.5785,  2.5785,  2.5785],
          [-1.2853, -1.2853,

## 모델 훈련


In [14]:
# Hyperparameters
num_epochs = 10
# Progress is reported on the first epoch and on every `epoch_step`-th epoch.
epoch_step = 2

# Loss function and optimizer (SGD with momentum and L2 weight decay)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

# Learning-rate scheduler: multiplies the LR by gamma every `step_size` calls to scheduler.step().
# NOTE(review): step_size=10 is counted in scheduler.step() calls — confirm that the
# place step() is called (per batch vs. per epoch) matches the intended schedule.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [15]:
def model_train(model, data_loader, criterion, optimizer, epoch):
    """Train ``model`` for one epoch and return the average loss and accuracy.

    Relies on the notebook-level globals ``device``, ``epoch_step`` and
    ``scheduler``.

    Args:
        model: Network to optimize; it is switched to training mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Zero-based epoch index; decides whether a tqdm bar is shown.

    Returns:
        Tuple ``(avg_loss, avg_accuracy)`` averaged over every sample seen.
    """
    model.train()
    running_size, running_loss, correct = 0.0, 0.0, 0.0

    # Show a progress bar on the first epoch and on every `epoch_step`-th epoch.
    show_progress = (epoch + 1) % epoch_step == 0 or epoch == 0
    pbar = tqdm(data_loader) if show_progress else data_loader

    for images, labels in pbar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch average is per sample.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        running_size += batch_size
        correct += (outputs.argmax(1) == labels).sum().item()

        if show_progress:
            pbar.set_description('[Training] loss: ' +
                                f'{running_loss / running_size:.4f}, accuracy: ' +
                                f'{correct / running_size:.4f}')
        # No per-batch torch.cuda.empty_cache(): it does not give PyTorch more
        # usable memory and only slows the loop by defeating the caching allocator.

    # Advance the LR schedule once per epoch.  Stepping it after every batch made
    # StepLR shrink the learning rate by `gamma` every `step_size` *batches*,
    # driving it to ~0 within the first epoch.
    scheduler.step()

    avg_accuracy = correct / running_size
    avg_loss = running_loss / running_size

    return avg_loss, avg_accuracy

def model_eval(model, data_loader, criterion, epoch):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Relies on the notebook-level globals ``device`` and ``epoch_step``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_loader: Loader yielding ``(images, labels)`` batches; must expose
            ``.dataset`` so the totals can be averaged over its length.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        epoch: Zero-based epoch index; decides whether a tqdm bar is shown.

    Returns:
        Tuple ``(loss, accuracy)`` averaged over ``len(data_loader.dataset)``.
    """
    model.eval()

    # Progress bar only on the first epoch and on every `epoch_step`-th epoch.
    show_progress = (epoch + 1) % epoch_step == 0 or epoch == 0
    batches = tqdm(data_loader) if show_progress else data_loader

    total_loss, num_correct = 0.0, 0.0
    with torch.no_grad():
        for images, labels in batches:
            images = images.to(device)
            labels = labels.to(device)
            logits = model(images)

            num_correct += (logits.argmax(dim=1) == labels).sum().item()
            # Batch-mean loss times batch size gives the summed loss for the batch.
            total_loss += criterion(logits, labels).item() * images.size(0)

    num_samples = len(data_loader.dataset)
    return total_loss / num_samples, num_correct / num_samples

In [16]:
# Directory on Drive where the model checkpoint is written.
directory = '/content/drive/MyDrive/vgg'

# exist_ok=True replaces the separate existence check and avoids the
# check-then-create race between the two calls.
os.makedirs(directory, exist_ok=True)

In [None]:
# Train and evaluate the model, saving a checkpoint after every epoch
num_epochs = 10
epoch_step = 2
loss = []
accuracy = []

# The same checkpoint file is overwritten at the end of each epoch.
model_save_path = os.path.join(directory, 'model.pth')

for epoch in range(num_epochs):
    train_loss, train_accuracy = model_train(model, trainloader, criterion, optimizer, epoch)
    test_loss, test_accuracy = model_eval(model, testloader, criterion, epoch)

    # Keep one [train, test] pair per epoch for plotting later.
    loss.append([train_loss, test_loss])
    accuracy.append([train_accuracy, test_accuracy])

    # Report on the first epoch and on every `epoch_step`-th epoch.
    is_report_epoch = (epoch + 1) % epoch_step == 0 or epoch == 0
    if is_report_epoch:
        print(f"epoch {epoch+1:03d}, Training loss: "
              f"{train_loss:.4f}, Training accuracy: {train_accuracy:.4f}")
        print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

[Training] loss: 21.8294, accuracy: 0.1050:   0%|          | 50/12500 [07:44<33:19:24,  9.64s/it]

## 그래프 확인

In [None]:
import matplotlib.pyplot as plt

# Split the per-epoch [train, test] pairs into one series per curve
train_losses, val_losses = zip(*loss)
train_accuracies, val_accuracies = zip(*accuracy)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Loss curves (left panel)
loss_ax.plot(train_losses, label='train')
loss_ax.plot(val_losses, label='val')
loss_ax.set_xlabel('Training Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Train-Val Loss')

# Accuracy curves (right panel)
acc_ax.plot(train_accuracies, label='train')
acc_ax.plot(val_accuracies, label='val')
acc_ax.set_xlabel('Training Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.set_title('Train-Val Accuracy')

fig.tight_layout()
plt.show()

## 모델 실행


In [None]:
# Rebuild the architecture and restore the trained weights from the checkpoint.
model_save_path = '/content/drive/MyDrive/vgg/model.pth'
model = VGG('vgg16',num_classes=10).to(device)
# map_location remaps tensors that were saved on a GPU onto the current device,
# so the checkpoint also loads on a CPU-only runtime.
model.load_state_dict(torch.load(model_save_path, map_location=device))
# Switch dropout and batch norm to inference behaviour.
model.eval()

In [None]:
# Take the first batch from the test loader and display its first image tensor.
images, labels = next(iter(testloader))
images[0]

In [None]:
# Size of a single image tensor from the batch.
images[0].size()

In [None]:
# Convert the label tensor to a NumPy array of class indices
label_indices = labels.numpy()

# `class_labels` was only assigned in a later cell, so a fresh top-to-bottom run
# raised NameError here; define it before its first use.
class_labels = trainset.classes

# Map each label index to its class name
label_names = [class_labels[idx] for idx in label_indices]

print(label_names)

In [None]:
# Visualize the image
# The tensor is normalized and laid out channels-first (3, H, W); imshow needs
# (H, W, 3) with float values in [0, 1].  Undo the normalization, clamp away
# rounding overshoot, and move the channel axis last.
mean_t = torch.tensor(mean).view(3, 1, 1)
std_t = torch.tensor(std).view(3, 1, 1)
image_hwc = (images[0] * std_t + mean_t).clamp(0, 1).permute(1, 2, 0)

plt.imshow(image_hwc.numpy())
plt.axis('off')  # hide the axes
plt.show()

In [None]:
# Class names from the training dataset; indexed by label id when mapping predictions.
class_labels = trainset.classes
class_labels

In [None]:
# Add a leading batch dimension so the single image can be passed to the model.
test_image = images[0].unsqueeze(0)
test_image.size()

In [None]:
# Run the image through the model to get a prediction
with torch.no_grad():  # gradients are not needed at inference time
    # The batch comes off the DataLoader on the CPU; move it to the model's
    # device so the forward pass also works when the model is on CUDA.
    outputs = model(test_image.to(device))
# Highest logit and its class index along the class dimension
value, label_idx = torch.max(outputs, 1)
value, label_idx

In [None]:
# Index of the predicted class
predicted_index = label_idx.item()  # pull the value out of the tensor as a Python int

# Map the index to its class name
predicted_label = class_labels[predicted_index]
predicted_label