In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader,Subset
from tqdm import tqdm
import pandas as pd
import time

In [2]:
# 데이터 전처리 (CIFAR10은 3채널 이미지)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

# CIFAR10 학습 및 테스트 데이터셋 불러오기
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# CIFAR10의 targets는 리스트로 되어 있으므로 tensor로 변환
targets = torch.tensor(train_dataset.targets)

num_samples_per_class = 1700
selected_indices = []

# 0부터 9까지 각 클래스별로 인덱스를 추출하고, 랜덤하게 num_samples_per_class개 선택
for class_label in range(10):
    # 해당 클래스의 인덱스 추출
    indices = (targets == class_label).nonzero(as_tuple=True)[0]
    # 인덱스를 랜덤하게 섞은 후 원하는 개수만 선택
    selected = indices[torch.randperm(len(indices))[:num_samples_per_class]]
    selected_indices.extend(selected.tolist())

# 선택한 인덱스만 남긴 서브셋 생성
train_dataset = Subset(train_dataset, selected_indices)

100%|██████████| 170M/170M [00:02<00:00, 77.3MB/s]


In [3]:
import torch
import torch.nn as nn

class VGG11(nn.Module):
    def __init__(self):
        super(VGG11, self).__init__()
        self.features = nn.Sequential(
            # block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 10),
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)


class VGG11_7x7(nn.Module):
    def __init__(self):
        super(VGG11_7x7, self).__init__()
        self.features = nn.Sequential(
            # block 1: 7x7 하나로 3x3 3개 대체
            nn.Conv2d(3, 64, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 2
            nn.Conv2d(64, 128, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 3
            nn.Conv2d(128, 256, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 10),
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)


In [4]:
import time
import torch
import random
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm

def worker_init_fn(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

def run_experiment(model_cls, seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, num_workers=8, worker_init_fn=worker_init_fn)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=8, worker_init_fn=worker_init_fn)

    model = model_cls().to(device)
    params = count_parameters(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=4e-4)
    num_epochs = 30

    print(f"\n=== {model_cls.__name__} ===")
    print(f"Parameters: {params:,}")

    # 전체 훈련 시간 측정
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=True)
        for data, target in progress_bar:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            progress_bar.set_postfix({"Loss": f"{loss.item():.4f}"})
        avg_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, lr = {optimizer.param_groups[0]['lr']}")
    end_time = time.time()

    total_train_time = end_time - start_time

    # 테스트 정확도 측정
    model.eval()
    correct = 0
    start_time = time.time()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            correct += (output.argmax(dim=1) == target).sum().item()

    test_acc = 100. * correct / len(test_loader.dataset)
    test_time = time.time() - start_time

    print(f"Total Train Time: {total_train_time:.1f}s")
    print(f"Total Test Time       : {test_time:.1f}s")
    print(f"Test Accuracy   : {test_acc:.2f}%")

    return {
        'model': model_cls.__name__,
        'params': params,
        'train_time': total_train_time,
        'test_time': test_time,
        'test_acc': test_acc
    }

In [6]:
if __name__ == "__main__":
    results = []
    num_experiments = 3

    for i in range(num_experiments):
        seed = np.random.randint(10000)
        result = []
        print(f"[Seed : {seed} Experiment Start]\n")
        result.append(run_experiment(VGG11_7x7, seed))
        result.append(run_experiment(VGG11, seed))
        results.append(result)

[Seed : 934 Experiment Start]


=== VGG11_7x7 ===
Parameters: 23,037,578


Epoch 1/30: 100%|██████████| 34/34 [00:05<00:00,  5.78it/s, Loss=1.7344]


Epoch 1, Loss: 1.9979, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=1.6484]


Epoch 2, Loss: 1.6035, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:05<00:00,  5.81it/s, Loss=1.5059]


Epoch 3, Loss: 1.3871, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=1.0651]


Epoch 4, Loss: 1.2000, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.7779]


Epoch 5, Loss: 1.0411, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.7877]


Epoch 6, Loss: 0.8802, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.6316]


Epoch 7, Loss: 0.7278, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.6902]


Epoch 8, Loss: 0.5627, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.4942]


Epoch 9, Loss: 0.4171, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.3045]


Epoch 10, Loss: 0.2777, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.1925]


Epoch 11, Loss: 0.1736, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.1402]


Epoch 12, Loss: 0.1273, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.2672]


Epoch 13, Loss: 0.1071, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:05<00:00,  5.91it/s, Loss=0.0609]


Epoch 14, Loss: 0.1017, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0560]


Epoch 15, Loss: 0.0630, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0587]


Epoch 16, Loss: 0.0510, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0924]


Epoch 17, Loss: 0.0610, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.1007]


Epoch 18, Loss: 0.0510, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0172]


Epoch 19, Loss: 0.0370, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0051]


Epoch 20, Loss: 0.0319, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.0036]


Epoch 21, Loss: 0.0257, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:05<00:00,  5.92it/s, Loss=0.0230]


Epoch 22, Loss: 0.0217, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:05<00:00,  5.91it/s, Loss=0.0198]


Epoch 23, Loss: 0.0291, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0360]


Epoch 24, Loss: 0.0264, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0153]


Epoch 25, Loss: 0.0291, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.0119]


Epoch 26, Loss: 0.0248, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0146]


Epoch 27, Loss: 0.0279, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:05<00:00,  5.91it/s, Loss=0.0134]


Epoch 28, Loss: 0.0384, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0169]


Epoch 29, Loss: 0.0320, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0384]

Epoch 30, Loss: 0.0357, lr = 0.0004





Total Train Time: 173.5s
Total Test Time       : 1.6s
Test Accuracy   : 63.77%

=== VGG11 ===
Parameters: 22,940,682


Epoch 1/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.9345]


Epoch 1, Loss: 2.1808, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=1.7324]


Epoch 2, Loss: 1.9711, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.6999]


Epoch 3, Loss: 1.7680, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=1.6379]


Epoch 4, Loss: 1.6424, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=1.6307]


Epoch 5, Loss: 1.5610, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.2539]


Epoch 6, Loss: 1.4552, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.3388]


Epoch 7, Loss: 1.3876, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.2303]


Epoch 8, Loss: 1.3539, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.2219]


Epoch 9, Loss: 1.2278, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.9958]


Epoch 10, Loss: 1.1676, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=1.0348]


Epoch 11, Loss: 1.1150, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.9000]


Epoch 12, Loss: 1.0404, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.9107]


Epoch 13, Loss: 0.9695, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.8266]


Epoch 14, Loss: 0.8812, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.9727]


Epoch 15, Loss: 0.8553, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.6892]


Epoch 16, Loss: 0.7496, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.5942]


Epoch 17, Loss: 0.7070, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.6768]


Epoch 18, Loss: 0.6500, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.4062]


Epoch 19, Loss: 0.5859, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.4935]


Epoch 20, Loss: 0.5138, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.5206]


Epoch 21, Loss: 0.4584, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.3471]


Epoch 22, Loss: 0.4223, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.6002]


Epoch 23, Loss: 0.3325, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:06<00:00,  5.61it/s, Loss=0.4578]


Epoch 24, Loss: 0.3126, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.4808]


Epoch 25, Loss: 0.2443, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:06<00:00,  5.60it/s, Loss=0.1739]


Epoch 26, Loss: 0.2462, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.2898]


Epoch 27, Loss: 0.1925, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.1174]


Epoch 28, Loss: 0.1485, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.1094]


Epoch 29, Loss: 0.1106, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.0625]

Epoch 30, Loss: 0.1046, lr = 0.0004





Total Train Time: 180.7s
Total Test Time       : 1.5s
Test Accuracy   : 66.25%
[Seed : 9456 Experiment Start]


=== VGG11_7x7 ===
Parameters: 23,037,578


Epoch 1/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=1.8373]


Epoch 1, Loss: 1.9826, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=1.4577]


Epoch 2, Loss: 1.5777, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=1.3453]


Epoch 3, Loss: 1.3542, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:05<00:00,  5.83it/s, Loss=1.2135]


Epoch 4, Loss: 1.2064, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.8511]


Epoch 5, Loss: 1.0200, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.7686]


Epoch 6, Loss: 0.8601, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.7148]


Epoch 7, Loss: 0.6764, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.4913]


Epoch 8, Loss: 0.5081, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=0.4083]


Epoch 9, Loss: 0.3723, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.2023]


Epoch 10, Loss: 0.2466, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.1476]


Epoch 11, Loss: 0.1732, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0920]


Epoch 12, Loss: 0.1117, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0683]


Epoch 13, Loss: 0.0742, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0537]


Epoch 14, Loss: 0.0575, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0292]


Epoch 15, Loss: 0.0513, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0217]


Epoch 16, Loss: 0.0443, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=0.0627]


Epoch 17, Loss: 0.0451, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=0.0925]


Epoch 18, Loss: 0.0616, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:06<00:00,  5.42it/s, Loss=0.0686]


Epoch 19, Loss: 0.0661, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0953]


Epoch 20, Loss: 0.0602, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.0672]


Epoch 21, Loss: 0.0612, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0202]


Epoch 22, Loss: 0.0466, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.0257]


Epoch 23, Loss: 0.0298, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.0188]


Epoch 24, Loss: 0.0471, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0358]


Epoch 25, Loss: 0.0402, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0445]


Epoch 26, Loss: 0.0379, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0605]


Epoch 27, Loss: 0.0411, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:05<00:00,  5.83it/s, Loss=0.0550]


Epoch 28, Loss: 0.0470, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.0589]


Epoch 29, Loss: 0.0444, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0241]

Epoch 30, Loss: 0.0337, lr = 0.0004





Total Train Time: 174.6s
Total Test Time       : 1.6s
Test Accuracy   : 64.81%

=== VGG11 ===
Parameters: 22,940,682


Epoch 1/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=2.0667]


Epoch 1, Loss: 2.1891, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.7437]


Epoch 2, Loss: 1.9421, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=1.6025]


Epoch 3, Loss: 1.7378, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=1.7737]


Epoch 4, Loss: 1.6285, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:05<00:00,  5.69it/s, Loss=1.3957]


Epoch 5, Loss: 1.5040, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.3554]


Epoch 6, Loss: 1.3961, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.1946]


Epoch 7, Loss: 1.2974, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.2173]


Epoch 8, Loss: 1.1905, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:05<00:00,  5.68it/s, Loss=1.0030]


Epoch 9, Loss: 1.1355, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.1411]


Epoch 10, Loss: 1.0661, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.8458]


Epoch 11, Loss: 0.9815, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.9027]


Epoch 12, Loss: 0.8853, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.7997]


Epoch 13, Loss: 0.8365, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.6822]


Epoch 14, Loss: 0.7724, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.8386]


Epoch 15, Loss: 0.7050, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.6137]


Epoch 16, Loss: 0.6235, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.6402]


Epoch 17, Loss: 0.5781, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:06<00:00,  5.61it/s, Loss=0.4611]


Epoch 18, Loss: 0.4802, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.3446]


Epoch 19, Loss: 0.4023, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:06<00:00,  5.55it/s, Loss=0.4092]


Epoch 20, Loss: 0.3540, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.2835]


Epoch 21, Loss: 0.3222, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.2037]


Epoch 22, Loss: 0.2629, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.3341]


Epoch 23, Loss: 0.2514, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.2441]


Epoch 24, Loss: 0.2147, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.1417]


Epoch 25, Loss: 0.1637, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.1823]


Epoch 26, Loss: 0.1255, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.0900]


Epoch 27, Loss: 0.1415, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.0357]


Epoch 28, Loss: 0.0992, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.0478]


Epoch 29, Loss: 0.0630, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:06<00:00,  5.61it/s, Loss=0.0576]

Epoch 30, Loss: 0.0795, lr = 0.0004





Total Train Time: 180.9s
Total Test Time       : 1.5s
Test Accuracy   : 66.91%
[Seed : 4306 Experiment Start]


=== VGG11_7x7 ===
Parameters: 23,037,578


Epoch 1/30: 100%|██████████| 34/34 [00:05<00:00,  5.82it/s, Loss=1.7492]


Epoch 1, Loss: 1.9657, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=1.5144]


Epoch 2, Loss: 1.5840, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:05<00:00,  5.82it/s, Loss=1.4388]


Epoch 3, Loss: 1.3771, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=1.2990]


Epoch 4, Loss: 1.1912, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=0.8946]


Epoch 5, Loss: 1.0372, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=1.0438]


Epoch 6, Loss: 0.8501, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.5256]


Epoch 7, Loss: 0.7000, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.3918]


Epoch 8, Loss: 0.5016, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.3356]


Epoch 9, Loss: 0.3660, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=0.2028]


Epoch 10, Loss: 0.2680, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.2092]


Epoch 11, Loss: 0.1697, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.1071]


Epoch 12, Loss: 0.1240, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:05<00:00,  5.83it/s, Loss=0.0572]


Epoch 13, Loss: 0.0705, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:05<00:00,  5.89it/s, Loss=0.0172]


Epoch 14, Loss: 0.0504, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.0863]


Epoch 15, Loss: 0.0540, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:05<00:00,  5.90it/s, Loss=0.1369]


Epoch 16, Loss: 0.1055, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.1096]


Epoch 17, Loss: 0.0628, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:05<00:00,  5.87it/s, Loss=0.0561]


Epoch 18, Loss: 0.0454, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.0209]


Epoch 19, Loss: 0.0424, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.0084]


Epoch 20, Loss: 0.0263, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:05<00:00,  5.79it/s, Loss=0.0059]


Epoch 21, Loss: 0.0297, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.0905]


Epoch 22, Loss: 0.0297, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:05<00:00,  5.78it/s, Loss=0.0347]


Epoch 23, Loss: 0.0545, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:05<00:00,  5.85it/s, Loss=0.0410]


Epoch 24, Loss: 0.0554, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.0558]


Epoch 25, Loss: 0.0491, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.0141]


Epoch 26, Loss: 0.0275, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:05<00:00,  5.82it/s, Loss=0.0192]


Epoch 27, Loss: 0.0244, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:05<00:00,  5.84it/s, Loss=0.0903]


Epoch 28, Loss: 0.0507, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:05<00:00,  5.88it/s, Loss=0.1350]


Epoch 29, Loss: 0.0627, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:05<00:00,  5.86it/s, Loss=0.0301]

Epoch 30, Loss: 0.0470, lr = 0.0004





Total Train Time: 174.4s
Total Test Time       : 1.6s
Test Accuracy   : 63.45%

=== VGG11 ===
Parameters: 22,940,682


Epoch 1/30: 100%|██████████| 34/34 [00:05<00:00,  5.67it/s, Loss=2.0697]


Epoch 1, Loss: 2.1939, lr = 0.0004


Epoch 2/30: 100%|██████████| 34/34 [00:05<00:00,  5.68it/s, Loss=1.8873]


Epoch 2, Loss: 1.9592, lr = 0.0004


Epoch 3/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=1.7748]


Epoch 3, Loss: 1.8134, lr = 0.0004


Epoch 4/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.5806]


Epoch 4, Loss: 1.6975, lr = 0.0004


Epoch 5/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=1.5002]


Epoch 5, Loss: 1.5837, lr = 0.0004


Epoch 6/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.4091]


Epoch 6, Loss: 1.4688, lr = 0.0004


Epoch 7/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.3593]


Epoch 7, Loss: 1.3799, lr = 0.0004


Epoch 8/30: 100%|██████████| 34/34 [00:05<00:00,  5.68it/s, Loss=1.2857]


Epoch 8, Loss: 1.3159, lr = 0.0004


Epoch 9/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=1.3001]


Epoch 9, Loss: 1.2237, lr = 0.0004


Epoch 10/30: 100%|██████████| 34/34 [00:06<00:00,  5.67it/s, Loss=1.0886]


Epoch 10, Loss: 1.1389, lr = 0.0004


Epoch 11/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.9721]


Epoch 11, Loss: 1.0783, lr = 0.0004


Epoch 12/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.8831]


Epoch 12, Loss: 1.0056, lr = 0.0004


Epoch 13/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.8034]


Epoch 13, Loss: 0.9724, lr = 0.0004


Epoch 14/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=1.0287]


Epoch 14, Loss: 0.8945, lr = 0.0004


Epoch 15/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.8800]


Epoch 15, Loss: 0.8175, lr = 0.0004


Epoch 16/30: 100%|██████████| 34/34 [00:06<00:00,  5.62it/s, Loss=0.7211]


Epoch 16, Loss: 0.8038, lr = 0.0004


Epoch 17/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.7103]


Epoch 17, Loss: 0.7259, lr = 0.0004


Epoch 18/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.6307]


Epoch 18, Loss: 0.6569, lr = 0.0004


Epoch 19/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.6503]


Epoch 19, Loss: 0.5982, lr = 0.0004


Epoch 20/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.5738]


Epoch 20, Loss: 0.5475, lr = 0.0004


Epoch 21/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.4051]


Epoch 21, Loss: 0.4839, lr = 0.0004


Epoch 22/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.4319]


Epoch 22, Loss: 0.4433, lr = 0.0004


Epoch 23/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.3061]


Epoch 23, Loss: 0.3618, lr = 0.0004


Epoch 24/30: 100%|██████████| 34/34 [00:06<00:00,  5.66it/s, Loss=0.3493]


Epoch 24, Loss: 0.3200, lr = 0.0004


Epoch 25/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.2246]


Epoch 25, Loss: 0.2951, lr = 0.0004


Epoch 26/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.3659]


Epoch 26, Loss: 0.2352, lr = 0.0004


Epoch 27/30: 100%|██████████| 34/34 [00:06<00:00,  5.63it/s, Loss=0.2101]


Epoch 27, Loss: 0.2268, lr = 0.0004


Epoch 28/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.1783]


Epoch 28, Loss: 0.1553, lr = 0.0004


Epoch 29/30: 100%|██████████| 34/34 [00:06<00:00,  5.64it/s, Loss=0.1531]


Epoch 29, Loss: 0.1308, lr = 0.0004


Epoch 30/30: 100%|██████████| 34/34 [00:06<00:00,  5.65it/s, Loss=0.1033]

Epoch 30, Loss: 0.1125, lr = 0.0004





Total Train Time: 180.7s
Total Test Time       : 1.6s
Test Accuracy   : 67.53%


In [23]:
print("[Experiment Result]\n")
metrics = ["Parameters", "Parameters(Only Conv)", "Param Efficiency (% per 10M)", "Train Time (s)", "Test Time (s)", "Test Accuracy (%)"]
dfs = []

for i, result in enumerate(results):
    print(f"[Experiment {i+1}]")
    # FC 레이어 파라미터 수 = 21,020,682
    data = {
        result[0]['model']: [
            result[0]['params'],
            result[0]['params'] - 21020682,
            round(result[0]['test_acc'] / (result[0]['params'] / 1e7), 2),
            round(result[0]['train_time'], 1),
            round(result[0]['test_time'], 1),
            result[0]['test_acc']
        ],
        result[1]['model']: [
            result[1]['params'],
            result[1]['params'] - 21020682,
            round(result[1]['test_acc'] / (result[1]['params'] / 1e7), 2),
            round(result[1]['train_time'], 1),
            round(result[1]['test_time'], 1),
            result[1]['test_acc']
        ],
    }


    df = pd.DataFrame(data, index=metrics)
    df = df.astype(float)
    df["Difference (3x3 - 7x7)"] = df["VGG11"] - df["VGG11_7x7"]
    print(df, "\n")
    dfs.append(df)

print("[Average Result]\n")
avg_df = sum(dfs) / num_experiments
print(avg_df.round(2))

print("\n[Analysis]")
print(f"- VGG11_7x7 구조는 파라미터가 평균적으로 {abs(avg_df.loc['Parameters','Difference (3x3 - 7x7)']):.0f}개 적다.")
print(f"- 파라미터 효율 차이는 {avg_df.loc['Param Efficiency (% per 10M)','Difference (3x3 - 7x7)']:.2f} (% per M params)")
print(f"- 학습 시간 차이는 {avg_df.loc['Train Time (s)','Difference (3x3 - 7x7)']:.2f}s")
print(f"- 테스트 시간 차이는 {avg_df.loc['Test Time (s)','Difference (3x3 - 7x7)']:.2f}s")
print(f"- 테스트 정확도 차이는 {avg_df.loc['Test Accuracy (%)','Difference (3x3 - 7x7)']:.2f}%")
print(f"- 학습 시간 효율(비율)은 {avg_df.loc['Train Time (s)','VGG11'] / avg_df.loc['Train Time (s)','VGG11_7x7']:.2f}배")

[Experiment Result]

[Experiment 1]
                                VGG11_7x7        VGG11  Difference (3x3 - 7x7)
Parameters                    23037578.00  22940682.00               -96896.00
Parameters(Only Conv)          2016896.00   1920000.00               -96896.00
Param Efficiency (% per 10M)        27.68        28.88                    1.20
Train Time (s)                     173.50       180.70                    7.20
Test Time (s)                        1.60         1.50                   -0.10
Test Accuracy (%)                   63.77        66.25                    2.48 

[Experiment 2]
                                VGG11_7x7        VGG11  Difference (3x3 - 7x7)
Parameters                    23037578.00  22940682.00               -96896.00
Parameters(Only Conv)          2016896.00   1920000.00               -96896.00
Param Efficiency (% per 10M)        28.13        29.17                    1.04
Train Time (s)                     174.60       180.90                    6.30

=> 기존 VGGNet(3x3 Conv×3) 방식이 7x7 Conv 대체 모델보다 모든 지표에서 우세하였다.
 구체적으로, VGGNet은 7x7로 대체한 네트워크에 비해 적은 파라미터에도 평균 정확도가 약 3%p 높고, 레이어 깊이 증가로 인해 늘어난 학습 시간에도, 파라미터 효율 및 학습 시간 대비 정확도(accuracy/time)가 일관되게 앞섰다,
 Test Time에는 큰 차이가 없는 것으로 미루어, 3x3 conv만을 사용하더라도 추가적인 시간 병목 없이 Inference를 수행할 수 있을 것이다.
 이를 통해 '깊은 3x3 합성곱 + 비선형성의 누적'을 통해 7x7 Conv 하나로는 대체할 수 없는 성능 이점을 제공함을 확인할 수 있다.