In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader,Subset
from tqdm import tqdm

In [25]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# Preprocessing for the 3-channel CIFAR10 images: convert to tensors, then
# normalize each channel with the (mean, std) tuples passed to Normalize.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load the CIFAR10 train and test splits (downloaded into ./data if missing).
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# CIFAR10 keeps its targets in a plain list; convert to a tensor so we can mask by class.
targets = torch.tensor(train_dataset.targets)

num_samples_per_class = 300
selected_indices = []

# Draw the subset from a dedicated, fixed-seed generator. This cell runs before
# any global seed is set, so using the global RNG here would silently give a
# different training subset on every kernel restart.
subset_seed = 0
subset_rng = torch.Generator().manual_seed(subset_seed)

# For each class label 0..9, pick num_samples_per_class random training indices.
for class_label in range(10):
    # Indices of all training samples belonging to this class.
    indices = (targets == class_label).nonzero(as_tuple=True)[0]
    # Shuffle those indices and keep only the first num_samples_per_class.
    shuffled = torch.randperm(len(indices), generator=subset_rng)
    selected = indices[shuffled[:num_samples_per_class]]
    selected_indices.extend(selected.tolist())

# Restrict the training set to the selected indices.
train_dataset = Subset(train_dataset, selected_indices)

# Build the DataLoaders (training data is reshuffled every epoch; test data is not).
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=8, pin_memory=True)


Files already downloaded and verified
Files already downloaded and verified


In [26]:
# K = 2P + 1 to make the output size same as the input size
class SigmoidCNN(nn.Module):
    """Plain CNN whose every convolutional stage ends in a sigmoid.

    Layout: a 3->32 channel stem (``base``), ``num_blocks`` additional 32->32
    stages (``blocks``), global average pooling (``pool``) and a 32->10 linear
    head (``classifier``). Each stage is Conv2d(3x3, stride 1, padding 1) ->
    BatchNorm2d -> Sigmoid, so the spatial size is unchanged until pooling.

    Args:
        num_blocks: number of 32->32 stages stacked after the stem (may be 0).
    """

    def __init__(self, num_blocks):
        super().__init__()
        self.num_blocks = num_blocks

        def make_stage(in_ch):
            # One conv -> batch-norm -> sigmoid stage producing 32 channels.
            conv = nn.Conv2d(in_ch, 32, kernel_size=3, stride=1, padding=1)
            return nn.Sequential(conv, nn.BatchNorm2d(32), nn.Sigmoid())

        # Submodules are created in the order base, pool, blocks, classifier;
        # this is also the order in which they are registered on the module.
        self.base = make_stage(3)
        self.pool = nn.AdaptiveAvgPool2d(1)
        stages = [make_stage(32) for _ in range(num_blocks)]
        self.blocks = nn.ModuleList(stages)
        self.classifier = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``."""
        feats = self.base(x)
        for stage in self.blocks:
            feats = stage(feats)
        pooled = self.pool(feats)
        # Collapse the pooled (N, 32, 1, 1) map to (N, 32) for the linear head.
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

class ReLUCNN(nn.Module):
    """Plain CNN whose every convolutional stage ends in a ReLU.

    Structure: ``base`` (3->32 channels), then ``num_blocks`` stacked 32->32
    stages in ``blocks``, then global average pooling and a 32->10 linear
    ``classifier``. All convolutions are 3x3 with stride 1 and padding 1, which
    keeps the spatial resolution constant up to the pooling layer.

    Args:
        num_blocks: how many 32->32 stages follow the stem (0 is allowed).
    """

    def __init__(self, num_blocks):
        super().__init__()

        def conv_bn_relu(channels_in):
            # Conv2d -> BatchNorm2d -> ReLU, always emitting 32 channels.
            layers = [
                nn.Conv2d(channels_in, 32, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
            ]
            return nn.Sequential(*layers)

        self.num_blocks = num_blocks
        # Creation order (base, pool, blocks, classifier) is kept as-is because
        # it determines the order in which the submodules are registered.
        self.base = conv_bn_relu(3)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.blocks = nn.ModuleList([conv_bn_relu(32) for _ in range(num_blocks)])
        self.classifier = nn.Linear(32, 10)

    def forward(self, x):
        """Map a batch of images ``x`` to one row of 10 class scores each."""
        out = self.base(x)
        for layer_stack in self.blocks:
            out = layer_stack(out)
        out = self.pool(out)
        batch_size = out.size(0)
        # Flatten the pooled feature map to (batch, 32) before classification.
        return self.classifier(out.view(batch_size, -1))

In [27]:
def _fit(model, loader, device, num_epochs):
    """Train ``model`` in place with AdamW (lr=0.001) on cross-entropy loss."""
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001)
    for _ in range(num_epochs):
        model.train()
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()


def _top1_accuracy(model, loader, device):
    """Return the top-1 accuracy (in percent) of ``model`` over ``loader``."""
    model.to(device)
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)  # one row of class scores per sample
            correct += (output.argmax(dim=1) == target).sum().item()
    return 100 * correct / len(loader.dataset)


def experiment(max_blocks=8, num_epochs=10):
    """Train and evaluate SigmoidCNN and ReLUCNN at increasing depth.

    For each ``i`` in ``range(max_blocks)`` a ``SigmoidCNN(i)`` and a
    ``ReLUCNN(i)`` are built, trained on the module-level ``train_loader`` and
    scored on the module-level ``test_loader``. The printed "Num_Blocks" label
    is ``i + 1``: the stem counts as the first block, followed by ``i`` extra
    blocks.

    Args:
        max_blocks: number of depths to sweep (defaults to the original 8).
        num_epochs: training epochs per model (defaults to the original 10).

    Returns:
        dict with keys ``"Sigmoid"`` and ``"ReLU"``, each a list holding one
        top-1 test accuracy (percent) per depth, in sweep order.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    Accuracies = {"Sigmoid": list(), "ReLU": list()}
    for i in range(max_blocks):
        # Build both models before any training so that, under a fixed seed,
        # random numbers are consumed in the same order as before.
        models = {"Sigmoid": SigmoidCNN(i), "ReLU": ReLUCNN(i)}
        banner = f"ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: {i+1}ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ"
        print(banner)

        # Train every model first, then evaluate every model.
        for model in models.values():
            _fit(model, train_loader, device, num_epochs)

        for key, model in models.items():
            top1_acc = _top1_accuracy(model, test_loader, device)
            Accuracies[key].append(top1_acc)
            # The separator after the class name was missing, which printed
            # e.g. "SigmoidCNNBlock Number".
            print(f"Model : {model.__class__.__name__}, Block Number: {i+1}, Top1 Accuracy: {top1_acc:.2f}%")
        print(banner + "\n")
    return Accuracies


In [28]:
if __name__ == "__main__":
    results = []
    num_experiments = 5
    # Seeds are derived from a fixed base so that every run of this cell
    # repeats the same experiments. Drawing the seed from the unseeded NumPy
    # RNG gave different, unrecorded seeds on each run.
    base_seed = 42

    for i in range(num_experiments):
        # A distinct but deterministic seed per experiment.
        seed = base_seed + i
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        # Ask cuDNN for deterministic algorithms and disable its autotuner.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        print(f"Experiment {i+1}")
        results.append(experiment())
        print("\n")

Experiment 1
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 1ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ
Model : SigmoidCNNBlock Number: 1, Top1 Accuracy: 20.29%
Model : ReLUCNNBlock Number: 1, Top1 Accuracy: 23.88%
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 1ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 2ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ
Model : SigmoidCNNBlock Number: 2, Top1 Accuracy: 21.42%
Model : ReLUCNNBlock Number: 2, Top1 Accuracy: 29.90%
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 2ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 3ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ
Model : SigmoidCNNBlock Number: 3, Top1 Accuracy: 22.04%
Model : ReLUCNNBlock Number: 3, Top1 Accuracy: 32.82%
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 3ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 4ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ
Model : SigmoidCNNBlock Number: 4, Top1 Accuracy: 22.77%
Model : ReLUCNNBlock Number: 4, Top1 Accuracy: 32.75%
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 4ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 5ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ
Model : SigmoidCNNBlock Number: 5, Top1 Accuracy: 24.96%
Model : ReLUCNNBlock Number: 5, Top1 Accuracy: 34.11%
ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 5ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡNum_Blocks: 6ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

In [29]:
# Summarize the results: one table per experiment, then the average table.
import pandas as pd

dfs = []
for i, Accuracies in enumerate(results):
    print(f"[Experiment {i+1}]")
    # Per-depth accuracy gap between the two activations (ReLU minus Sigmoid).
    differences = [relu - sig for sig, relu in zip(Accuracies["Sigmoid"], Accuracies["ReLU"])]

    # One row label per evaluated depth, derived from the result length
    # instead of a hard-coded 1..8.
    index = [f"Num_Blocks : {n}" for n in range(1, len(Accuracies["Sigmoid"]) + 1)]
    data = {
        "Sigmoid": Accuracies["Sigmoid"],
        "ReLU": Accuracies["ReLU"],
        "Differences": differences
    }

    df = pd.DataFrame(data, index=index)
    dfs.append(df)

    print(df)
    print("\n")

print("[Averages]")
# Accumulate into a copy: in-place `+=` / `/=` on `dfs[0]` itself would
# overwrite the first experiment's table with the average.
average_df = dfs[0].copy()
for df_run in dfs[1:]:
    average_df = average_df + df_run
average_df = average_df / len(dfs)
print(average_df)

[Experiment 1]
                Sigmoid   ReLU  Differences
Num_Blocks : 1    20.29  23.88         3.59
Num_Blocks : 2    21.42  29.90         8.48
Num_Blocks : 3    22.04  32.82        10.78
Num_Blocks : 4    22.77  32.75         9.98
Num_Blocks : 5    24.96  34.11         9.15
Num_Blocks : 6    23.68  36.33        12.65
Num_Blocks : 7    16.90  38.52        21.62
Num_Blocks : 8    18.87  42.19        23.32


[Experiment 2]
                Sigmoid   ReLU  Differences
Num_Blocks : 1    18.91  21.63         2.72
Num_Blocks : 2    20.46  28.89         8.43
Num_Blocks : 3    21.53  33.84        12.31
Num_Blocks : 4    24.91  36.78        11.87
Num_Blocks : 5    21.72  33.76        12.04
Num_Blocks : 6    23.20  36.89        13.69
Num_Blocks : 7    20.60  38.43        17.83
Num_Blocks : 8    19.00  35.46        16.46


[Experiment 3]
                Sigmoid   ReLU  Differences
Num_Blocks : 1    18.49  24.09         5.60
Num_Blocks : 2    21.69  30.01         8.32
Num_Blocks : 3    20.77  33