In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils import data

In [None]:
# Global experiment settings, kept together so they are easy to tune.
seed = 7            # value handed to the torch RNG seeding calls
BATCH_SIZE = 64     # samples per mini-batch for both data loaders
EPOCHS = 30         # number of train/evaluate rounds in the main loop
LR = 1e-2           # learning rate passed to the optimizer

In [None]:
# Reproducibility setup.
# cuDNN: request deterministic algorithms and switch off the auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed the torch generator and the CUDA generator with the shared `seed`.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Further seeding that is left disabled in this notebook (the first three
# would also need `random`, `os` and `numpy` to be imported):
#   random.seed(seed)
#   os.environ['PYTHONHASHSEED'] = str(seed)
#   np.random.seed(seed)
#   torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.

In [None]:
# Use the GPU when torch reports one as available, otherwise stay on the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [None]:
# Show the CUDA flag and the chosen device, one per line.
print(USE_CUDA, DEVICE, sep="\n")

True
cuda


In [None]:
# Per-image preprocessing pipeline: a single ToTensor conversion step.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# FashionMNIST train and test splits, built in that order. Both are stored
# under ./.data/, are created with download=True, and share `transform`.
trainset, testset = (
    datasets.FashionMNIST(
        root='./.data/',
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

In [None]:
# Mini-batch iterators over the two splits.

# Training batches are drawn in a fresh random order every epoch.
train_loader = data.DataLoader(
    dataset     = trainset,
    batch_size  = BATCH_SIZE,
    shuffle     = True,
)

# The evaluation code sums loss and correct counts over the whole split, so
# the order of samples cannot change the metrics. Shuffling is therefore
# turned off: it saves building a permutation on every evaluation pass and
# stops evaluation from drawing on the global RNG that also determines the
# order of the training batches.
test_loader = data.DataLoader(
    dataset     = testset,
    batch_size  = BATCH_SIZE,
    shuffle     = False,
)

# 3-layer DN

In [None]:
class Net3(nn.Module):
    """Fully connected classifier 784 -> 256 -> 10 with one sigmoid hidden layer."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)   # flattened input -> hidden units
        self.fc2 = nn.Linear(256, 10)    # hidden units -> class scores

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return raw 10-way scores."""
        flat = x.view(-1, 784)
        hidden = self.fc1(flat).sigmoid()
        return self.fc2(hidden)

# 5-layer DN

In [None]:
class Net5(nn.Module):
    """Fully connected classifier 784 -> 1024 -> 512 -> 256 -> 10.

    The three hidden layers use a sigmoid activation; the last layer returns
    raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return raw 10-way scores."""
        out = x.view(-1, 784)
        # Same activation after each of the three hidden layers.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = torch.sigmoid(hidden_layer(out))
        return self.fc4(out)

# extreme-layer DN

In [None]:
class Net10(nn.Module):
    """Ten Linear layers: nine Linear+Sigmoid blocks and a linear classifier."""

    def __init__(self):
        super().__init__()

        # Layer widths; each consecutive pair is one block's (in, out) size.
        self.enc_size = [28*28, 1024, 512] + [256] * 7

        blocks = []
        for in_f, out_f in zip(self.enc_size[:-1], self.enc_size[1:]):
            blocks.append(self._make_block(in_f, out_f))
        self.layers = nn.ModuleList(blocks)

        self.classifier = nn.Linear(256, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 28*28 values and return raw 10-way scores."""
        out = x.view(-1, 28*28)
        for block in self.layers:
            out = block(out)
        return self.classifier(out)

    def _make_block(self, input_num, output_num):
        """Return a Linear(input_num, output_num) followed by a Sigmoid."""
        linear = nn.Linear(input_num, output_num)
        return nn.Sequential(linear, nn.Sigmoid())

# Quick shape check for Net10 (uncomment to run):
# moduleNet = Net10()
# print(moduleNet)

# output = moduleNet(torch.randn(64, 28, 28))
# print(output.shape)

# 5-layer DN (ReLU)

In [None]:
class Net5_R(nn.Module):
    """Fully connected classifier 784 -> 1024 -> 512 -> 256 -> 10 with ReLU.

    The three hidden layers use ReLU; the last layer returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return raw 10-way scores."""
        out = x.view(-1, 784)
        # Same activation after each of the three hidden layers.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = F.relu(hidden_layer(out))
        return self.fc4(out)

# 5-layer DN (ReLU, Dropout)

In [None]:
class Net_RD(nn.Module):
    """Fully connected classifier 784 -> 1024 -> 512 -> 256 -> 10.

    Each hidden layer is followed by ReLU and then functional dropout with
    probability ``dropout_p``; the last layer returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)

        # Drop probability shared by all three dropout applications.
        self.dropout_p = 0.2

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return raw 10-way scores."""
        out = x.view(-1, 784)
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = F.relu(hidden_layer(out))
            # Functional dropout needs the module's train/eval flag passed in.
            out = F.dropout(out, p=self.dropout_p, training=self.training)
        return self.fc4(out)

# Practice: 5-layer DN (ReLU, Dropout, BatchNorm1d)

In [None]:
class Net_RDB(nn.Module):
    """Five-layer fully connected classifier with BatchNorm, ReLU and Dropout.

    Layer widths are 784 -> 1024 -> 512 -> 256 -> 10. Every hidden layer is
    Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.2); the final Linear layer
    returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 10)

        # One BatchNorm per hidden layer, sized to that layer's output width.
        self.bn1 = nn.BatchNorm1d(1024)
        self.bn2 = nn.BatchNorm1d(512)
        self.bn3 = nn.BatchNorm1d(256)

        # Dropout holds no parameters, so one module serves all hidden layers.
        self.drop_layer = nn.Dropout(p=0.2)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return raw 10-way scores."""
        x = x.view(-1, 784)
        x = self.drop_layer(F.relu(self.bn1(self.fc1(x))))
        x = self.drop_layer(F.relu(self.bn2(self.fc2(x))))
        x = self.drop_layer(F.relu(self.bn3(self.fc3(x))))
        x = self.fc4(x)
        return x

# Practice: 5-layer DN (ReLU, Dropout, BatchNorm1d)
### ModuleList를 활용하여 구현

In [None]:
class Net_RDB(nn.Module):
    """Five-layer classifier with BatchNorm, ReLU and Dropout, built from a ModuleList.

    Hidden widths are 784 -> 1024 -> 512 -> 256, each produced by one block
    from ``_make_block``; a final Linear(256, 10) returns raw class scores.

    Note: this reuses the class name ``Net_RDB``, so when the notebook is run
    top to bottom it replaces the earlier definition of the same name.
    """

    def __init__(self):
        super().__init__()

        # Layer widths; each consecutive pair is one block's (in, out) size.
        self.enc_size = [28*28, 1024, 512, 256]
        self.layers = nn.ModuleList([
            self._make_block(in_f, out_f)
            for in_f, out_f in zip(self.enc_size, self.enc_size[1:])
        ])

        self.classifier = nn.Linear(256, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 28*28 values and return raw 10-way scores."""
        x = x.view(-1, 28*28)
        for layer in self.layers:
            x = layer(x)
        x = self.classifier(x)
        return x

    def _make_block(self, input_num, output_num):
        """Return Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.2) as one block."""
        return nn.Sequential(
            nn.Linear(input_num, output_num),
            nn.BatchNorm1d(output_num),
            nn.ReLU(),
            nn.Dropout(p=0.2),
        )

In [None]:
def train(model, train_loader, optimizer):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    The model is put in training mode. Each ``(inputs, labels)`` batch is
    moved to ``DEVICE``, scored with cross-entropy, back-propagated, and
    followed by one ``optimizer`` step. Nothing is returned.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Clear gradients left over from the previous batch before the
        # forward/backward pass of this one.
        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

In [None]:
def evaluate(model, test_loader):
    """Score ``model`` on every batch yielded by ``test_loader``.

    The model is put in eval mode and gradients are disabled. Averages are
    taken over the number of samples actually iterated, so the result is
    still correct when the loader covers only part of its dataset (custom
    sampler, ``drop_last``) or is a plain iterable of batches.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        test_loader: iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(test_loss, test_accuracy)``: mean cross-entropy per sample and the
        percentage of samples whose highest-scoring class equals the target.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    num_samples = 0
    with torch.no_grad():
        for inputs, target in test_loader:
            inputs, target = inputs.to(DEVICE), target.to(DEVICE)
            output = model(inputs)

            # Sum (not mean) per batch so the final division gives a true
            # per-sample average even when the last batch is smaller.
            test_loss += F.cross_entropy(output, target,
                                         reduction='sum').item()

            # Index of the highest score in each row is the predicted class.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            num_samples += target.size(0)

    if num_samples == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    test_loss /= num_samples
    test_accuracy = 100. * correct / num_samples
    return test_loss, test_accuracy


In [None]:
# Model under test and its optimizer.
# No class named `Net` exists in this notebook (the defined models are Net3,
# Net5, Net10, Net5_R, Net_RD and Net_RDB), so a defined class is named
# explicitly here. Swap the class name to compare architectures.
model        = Net3().to(DEVICE)
optimizer    = optim.SGD(model.parameters(), lr=LR)

In [None]:
# Epoch 0: score the model before any training so the log has a baseline row.
test_loss, test_accuracy = evaluate(model, test_loader)
print('[0] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
    test_loss, test_accuracy))
# Then alternate one training pass with one test-set evaluation per epoch,
# printing the test loss and accuracy after each.
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
          epoch, test_loss, test_accuracy))

[1] Test Loss: 0.4035, Accuracy: 85.49%
[2] Test Loss: 0.3567, Accuracy: 87.09%
[3] Test Loss: 0.4063, Accuracy: 85.00%
[4] Test Loss: 0.3303, Accuracy: 87.82%
[5] Test Loss: 0.3371, Accuracy: 87.72%
[6] Test Loss: 0.4342, Accuracy: 84.86%
[7] Test Loss: 0.4297, Accuracy: 84.89%
[8] Test Loss: 0.3480, Accuracy: 88.16%
[9] Test Loss: 0.3538, Accuracy: 88.44%
[10] Test Loss: 0.3899, Accuracy: 87.19%
[11] Test Loss: 0.3362, Accuracy: 88.83%
[12] Test Loss: 0.3788, Accuracy: 87.82%
[13] Test Loss: 0.3795, Accuracy: 88.18%
[14] Test Loss: 0.3711, Accuracy: 88.08%
[15] Test Loss: 0.3931, Accuracy: 88.19%
[16] Test Loss: 0.4051, Accuracy: 88.52%
[17] Test Loss: 0.5065, Accuracy: 86.00%
[18] Test Loss: 0.3655, Accuracy: 89.36%
[19] Test Loss: 0.3636, Accuracy: 89.31%
[20] Test Loss: 0.4273, Accuracy: 88.17%
[21] Test Loss: 0.4147, Accuracy: 89.13%
[22] Test Loss: 0.3685, Accuracy: 89.47%
[23] Test Loss: 0.6035, Accuracy: 85.96%
[24] Test Loss: 0.4548, Accuracy: 88.66%
[25] Test Loss: 0.3964, A

# Additional 0: Optimizer 변경하여 학습
# Additional 1: CIFAR10 데이터 학습
# Additional 2: extreme-layer 학습