In [2]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms


# Preprocessing applied to every image: resize, convert to grayscale, then
# convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),  # (h, w) order
        transforms.Grayscale(),
        transforms.ToTensor(),
    ]
)

# Both datasets are read from class-per-folder directory trees and share the
# same preprocessing pipeline; labels are left untransformed.
train = ImageFolder("./_data/syllable/", transform=transform)
test = ImageFolder("./_data/syllable_test/", transform=transform)

In [3]:
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

batch_size = 64

# Only the training loader is shuffled; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size)

# Peek at a single training batch to confirm the tensor layout.
# Nothing is printed when the loader yields no batches.
_first_batch = next(iter(train_loader), None)
if _first_batch is not None:
    X, y = _first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")

Shape of X [N, C, H, W]: torch.Size([64, 1, 32, 32])


In [4]:
# 파이토치
import torch

# 파이토치 레이어 정의를 위한 torch.nn
import torch.nn as nn


def make_layer(config=(64, 64, 'M', 128, 128, 128, 'M', 256, 256, 256, 'M', 512, 'B', 'M', 512, 'B', 'M'),
               in_channels=1):
    """Build a convolutional feature extractor from a layer specification.

    Each entry of ``config`` appends layers to the stack:

    * an integer ``n`` -> 3x3 ``Conv2d`` (padding 1) producing ``n`` channels,
      followed by ``ReLU``;
    * ``'M'`` -> 2x2 ``MaxPool2d`` with stride 2;
    * ``'B'`` -> bias-free 3x3 ``Conv2d`` that keeps the current channel count,
      followed by ``BatchNorm2d`` and ``ReLU``.

    Args:
        config: Iterable of layer tokens as described above. The default is an
            immutable tuple so the shared default can never be mutated.
        in_channels: Number of channels of the input fed to the first layer
            (defaults to 1, the previously hard-coded value).

    Returns:
        nn.Sequential: The layers in the order given by ``config``.

    Raises:
        ValueError: If ``config`` contains a string token other than ``'M'``
            or ``'B'``.
    """
    layers = []
    in_channel = in_channels
    for out_channel in config:
        if out_channel == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif out_channel == 'B':
            # The conv bias is dropped because the following BatchNorm2d has
            # its own learnable shift.
            layers.append(nn.Conv2d(in_channel, in_channel, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(in_channel))
            layers.append(nn.ReLU(inplace=True))
        elif isinstance(out_channel, str):
            # Fail early with a clear message instead of an obscure error from Conv2d.
            raise ValueError("Unknown layer token in config: {!r}".format(out_channel))
        else:
            layers.append(nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            # Later layers consume what this convolution produced.
            in_channel = out_channel

    return nn.Sequential(*layers)

class NET(nn.Module):
    """Image classifier: a ``make_layer()`` feature stack followed by one linear layer.

    Args:
        num_classes: Number of output logits produced per sample (default 2350).
    """

    def __init__(self, num_classes=2350):
        super().__init__()
        # Convolutional feature extractor built from make_layer's default config.
        self.features = make_layer()

        # Classification head: a single linear layer that expects 512 flattened
        # features per sample.
        head = nn.Linear(512, num_classes)
        self.classifier = nn.Sequential(head)

    def forward(self, x):
        """Run the feature stack, flatten all non-batch dimensions, and return logits."""
        feature_maps = self.features(x)
        flat = torch.flatten(feature_maps, 1)
        return self.classifier(flat)

In [5]:
import torchsummary

# Select the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Training configuration: number of epochs, network, loss and optimizer.
total_epoch = 10
model = NET()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=1e-5)

# Print a per-layer summary for a single-channel 32x32 input, evaluated on the CPU.
torchsummary.summary(model, (1, 32, 32), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]             640
              ReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3           [-1, 64, 32, 32]          36,928
              ReLU-4           [-1, 64, 32, 32]               0
         MaxPool2d-5           [-1, 64, 16, 16]               0
            Conv2d-6          [-1, 128, 16, 16]          73,856
              ReLU-7          [-1, 128, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]         147,584
              ReLU-9          [-1, 128, 16, 16]               0
           Conv2d-10          [-1, 128, 16, 16]         147,584
             ReLU-11          [-1, 128, 16, 16]               0
        MaxPool2d-12            [-1, 128, 8, 8]               0
           Conv2d-13            [-1, 256, 8, 8]         295,168
             ReLU-14            [-1, 25

In [10]:
# Start from a fresh network that will receive the weights stored in model_1.pth.
model = NET()

# Rebuild the optimizer for this new model instance. An optimizer created for
# an earlier NET() object keeps references to that object's parameters, so its
# step() would never update the network restored here.
# Keep lr / weight_decay in sync with the initial optimizer setup.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

# map_location='cpu' lets a checkpoint saved from GPU tensors load into this
# CPU-resident model. load_state_dict copies values into the existing
# parameters, so the optimizer built above stays valid.
model.load_state_dict(torch.load('./model_1.pth', map_location='cpu'))

<All keys matched successfully>

In [None]:
# Number of mini-batches per epoch (used only in the progress messages).
total_iteration_per_epoch = len(train_loader)

# Send every batch to whichever device currently holds the model's weights so
# inputs and parameters can never end up on different devices.
model_device = next(model.parameters()).device

# Epoch numbering starts at 2: this loop continues from the weights restored
# from model_1.pth.
for epoch in range(2, total_epoch + 1):
    # ---- training phase ----
    model.train()
    for iteration, (images, labels) in enumerate(train_loader):
        images = images.to(model_device)
        labels = labels.to(model_device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch [{}/{}], iteration [{}/{}] Loss: {:.4f}'.format(epoch, total_epoch, iteration+1, total_iteration_per_epoch, loss.item()))

    # Save a checkpoint after every epoch.
    torch.save(model.state_dict(), 'model_' + str(epoch) + ".pth")

    # ---- evaluation phase ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(model_device)
            labels = labels.to(model_device)

            # Forward pass; the predicted class is the index of the largest logit.
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty test set reports NaN instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else float('nan')
    print('Epoch [{}/{}], Test Accuracy of the model on the {} test images: {} %'.format(epoch, total_epoch, total, accuracy))

Epoch [2/10], iteration [1/147] Loss: 7.7717
Epoch [2/10], iteration [2/147] Loss: 7.7564
Epoch [2/10], iteration [3/147] Loss: 7.7502
Epoch [2/10], iteration [4/147] Loss: 7.7818
Epoch [2/10], iteration [5/147] Loss: 7.7686
Epoch [2/10], iteration [6/147] Loss: 7.7942
Epoch [2/10], iteration [7/147] Loss: 7.7621
Epoch [2/10], iteration [8/147] Loss: 7.7963
Epoch [2/10], iteration [9/147] Loss: 7.8041
Epoch [2/10], iteration [10/147] Loss: 7.7921
Epoch [2/10], iteration [11/147] Loss: 7.7350
Epoch [2/10], iteration [12/147] Loss: 7.7931
Epoch [2/10], iteration [13/147] Loss: 7.7829
Epoch [2/10], iteration [14/147] Loss: 7.8040
Epoch [2/10], iteration [15/147] Loss: 7.7799
Epoch [2/10], iteration [16/147] Loss: 7.7800
Epoch [2/10], iteration [17/147] Loss: 7.7762
Epoch [2/10], iteration [18/147] Loss: 7.7618
Epoch [2/10], iteration [19/147] Loss: 7.7840
Epoch [2/10], iteration [20/147] Loss: 7.7492
Epoch [2/10], iteration [21/147] Loss: 7.7476
Epoch [2/10], iteration [22/147] Loss: 7.76