In [1]:
import torch
import torch.nn as nn
from sklearn.model_selection import KFold

def make_layer(config=[64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 'M', 'B', 'M']):
    layers = []
    in_channel = 1
    for out_channel in config:
        if out_channel == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif out_channel == 'B':
            layers.append(nn.Conv2d(in_channel, in_channel, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(in_channel))
            layers.append(nn.ReLU(inplace=True))
        else:
            layers.append(nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channel = out_channel
    
    #layers.append(nn.AdaptiveAvgPool2d((2, 2)))
    return nn.Sequential(*layers)

class NET(nn.Module):
    
    def __init__(self, num_classes=2350):
        super(NET, self).__init__()
        self.features = make_layer()
        
        # ImageNet
        self.classifier = nn.Sequential(
            nn.Linear(2048, num_classes),
        )
        
    def forward(self, x):
        out = self.features(x)
        out = torch.flatten(out,1)
        out = self.classifier(out)
        return out

In [6]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader


transform = transforms.Compose([transforms.Resize((64, 64)), # (h, w) 순서
            #transforms.ToPILImage(),
            transforms.Grayscale(),
            transforms.ToTensor(),
            ])

train = ImageFolder(root="./_data/syllable/", transform=transform, target_transform=None)
test = ImageFolder(root="./_data/syllable_test/", transform=transform, target_transform=None)

batch_size = int(len(train)/500)
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size)

for X, y in train_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([672, 1, 64, 64])
Shape of y: torch.Size([672]) torch.int64


In [7]:
import torchsummary
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NET().to(device)
total_epoch = 10
torchsummary.summary(model, (1, 64, 64), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]             640
              ReLU-2           [-1, 64, 64, 64]               0
            Conv2d-3           [-1, 64, 64, 64]          36,928
              ReLU-4           [-1, 64, 64, 64]               0
         MaxPool2d-5           [-1, 64, 32, 32]               0
            Conv2d-6          [-1, 128, 32, 32]          73,856
              ReLU-7          [-1, 128, 32, 32]               0
            Conv2d-8          [-1, 128, 32, 32]         147,584
              ReLU-9          [-1, 128, 32, 32]               0
        MaxPool2d-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 256, 16, 16]         295,168
             ReLU-12          [-1, 256, 16, 16]               0
           Conv2d-13          [-1, 256, 16, 16]         590,080
             ReLU-14          [-1, 256,

In [8]:
#model = NET().to(device)
#model.load_state_dict(torch.load('./model_5.pth'))
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

In [None]:
import numpy as np
total_iteration_per_epoch = int(np.ceil(len(train)/batch_size))

for epoch in range(1, total_epoch + 1):
    model.train()
    for iteration, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print('Epoch [{}/{}], iteration [{}/{}] Loss: {:.4f}'.format(epoch, total_epoch, iteration+1, total_iteration_per_epoch, loss.item()))
    torch.save(model.state_dict(), 'model_' + str(epoch) + ".pth")
    
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for input, target in test_loader:
            images = input.to(device)
            labels = target.to(device)
            # Forward pass
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += len(labels)
            correct += (predicted == labels).sum().item()

        print('Epoch [{}/{}], Test Accuracy of the model on the {} test images: {} %'.format(epoch, total_epoch, total, 100 * correct / total))

Epoch [1/10], iteration [1/501] Loss: 7.8653
Epoch [1/10], iteration [2/501] Loss: 34.9906
Epoch [1/10], iteration [3/501] Loss: 52.7796
Epoch [1/10], iteration [4/501] Loss: 51.1427
Epoch [1/10], iteration [5/501] Loss: 50.4409
Epoch [1/10], iteration [6/501] Loss: 47.9556
Epoch [1/10], iteration [7/501] Loss: 47.1412
Epoch [1/10], iteration [8/501] Loss: 46.6265
Epoch [1/10], iteration [9/501] Loss: 45.1092
Epoch [1/10], iteration [10/501] Loss: 42.3813
Epoch [1/10], iteration [11/501] Loss: 40.3010
Epoch [1/10], iteration [12/501] Loss: 37.4843
Epoch [1/10], iteration [13/501] Loss: 35.3916
Epoch [1/10], iteration [14/501] Loss: 31.9658
Epoch [1/10], iteration [15/501] Loss: 29.4778
Epoch [1/10], iteration [16/501] Loss: 27.5530
Epoch [1/10], iteration [17/501] Loss: 26.2237
Epoch [1/10], iteration [18/501] Loss: 24.1562
Epoch [1/10], iteration [19/501] Loss: 21.8740
Epoch [1/10], iteration [20/501] Loss: 20.9914
Epoch [1/10], iteration [21/501] Loss: 19.5983
Epoch [1/10], iteration

Epoch [1/10], iteration [177/501] Loss: 7.2353
Epoch [1/10], iteration [178/501] Loss: 7.2632
Epoch [1/10], iteration [179/501] Loss: 7.2854
Epoch [1/10], iteration [180/501] Loss: 7.1083
Epoch [1/10], iteration [181/501] Loss: 7.2463
Epoch [1/10], iteration [182/501] Loss: 7.1578
Epoch [1/10], iteration [183/501] Loss: 7.2414
Epoch [1/10], iteration [184/501] Loss: 7.2470
Epoch [1/10], iteration [185/501] Loss: 7.1710
Epoch [1/10], iteration [186/501] Loss: 7.1698
Epoch [1/10], iteration [187/501] Loss: 7.1697
Epoch [1/10], iteration [188/501] Loss: 7.1680
Epoch [1/10], iteration [189/501] Loss: 7.1455
Epoch [1/10], iteration [190/501] Loss: 7.1589
Epoch [1/10], iteration [191/501] Loss: 7.0815
Epoch [1/10], iteration [192/501] Loss: 7.1554
Epoch [1/10], iteration [193/501] Loss: 7.0172
Epoch [1/10], iteration [194/501] Loss: 7.0171
Epoch [1/10], iteration [195/501] Loss: 6.9331
Epoch [1/10], iteration [196/501] Loss: 6.9752
Epoch [1/10], iteration [197/501] Loss: 7.0257
Epoch [1/10],