In [1]:
import torch
import torch.nn as nn
from sklearn.model_selection import KFold

def make_layer(config=(64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 'M', 'B', 'M'), in_channels=1):
    """Build a VGG-style convolutional feature extractor.

    Args:
        config: Iterable describing the layers in order. Each entry is one of:
            * a channel count -- appends ``Conv2d(3x3, padding=1)`` + ``ReLU``
              and makes that count the input width of the next layer;
            * ``'M'`` -- appends a 2x2, stride-2 ``MaxPool2d`` (halves H and W);
            * ``'B'`` -- appends a width-preserving
              ``Conv2d(3x3, bias=False)`` + ``BatchNorm2d`` + ``ReLU``.
        in_channels: Number of channels of the input image (1 = grayscale).

    Returns:
        ``nn.Sequential`` holding the layers in the order given by ``config``.
        With the default ``config`` a ``(N, 1, 64, 64)`` input is reduced to
        ``(N, 512, 2, 2)``, i.e. 2048 features once flattened.

    Raises:
        ValueError: If ``config`` contains a string other than ``'M'``/``'B'``.
    """
    layers = []
    in_channel = in_channels
    for spec in config:
        if spec == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif spec == 'B':
            # The conv bias is dropped because the following BatchNorm has its
            # own learnable shift, which makes a conv bias redundant.
            layers.append(nn.Conv2d(in_channel, in_channel, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(in_channel))
            layers.append(nn.ReLU(inplace=True))
        elif isinstance(spec, str):
            # Fail early with a clear message instead of an opaque error from
            # inside nn.Conv2d.
            raise ValueError(
                "Unknown layer token {!r} in config; expected 'M', 'B' or a channel count".format(spec)
            )
        else:
            layers.append(nn.Conv2d(in_channel, spec, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channel = spec

    return nn.Sequential(*layers)

class NET(nn.Module):
    """VGG-style classifier: convolutional features followed by one linear layer.

    The linear head expects 2048 flattened features per sample, so the output
    of ``make_layer()`` must flatten to exactly that size.
    """

    def __init__(self, num_classes=2350):
        """Create the feature extractor and the ``num_classes``-way linear head."""
        super().__init__()
        self.features = make_layer()

        # Single fully connected layer mapping the 2048 flattened features to
        # class scores (logits).
        self.classifier = nn.Sequential(
            nn.Linear(2048, num_classes),
        )

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        # Flatten every dimension except the batch dimension before the head.
        flat_features = torch.flatten(self.features(x), 1)
        return self.classifier(flat_features)

In [6]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader


# Preprocessing applied to every image: resize to 64x64, collapse to a single
# grayscale channel, then convert to a float tensor.
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # (h, w) order
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# NOTE(review): assumes both roots contain the same class sub-folders so that
# label indices agree between train and test -- verify.
train = ImageFolder(root="./_data/syllable/", transform=transform, target_transform=None)
test = ImageFolder(root="./_data/syllable_test/", transform=transform, target_transform=None)

# Aim for roughly 500 iterations per epoch. Clamp to 1 so that a training set
# with fewer than 500 images does not produce batch_size=0, which DataLoader
# rejects.
batch_size = max(1, len(train) // 500)
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size)

# Sanity check: show the shape and dtype of the first training batch only.
for X, y in train_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([672, 1, 64, 64])
Shape of y: torch.Size([672]) torch.int64


In [7]:
import torchsummary
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NET().to(device)
total_epoch = 10
# Run the summary on the device the model actually lives on; hard-coding
# 'cuda' here would fail on CPU-only machines.
torchsummary.summary(model, (1, 64, 64), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]             640
              ReLU-2           [-1, 64, 64, 64]               0
            Conv2d-3           [-1, 64, 64, 64]          36,928
              ReLU-4           [-1, 64, 64, 64]               0
         MaxPool2d-5           [-1, 64, 32, 32]               0
            Conv2d-6          [-1, 128, 32, 32]          73,856
              ReLU-7          [-1, 128, 32, 32]               0
            Conv2d-8          [-1, 128, 32, 32]         147,584
              ReLU-9          [-1, 128, 32, 32]               0
        MaxPool2d-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 256, 16, 16]         295,168
             ReLU-12          [-1, 256, 16, 16]               0
           Conv2d-13          [-1, 256, 16, 16]         590,080
             ReLU-14          [-1, 256,

In [8]:
# To resume from a saved checkpoint, rebuild the network and load its weights
# before creating the optimizer:
#   model = NET().to(device)
#   model.load_state_dict(torch.load('./model_5.pth'))

# Multi-class classification loss operating on raw logits.
criterion = nn.CrossEntropyLoss()

# Adam with a small L2 penalty (weight decay) on all model parameters.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=1e-5,
)

In [9]:
import numpy as np
# Number of mini-batches per epoch; the loader already knows this, including
# the final partial batch.
total_iteration_per_epoch = len(train_loader)

for epoch in range(1, total_epoch + 1):
    # ---- Training pass ----
    model.train()
    for iteration, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch [{}/{}], iteration [{}/{}] Loss: {:.4f}'.format(epoch, total_epoch, iteration, total_iteration_per_epoch, loss.item()))

    # Checkpoint after every epoch so an interrupted run can be resumed.
    torch.save(model.state_dict(), 'model_' + str(epoch) + ".pth")

    # ---- Evaluation pass ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for test_images, test_labels in test_loader:
            test_images = test_images.to(device)
            test_labels = test_labels.to(device)
            # Predicted class = index of the largest logit.
            predicted = model(test_images).argmax(dim=1)
            total += test_labels.size(0)
            correct += (predicted == test_labels).sum().item()

    # Guard against an empty test set, which would otherwise divide by zero.
    accuracy = 100 * correct / total if total else float('nan')
    print('Epoch [{}/{}], Test Accuracy of the model on the {} test images: {} %'.format(epoch, total_epoch, total, accuracy))

Epoch [1/10], iteration [1/501] Loss: 7.8653
Epoch [1/10], iteration [2/501] Loss: 34.9906
Epoch [1/10], iteration [3/501] Loss: 52.7796
Epoch [1/10], iteration [4/501] Loss: 51.1427
Epoch [1/10], iteration [5/501] Loss: 50.4409
Epoch [1/10], iteration [6/501] Loss: 47.9556
Epoch [1/10], iteration [7/501] Loss: 47.1412
Epoch [1/10], iteration [8/501] Loss: 46.6265
Epoch [1/10], iteration [9/501] Loss: 45.1092
Epoch [1/10], iteration [10/501] Loss: 42.3813
Epoch [1/10], iteration [11/501] Loss: 40.3010
Epoch [1/10], iteration [12/501] Loss: 37.4843
Epoch [1/10], iteration [13/501] Loss: 35.3916
Epoch [1/10], iteration [14/501] Loss: 31.9658
Epoch [1/10], iteration [15/501] Loss: 29.4778
Epoch [1/10], iteration [16/501] Loss: 27.5530
Epoch [1/10], iteration [17/501] Loss: 26.2237
Epoch [1/10], iteration [18/501] Loss: 24.1562
Epoch [1/10], iteration [19/501] Loss: 21.8740
Epoch [1/10], iteration [20/501] Loss: 20.9914
Epoch [1/10], iteration [21/501] Loss: 19.5983
Epoch [1/10], iteration

Epoch [1/10], iteration [177/501] Loss: 7.2353
Epoch [1/10], iteration [178/501] Loss: 7.2632
Epoch [1/10], iteration [179/501] Loss: 7.2854
Epoch [1/10], iteration [180/501] Loss: 7.1083
Epoch [1/10], iteration [181/501] Loss: 7.2463
Epoch [1/10], iteration [182/501] Loss: 7.1578
Epoch [1/10], iteration [183/501] Loss: 7.2414
Epoch [1/10], iteration [184/501] Loss: 7.2470
Epoch [1/10], iteration [185/501] Loss: 7.1710
Epoch [1/10], iteration [186/501] Loss: 7.1698
Epoch [1/10], iteration [187/501] Loss: 7.1697
Epoch [1/10], iteration [188/501] Loss: 7.1680
Epoch [1/10], iteration [189/501] Loss: 7.1455
Epoch [1/10], iteration [190/501] Loss: 7.1589
Epoch [1/10], iteration [191/501] Loss: 7.0815
Epoch [1/10], iteration [192/501] Loss: 7.1554
Epoch [1/10], iteration [193/501] Loss: 7.0172
Epoch [1/10], iteration [194/501] Loss: 7.0171
Epoch [1/10], iteration [195/501] Loss: 6.9331
Epoch [1/10], iteration [196/501] Loss: 6.9752
Epoch [1/10], iteration [197/501] Loss: 7.0257
Epoch [1/10],

Epoch [1/10], iteration [352/501] Loss: 2.8735
Epoch [1/10], iteration [353/501] Loss: 2.9156
Epoch [1/10], iteration [354/501] Loss: 2.9178
Epoch [1/10], iteration [355/501] Loss: 2.9068
Epoch [1/10], iteration [356/501] Loss: 2.8184
Epoch [1/10], iteration [357/501] Loss: 2.7864
Epoch [1/10], iteration [358/501] Loss: 2.8371
Epoch [1/10], iteration [359/501] Loss: 2.7650
Epoch [1/10], iteration [360/501] Loss: 2.6528
Epoch [1/10], iteration [361/501] Loss: 2.6944
Epoch [1/10], iteration [362/501] Loss: 2.6955
Epoch [1/10], iteration [363/501] Loss: 2.6307
Epoch [1/10], iteration [364/501] Loss: 2.6076
Epoch [1/10], iteration [365/501] Loss: 2.6115
Epoch [1/10], iteration [366/501] Loss: 2.6787
Epoch [1/10], iteration [367/501] Loss: 2.6628
Epoch [1/10], iteration [368/501] Loss: 2.6732
Epoch [1/10], iteration [369/501] Loss: 2.4472
Epoch [1/10], iteration [370/501] Loss: 2.5337
Epoch [1/10], iteration [371/501] Loss: 2.6163
Epoch [1/10], iteration [372/501] Loss: 2.4324
Epoch [1/10],

Epoch [2/10], iteration [25/501] Loss: 0.5267
Epoch [2/10], iteration [26/501] Loss: 0.6093
Epoch [2/10], iteration [27/501] Loss: 0.6538
Epoch [2/10], iteration [28/501] Loss: 0.5363
Epoch [2/10], iteration [29/501] Loss: 0.5373
Epoch [2/10], iteration [30/501] Loss: 0.4678
Epoch [2/10], iteration [31/501] Loss: 0.5266
Epoch [2/10], iteration [32/501] Loss: 0.5196
Epoch [2/10], iteration [33/501] Loss: 0.5069
Epoch [2/10], iteration [34/501] Loss: 0.4691
Epoch [2/10], iteration [35/501] Loss: 0.4977
Epoch [2/10], iteration [36/501] Loss: 0.5485
Epoch [2/10], iteration [37/501] Loss: 0.4962
Epoch [2/10], iteration [38/501] Loss: 0.4775
Epoch [2/10], iteration [39/501] Loss: 0.4651
Epoch [2/10], iteration [40/501] Loss: 0.5343
Epoch [2/10], iteration [41/501] Loss: 0.5464
Epoch [2/10], iteration [42/501] Loss: 0.4189
Epoch [2/10], iteration [43/501] Loss: 0.5244
Epoch [2/10], iteration [44/501] Loss: 0.4487
Epoch [2/10], iteration [45/501] Loss: 0.4471
Epoch [2/10], iteration [46/501] L

Epoch [2/10], iteration [201/501] Loss: 0.2390
Epoch [2/10], iteration [202/501] Loss: 0.2458
Epoch [2/10], iteration [203/501] Loss: 0.2279
Epoch [2/10], iteration [204/501] Loss: 0.2235
Epoch [2/10], iteration [205/501] Loss: 0.2028
Epoch [2/10], iteration [206/501] Loss: 0.2497
Epoch [2/10], iteration [207/501] Loss: 0.2627
Epoch [2/10], iteration [208/501] Loss: 0.2202
Epoch [2/10], iteration [209/501] Loss: 0.2285
Epoch [2/10], iteration [210/501] Loss: 0.2302
Epoch [2/10], iteration [211/501] Loss: 0.2688
Epoch [2/10], iteration [212/501] Loss: 0.2626
Epoch [2/10], iteration [213/501] Loss: 0.2568
Epoch [2/10], iteration [214/501] Loss: 0.2636
Epoch [2/10], iteration [215/501] Loss: 0.2179
Epoch [2/10], iteration [216/501] Loss: 0.2214
Epoch [2/10], iteration [217/501] Loss: 0.2194
Epoch [2/10], iteration [218/501] Loss: 0.2605
Epoch [2/10], iteration [219/501] Loss: 0.2255
Epoch [2/10], iteration [220/501] Loss: 0.2282
Epoch [2/10], iteration [221/501] Loss: 0.1948
Epoch [2/10],

Epoch [2/10], iteration [376/501] Loss: 0.1659
Epoch [2/10], iteration [377/501] Loss: 0.2019
Epoch [2/10], iteration [378/501] Loss: 0.1672
Epoch [2/10], iteration [379/501] Loss: 0.1778
Epoch [2/10], iteration [380/501] Loss: 0.1653
Epoch [2/10], iteration [381/501] Loss: 0.1814
Epoch [2/10], iteration [382/501] Loss: 0.2004
Epoch [2/10], iteration [383/501] Loss: 0.1401
Epoch [2/10], iteration [384/501] Loss: 0.1357
Epoch [2/10], iteration [385/501] Loss: 0.1768
Epoch [2/10], iteration [386/501] Loss: 0.1824
Epoch [2/10], iteration [387/501] Loss: 0.2125
Epoch [2/10], iteration [388/501] Loss: 0.1279
Epoch [2/10], iteration [389/501] Loss: 0.1459
Epoch [2/10], iteration [390/501] Loss: 0.1970
Epoch [2/10], iteration [391/501] Loss: 0.1633
Epoch [2/10], iteration [392/501] Loss: 0.1695
Epoch [2/10], iteration [393/501] Loss: 0.1902
Epoch [2/10], iteration [394/501] Loss: 0.1805
Epoch [2/10], iteration [395/501] Loss: 0.2025
Epoch [2/10], iteration [396/501] Loss: 0.1585
Epoch [2/10],

Epoch [3/10], iteration [49/501] Loss: 0.0747
Epoch [3/10], iteration [50/501] Loss: 0.0661
Epoch [3/10], iteration [51/501] Loss: 0.0798
Epoch [3/10], iteration [52/501] Loss: 0.1335
Epoch [3/10], iteration [53/501] Loss: 0.0694
Epoch [3/10], iteration [54/501] Loss: 0.0816
Epoch [3/10], iteration [55/501] Loss: 0.1013
Epoch [3/10], iteration [56/501] Loss: 0.0905
Epoch [3/10], iteration [57/501] Loss: 0.0862
Epoch [3/10], iteration [58/501] Loss: 0.0617
Epoch [3/10], iteration [59/501] Loss: 0.0881
Epoch [3/10], iteration [60/501] Loss: 0.0624
Epoch [3/10], iteration [61/501] Loss: 0.1016
Epoch [3/10], iteration [62/501] Loss: 0.0660
Epoch [3/10], iteration [63/501] Loss: 0.0758
Epoch [3/10], iteration [64/501] Loss: 0.1046
Epoch [3/10], iteration [65/501] Loss: 0.0810
Epoch [3/10], iteration [66/501] Loss: 0.0668
Epoch [3/10], iteration [67/501] Loss: 0.0648
Epoch [3/10], iteration [68/501] Loss: 0.0841
Epoch [3/10], iteration [69/501] Loss: 0.1016
Epoch [3/10], iteration [70/501] L

Epoch [3/10], iteration [225/501] Loss: 0.0999
Epoch [3/10], iteration [226/501] Loss: 0.1049
Epoch [3/10], iteration [227/501] Loss: 0.1017
Epoch [3/10], iteration [228/501] Loss: 0.0994
Epoch [3/10], iteration [229/501] Loss: 0.1196
Epoch [3/10], iteration [230/501] Loss: 0.0999
Epoch [3/10], iteration [231/501] Loss: 0.0671
Epoch [3/10], iteration [232/501] Loss: 0.1379
Epoch [3/10], iteration [233/501] Loss: 0.0717
Epoch [3/10], iteration [234/501] Loss: 0.1125
Epoch [3/10], iteration [235/501] Loss: 0.0720
Epoch [3/10], iteration [236/501] Loss: 0.0750
Epoch [3/10], iteration [237/501] Loss: 0.1007
Epoch [3/10], iteration [238/501] Loss: 0.1032
Epoch [3/10], iteration [239/501] Loss: 0.0649
Epoch [3/10], iteration [240/501] Loss: 0.0751
Epoch [3/10], iteration [241/501] Loss: 0.1319
Epoch [3/10], iteration [242/501] Loss: 0.1014
Epoch [3/10], iteration [243/501] Loss: 0.0949
Epoch [3/10], iteration [244/501] Loss: 0.0926
Epoch [3/10], iteration [245/501] Loss: 0.0631
Epoch [3/10],

Epoch [3/10], iteration [400/501] Loss: 0.0799
Epoch [3/10], iteration [401/501] Loss: 0.1301
Epoch [3/10], iteration [402/501] Loss: 0.0976
Epoch [3/10], iteration [403/501] Loss: 0.1285
Epoch [3/10], iteration [404/501] Loss: 0.0925
Epoch [3/10], iteration [405/501] Loss: 0.0977
Epoch [3/10], iteration [406/501] Loss: 0.0812
Epoch [3/10], iteration [407/501] Loss: 0.1274
Epoch [3/10], iteration [408/501] Loss: 0.1180
Epoch [3/10], iteration [409/501] Loss: 0.0764
Epoch [3/10], iteration [410/501] Loss: 0.1381
Epoch [3/10], iteration [411/501] Loss: 0.1051
Epoch [3/10], iteration [412/501] Loss: 0.1685
Epoch [3/10], iteration [413/501] Loss: 0.1152
Epoch [3/10], iteration [414/501] Loss: 0.1359
Epoch [3/10], iteration [415/501] Loss: 0.0917
Epoch [3/10], iteration [416/501] Loss: 0.0709
Epoch [3/10], iteration [417/501] Loss: 0.1141
Epoch [3/10], iteration [418/501] Loss: 0.1529
Epoch [3/10], iteration [419/501] Loss: 0.1050
Epoch [3/10], iteration [420/501] Loss: 0.0907
Epoch [3/10],

KeyboardInterrupt: 