In [3]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader

def make_layer(config=(64, 64, 'M', 128, 128, 128, 'M', 256, 256, 'M', 512, 512, 'B', 'M'), in_channels=1):
    """Build a VGG-style convolutional feature extractor.

    Args:
        config: Sequence describing the stack, read left to right. Each entry is
            - an int ``c``: 3x3 convolution (padding 1) to ``c`` channels + ReLU,
            - ``'M'``: 2x2 max pooling with stride 2,
            - ``'B'``: bias-free 3x3 convolution that keeps the channel count,
              followed by BatchNorm2d and ReLU.
            The default is a tuple (not a list) so the shared default object
            cannot be mutated between calls; lists are still accepted.
        in_channels: Number of channels of the input image. Defaults to 1,
            the value that was previously hard-coded.

    Returns:
        nn.Sequential containing the layers in the order given by ``config``.
    """
    layers = []
    in_channel = in_channels
    for item in config:
        if item == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif item == 'B':
            # Bias is omitted because the following BatchNorm2d has its own shift term.
            layers.append(nn.Conv2d(in_channel, in_channel, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(in_channel))
            layers.append(nn.ReLU(inplace=True))
        else:
            layers.append(nn.Conv2d(in_channel, item, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            # The next layer consumes what this convolution produced.
            in_channel = item

    #layers.append(nn.AdaptiveAvgPool2d((2, 2)))
    return nn.Sequential(*layers)

class NET(nn.Module):
    """Convolutional classifier: ``make_layer()`` backbone plus one linear head."""

    def __init__(self, num_classes=2350):
        """Create the backbone and the classification head.

        Args:
            num_classes: Size of the output logit vector.
        """
        super().__init__()

        # Backbone built from make_layer's default configuration.
        self.features = make_layer()

        # The head expects 2048 flattened features: 512 channels x 2 x 2, which is
        # what the default backbone (four 2x poolings) yields for a 32x32 input.
        head = nn.Linear(2048, num_classes)
        self.classifier = nn.Sequential(head)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        feature_maps = self.features(x)
        # Keep the batch dimension, collapse everything else.
        flat = torch.flatten(feature_maps, 1)
        return self.classifier(flat)

In [1]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms


# Preprocessing applied to every image: resize, reduce to grayscale, convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),  # size is given in (h, w) order
        transforms.Grayscale(),
        transforms.ToTensor(),
    ]
)

# Both image folders go through the same preprocessing pipeline.
train = ImageFolder("./_data/syllable/", transform=transform)
test = ImageFolder("./_data/syllable_test/", transform=transform)

In [4]:
import torchsummary
from torch.utils.data import Dataset,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset

# Use the GPU when one is available. The model and the dummy input that
# torchsummary creates must live on the same device, otherwise the summary's
# forward pass fails with a device/type mismatch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NET().to(device)
total_epoch = 10  # epochs trained per fold

k = 3  # number of cross-validation folds
splits = KFold(n_splits=k, shuffle=True, random_state=42)
# K-fold re-partitions the data itself, so both image folders are merged into one pool.
dataset = ConcatDataset([train, test])

# torchsummary only accepts the strings 'cuda' / 'cpu', hence device.type.
torchsummary.summary(model, (1, 32, 32), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]             640
              ReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3           [-1, 64, 32, 32]          36,928
              ReLU-4           [-1, 64, 32, 32]               0
         MaxPool2d-5           [-1, 64, 16, 16]               0
            Conv2d-6          [-1, 128, 16, 16]          73,856
              ReLU-7          [-1, 128, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]         147,584
              ReLU-9          [-1, 128, 16, 16]               0
           Conv2d-10          [-1, 128, 16, 16]         147,584
             ReLU-11          [-1, 128, 16, 16]               0
        MaxPool2d-12            [-1, 128, 8, 8]               0
           Conv2d-13            [-1, 256, 8, 8]         295,168
             ReLU-14            [-1, 25

In [5]:
#model = NET().to(device)
#model.load_state_dict(torch.load('./model_5.pth'))

In [6]:
def train_epoch(model,device,dataloader,loss_fn,optimizer, total_iteration_per_epoch):
    """Run one training pass over ``dataloader`` and update ``model`` in place.

    Args:
        model: Network to train; switched to training mode.
        device: Device the batches are moved to before the forward pass.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar loss.
        optimizer: Optimizer stepping the model's parameters.
        total_iteration_per_epoch: Only used as the denominator in the progress print.

    Returns:
        Tuple ``(train_loss, train_correct)``: the sum over batches of
        ``loss * batch_size`` and the number of correctly classified samples.
        Callers divide by the sample count to get averages.
    """
    train_loss, train_correct = 0.0, 0
    model.train()
    # Iterate the loader that was passed in, not a global of a similar name.
    for iteration, (images, labels) in enumerate(dataloader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the sum
        # (assumes loss_fn returns a per-batch mean -- TODO confirm for custom losses).
        train_loss += loss.item() * images.size(0)
        # detach() replaces the deprecated .data; the predicted class is the max-logit index.
        scores, predictions = torch.max(output.detach(), 1)
        train_correct += (predictions == labels).sum().item()
        print('iteration [{}/{}] Loss: {:.4f}'.format(iteration+1, total_iteration_per_epoch, loss.item()))

    return train_loss, train_correct
  
def valid_epoch(model,device,dataloader,loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating any parameters.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device the batches are moved to before the forward pass.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar loss.

    Returns:
        Tuple ``(valid_loss, val_correct)``: the sum over batches of
        ``loss * batch_size`` and the number of correctly classified samples.
    """
    valid_loss, val_correct = 0.0, 0
    model.eval()
    # No backward pass happens here, so skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = loss_fn(output, labels)
            valid_loss += loss.item() * images.size(0)
            scores, predictions = torch.max(output, 1)
            val_correct += (predictions == labels).sum().item()

    return valid_loss, val_correct

In [None]:
# Per-fold training curves, keyed 'fold1', 'fold2', ...; created here so the
# assignment at the end of each fold has something to write into.
foldperf = {}

# The device does not change between folds, so it is chosen once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): `batch_size` is not defined in any cell visible in this notebook;
# it has to be set in an earlier cell before this one is run.
for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(dataset)))):

    print('Fold {}'.format(fold + 1))

    # Both loaders draw from the same merged dataset; the samplers restrict
    # each one to the indices of its side of the split.
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    # Every fold starts from a freshly initialised network and optimizer.
    model = NET()
    model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
    # Number of batches this fold's training loader actually yields (the progress
    # denominator was previously derived from the size of the unsplit `train` set).
    total_iteration_per_epoch = len(train_loader)

    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[]}

    for epoch in range(total_epoch):
        train_loss, train_correct = train_epoch(model, device, train_loader, criterion, optimizer, total_iteration_per_epoch)
        test_loss, test_correct = valid_epoch(model, device, test_loader, criterion)

        # The epoch functions return sums; normalise by the number of samples in each split.
        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / len(test_loader.sampler) * 100

        # `total_epoch` is the configured epoch count (`num_epochs` was never defined).
        print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f} %".format(
            epoch + 1,
            total_epoch,
            train_loss,
            test_loss,
            train_acc,
            test_acc))
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

    foldperf['fold{}'.format(fold+1)] = history

# Runs once after all folds, so only the last fold's model is written. The file
# name is 'new_model_10' followed by the last epoch index and '.pth'.
torch.save(model.state_dict(), 'new_model_10' + str(epoch) + ".pth")

Fold 1
iteration [1/294] Loss: 7.8855
iteration [2/294] Loss: 38.5708
iteration [3/294] Loss: 38.3186
iteration [4/294] Loss: 33.8886
iteration [5/294] Loss: 37.8646
iteration [6/294] Loss: 39.7794
iteration [7/294] Loss: 43.2154
iteration [8/294] Loss: 45.2724
iteration [9/294] Loss: 40.4005
iteration [10/294] Loss: 40.2954
iteration [11/294] Loss: 35.1984
iteration [12/294] Loss: 36.5220
iteration [13/294] Loss: 32.7782
iteration [14/294] Loss: 33.0174
iteration [15/294] Loss: 32.4663
iteration [16/294] Loss: 28.8667
iteration [17/294] Loss: 28.7435
iteration [18/294] Loss: 26.0019
iteration [19/294] Loss: 27.9787
iteration [20/294] Loss: 27.3793
iteration [21/294] Loss: 23.8304
iteration [22/294] Loss: 25.2063
iteration [23/294] Loss: 25.0864
iteration [24/294] Loss: 21.0367
iteration [25/294] Loss: 22.0966
iteration [26/294] Loss: 19.7100
iteration [27/294] Loss: 17.3887
iteration [28/294] Loss: 17.7266
iteration [29/294] Loss: 17.4536
iteration [30/294] Loss: 16.7856
iteration [31