In [1]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms


transform = transforms.Compose([transforms.Resize((32, 32)), # (h, w) 순서
            #transforms.ToPILImage(),
            transforms.Grayscale(),
            transforms.ToTensor(),
            ])

train = ImageFolder(root="./_data/syllable/", transform=transform, target_transform=None)
test = ImageFolder(root="./_data/syllable_test/", transform=transform, target_transform=None)

In [28]:
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

batch_size = 32
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size)

for X, y in train_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    break

Shape of X [N, C, H, W]: torch.Size([32, 1, 32, 32])


In [29]:
import torch
import torch.nn as nn
from sklearn.model_selection import KFold

def make_layer(config=[64, 64, 'M', 128, 128, 128, 'M', 256, 256, 'M', 512, 512, 'B', 'M']):
    layers = []
    in_channel = 1
    for out_channel in config:
        if out_channel == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif out_channel == 'B':
            layers.append(nn.Conv2d(in_channel, in_channel, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(in_channel))
            layers.append(nn.ReLU(inplace=True))
        else:
            layers.append(nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channel = out_channel
    
    #layers.append(nn.AdaptiveAvgPool2d((2, 2)))
    return nn.Sequential(*layers)

class NET(nn.Module):
    
    def __init__(self, num_classes=2350):
        super(NET, self).__init__()
        self.features = make_layer()
        
        # ImageNet
        self.classifier = nn.Sequential(
            nn.Linear(2048, num_classes),
        )
        
    def forward(self, x):
        out = self.features(x)
        out = torch.flatten(out,1)
        out = self.classifier(out)
        return out

In [30]:
import torchsummary
from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset

model = NET()
total_epoch = 10

k = 3
splits = KFold(n_splits=k, shuffle=True, random_state=42)
dataset = ConcatDataset([train_loader, test_loader])

torchsummary.summary(model, (1, 32, 32), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]             640
              ReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3           [-1, 64, 32, 32]          36,928
              ReLU-4           [-1, 64, 32, 32]               0
         MaxPool2d-5           [-1, 64, 16, 16]               0
            Conv2d-6          [-1, 128, 16, 16]          73,856
              ReLU-7          [-1, 128, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]         147,584
              ReLU-9          [-1, 128, 16, 16]               0
           Conv2d-10          [-1, 128, 16, 16]         147,584
             ReLU-11          [-1, 128, 16, 16]               0
        MaxPool2d-12            [-1, 128, 8, 8]               0
           Conv2d-13            [-1, 256, 8, 8]         295,168
             ReLU-14            [-1, 25

In [31]:
model = NET().to(device)
#model.load_state_dict(torch.load('./model_5.pth'))

In [35]:
def train_epoch(model,device,dataloader,loss_fn,optimizer):
    train_loss,train_correct=0.0,0
    model.train()
    for images, labels in dataloader:
        images,labels = images.to(device),labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(output.data, 1)
        train_correct += (predictions == labels).sum().item()

    return train_loss,train_correct
  
def valid_epoch(model,device,dataloader,loss_fn):
    valid_loss, val_correct = 0.0, 0
    model.eval()
    for images, labels in dataloader:

        images,labels = images.to(device),labels.to(device)
        output = model(images)
        loss=loss_fn(output,labels)
        valid_loss+=loss.item()*images.size(0)
        scores, predictions = torch.max(output.data,1)
        val_correct+=(predictions == labels).sum().item()

    return valid_loss,val_correct

In [36]:
for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(dataset)))):

    print('Fold {}'.format(fold + 1))

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
        
    model = NET()
    model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[]}

    for epoch in range(total_epoch):
        train_loss, train_correct=train_epoch(model,device,train_loader,criterion,optimizer)
        test_loss, test_correct=valid_epoch(model,device,test_loader,criterion)

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / len(test_loader.sampler) * 100

        print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f} %".format(epoch + 1,
                                                                                                             num_epochs,
                                                                                                             train_loss,
                                                                                                             test_loss,
                                                                                                             train_acc,
                                                                                                             test_acc))
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

    foldperf['fold{}'.format(fold+1)] = history  

torch.save(model.state_dict(), 'new_model_10' + str(epoch) + ".pth")

Fold 1


TypeError: 'DataLoader' object is not subscriptable

In [11]:
total_iteration_per_epoch = int(np.ceil(len(train)/batch_size))

for epoch in range(4, total_epoch + 1):
    model.train()
    for iteration, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print('Epoch [{}/{}], iteration [{}/{}] Loss: {:.4f}'.format(epoch, total_epoch, iteration+1, total_iteration_per_epoch, loss.item()))
    torch.save(model.state_dict(), 'new_model_' + str(epoch) + ".pth")
    
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for input, target in test_loader:
            images = input.to(device)
            labels = target.to(device)
            # Forward pass
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += len(labels)
            correct += (predicted == labels).sum().item()

        print('Epoch [{}/{}], Test Accuracy of the model on the {} test images: {} %'.format(epoch, total_epoch, total, 100 * correct / total))

Epoch [4/10], iteration [1/147] Loss: 7.7608
Epoch [4/10], iteration [2/147] Loss: 7.7569
Epoch [4/10], iteration [3/147] Loss: 7.7708
Epoch [4/10], iteration [4/147] Loss: 7.7930
Epoch [4/10], iteration [5/147] Loss: 7.7553
Epoch [4/10], iteration [6/147] Loss: 7.7638
Epoch [4/10], iteration [7/147] Loss: 7.7738
Epoch [4/10], iteration [8/147] Loss: 7.7797
Epoch [4/10], iteration [9/147] Loss: 7.7545
Epoch [4/10], iteration [10/147] Loss: 7.7763
Epoch [4/10], iteration [11/147] Loss: 7.7817
Epoch [4/10], iteration [12/147] Loss: 7.7928
Epoch [4/10], iteration [13/147] Loss: 7.7663
Epoch [4/10], iteration [14/147] Loss: 7.7835
Epoch [4/10], iteration [15/147] Loss: 7.8249
Epoch [4/10], iteration [16/147] Loss: 7.7926
Epoch [4/10], iteration [17/147] Loss: 7.7567
Epoch [4/10], iteration [18/147] Loss: 7.7729
Epoch [4/10], iteration [19/147] Loss: 7.7664
Epoch [4/10], iteration [20/147] Loss: 7.7964
Epoch [4/10], iteration [21/147] Loss: 7.8024
Epoch [4/10], iteration [22/147] Loss: 7.76

Epoch [5/10], iteration [30/147] Loss: 7.7529
Epoch [5/10], iteration [31/147] Loss: 7.7667
Epoch [5/10], iteration [32/147] Loss: 7.7691
Epoch [5/10], iteration [33/147] Loss: 7.7313
Epoch [5/10], iteration [34/147] Loss: 7.7650
Epoch [5/10], iteration [35/147] Loss: 7.7620
Epoch [5/10], iteration [36/147] Loss: 7.7749
Epoch [5/10], iteration [37/147] Loss: 7.7692
Epoch [5/10], iteration [38/147] Loss: 7.7464
Epoch [5/10], iteration [39/147] Loss: 7.7635
Epoch [5/10], iteration [40/147] Loss: 7.8024
Epoch [5/10], iteration [41/147] Loss: 7.7855
Epoch [5/10], iteration [42/147] Loss: 7.7851
Epoch [5/10], iteration [43/147] Loss: 7.7585
Epoch [5/10], iteration [44/147] Loss: 7.7686
Epoch [5/10], iteration [45/147] Loss: 7.7755
Epoch [5/10], iteration [46/147] Loss: 7.7886
Epoch [5/10], iteration [47/147] Loss: 7.7742
Epoch [5/10], iteration [48/147] Loss: 7.7605
Epoch [5/10], iteration [49/147] Loss: 7.7769
Epoch [5/10], iteration [50/147] Loss: 7.7804
Epoch [5/10], iteration [51/147] L

KeyboardInterrupt: 