https://www.kaggle.com/misrakahmed/vegetable-image-dataset

데이터를 분류하는 데에 여러 모델들을 사용해 보고 성능, 학습 시간 비교

In [2]:
# 필수 코드

import torch
import torchvision
import visdom

vis = visdom.Visdom()
vis.close(env='main')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.random.manual_seed(0)
if device == 'cuda':
    torch.cuda.manual_seed_all(0)

Setting up a new session...


In [3]:
# hyperparameters
batch_size = 256
learning_rate = 1e-4
epoch = 15

image_size = 56
class_num = 15
class_name = ['Bean', 'Bitter_Gourd', 'Bottle_Gourd', 'Brinjal', 'Broccoli', 'Cabbage', 'Capsicum', 'Carrot', 'Cauliflower', 'Cucumber', 'Papaya', 'Potato', 'Pumpkin', 'Radish', 'Tomato']

In [4]:
# train data loader를 반환함
def get_train_data_loaders(resize = False, rotate = False):
    transforms = []

    if rotate:
        transforms.append(torchvision.transforms.transforms.RandomRotation(360))

    if resize:
        transforms.append(torchvision.transforms.RandomResizedCrop((image_size, image_size)))
        
    transforms.append(torchvision.transforms.Resize((image_size, image_size)))
    transforms.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(transforms)

    train_data = torchvision.datasets.ImageFolder(root='train', transform=transform)
    train_data_loader =torch.utils.data.DataLoader(dataset = train_data, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)

    return train_data_loader

In [5]:
# test data loader와 validation data loader를 반환함
def get_test_validation_data_loaders():
    transform = torchvision.transforms.Compose([torchvision.transforms.Resize((image_size, image_size)), torchvision.transforms.ToTensor()])
    test_data = torchvision.datasets.ImageFolder(root='test', transform=transform)
    validation_data = torchvision.datasets.ImageFolder(root='validation', transform=transform)

    test_data_loader = torch.utils.data.DataLoader(dataset = test_data, batch_size=batch_size, shuffle=False, drop_last=True, num_workers=4)
    validation_data_loader =torch.utils.data.DataLoader(dataset = validation_data, batch_size=batch_size, shuffle=False, drop_last=True, num_workers=4)

    return test_data_loader, validation_data_loader

In [6]:
def test_model(model, data_loader):
    with torch.no_grad():
        accuracy = 0
        for X, Y in data_loader:
            X = X.to(device)
            Y = Y.to(device)
            prediction = model(X)
            correct_prediction = torch.argmax(prediction, dim=1) == Y
            accuracy += correct_prediction.float().mean()
        accuracy /= len(data_loader)
        print("accuracy : {:.4f}%".format(accuracy.item() * 100))
    return accuracy.item() * 100

In [7]:
#model을 data_loader로 epoch만큼 학습함, print_loss가 True이면 trainloss를 출력함
def train_model(model, data_loader, test_loader, epochs, print_loss=False):

    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_batch = len(data_loader)

    if print_loss:
        loss_plot = vis.line(Y=torch.Tensor(1).zero_(),opts=dict(title='loss_tracker', legend=['loss'], showlegend=True), env='main')
        acc_plot = vis.line(Y=torch.Tensor(1).zero_(),opts=dict(title='accuracy_tracker', legend=['accuracy'], showlegend=True), env='main')

    for epoch in range(epochs):
        avg_cost = 0.0
        for X, Y in data_loader:
            X = X.to(device)
            Y = Y.to(device)
            output = model(X)
            loss = criterion(output, Y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            avg_cost += loss/total_batch
        
        if print_loss:
            print('[EPOCH:{:3d}] cost:{:.5f}'.format(epoch + 1, avg_cost))
            vis.line(X=torch.Tensor([epoch]), Y=torch.Tensor([avg_cost]), win=loss_plot, update="append")
            acc = test_model(model, test_loader)
            vis.line(X=torch.Tensor([epoch]), Y=torch.Tensor([acc]), win=loss_plot, update="append")

In [8]:
# Fully connected model
class FCmodel(torch.nn.Module):
    def __init__(self, config):
        super().__init__()
        layer_list = []
        in_size = config[0]
        for i in range(1, len(config) - 1):
            out_size = config[i]
            layer_list.append(torch.nn.Linear(in_size, out_size, bias=True))
            layer_list.append(torch.nn.ReLU())
            in_size = out_size
        
        out_size = config[-1]
        layer_list.append(torch.nn.Linear(in_size, out_size, bias=True))

        self.layers = torch.nn.Sequential(*layer_list)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.layers(x)

In [9]:
# CNN model
class CNN(torch.nn.Module):
    def __init__(self, config, config_fc = None):
        super().__init__()
        layer_list = []
        channel_in_size = 3
        in_size = image_size
        for cont in config:
            if cont == 'M':
                layer_list.append(torch.nn.MaxPool2d(2))
                in_size = in_size // 2
            else:
                channel, kernel_size, padding = cont
                layer_list.append(torch.nn.Conv2d(channel_in_size, channel, kernel_size, padding=padding))
                layer_list.append(torch.nn.ReLU())
                channel_in_size = channel
                in_size = in_size - kernel_size + 2 * padding + 1
        
        self.layers = torch.nn.Sequential(*layer_list)

        if config_fc == None:
            self.fc = FCmodel([in_size * in_size * channel_in_size, 128, 32, class_num])
        else:
            self.fc = FCmodel(config_fc)
        
    def forward(self, x):
        x = self.layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [10]:
# VGG model
class VGG(torch.nn.Module):
    def __init__(self, config, init_weights=True):
        super(VGG, self).__init__()
        
        channel_in_size = 3
        layer_list = []
        for cont in config:
            if cont == 'M':
                layer_list.append(torch.nn.MaxPool2d(2))
            else:
                layer_list.append(torch.nn.Conv2d(channel_in_size, cont, 3, padding=1))
                layer_list.append(torch.nn.ReLU())
                channel_in_size = cont
        
        self.features = torch.nn.Sequential(*layer_list)

        
        self.avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(channel_in_size * 7 * 7, 512),
            torch.nn.ReLU(True),
            torch.nn.Dropout(),
            torch.nn.Linear(512, 128),
            torch.nn.ReLU(True),
            torch.nn.Dropout(),
            torch.nn.Linear(128, class_num),
        )#FC layer
        
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x) #Convolution 
        x = self.avgpool(x) # avgpool
        x = x.view(x.size(0), -1) #
        x = self.classifier(x) #FC layer
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                torch.nn.init.constant_(m.bias, 0)

In [11]:
#model1 = FCmodel([image_size*image_size*3, 2048, 512, 128, class_num]).to(device)
#model2 = CNN([(32, 3, 0), 'M', (64, 3, 0), 'M', (128, 3, 0), 'M']).to(device)
model3 = VGG([64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512]).to(device)
print(model3)
print(model3)
train_loader = get_train_data_loaders()
test_loader, valid_loader = get_test_validation_data_loaders()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (13): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (class

In [12]:
train_model(model3, train_loader, test_loader, epoch, print_loss=True)

KeyboardInterrupt: 

In [None]:
test_model(model2, train_loader)

test_model(model2, test_loader)
test_model(model2, valid_loader)

NameError: name 'model2' is not defined