In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM so the dataset stored there is reachable.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### 데이터 분할을 위한 폴더 생성

In [None]:
import os
import shutil
 
# Location of the original dataset (one sub-folder per class).
original_dataset_dir = '/content/drive/MyDrive/Data/plat_leaf'
# os.listdir() returns every entry under the path in arbitrary order, so keep
# only the sub-directories (the classes) and sort them for a stable order.
classes_list = sorted(
    entry for entry in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, entry))
)

base_dir = './splitted'
# Start from a clean output tree: os.mkdir() used to fail with FileExistsError
# when this cell was re-run, and reusing an old tree could keep stale files
# from an earlier split.
if os.path.isdir(base_dir):
    shutil.rmtree(base_dir)
os.makedirs(base_dir)

# Root folders of the train / val / test splits.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Create one sub-folder per class inside every split folder.
for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)
    for cls in classes_list:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

### 데이터 분할과 클래스별 데이터 수 확인

In [None]:
import math
import random

# Fixed seed so that re-running this cell reproduces exactly the same split.
SPLIT_SEED = 42

# (label used in the printout, destination root, fraction of each class)
split_plan = [
    ("Train", train_dir, 0.6),
    ("Validation", validation_dir, 0.2),
    ("Test", test_dir, 0.2),
]

for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)
    # os.listdir() gives no ordering guarantee, so slicing its raw result made
    # the split depend on the file system. Sort first, then shuffle with a
    # seeded RNG: the split is random with respect to file names but identical
    # on every run.
    fnames = sorted(os.listdir(path))
    random.Random(SPLIT_SEED).shuffle(fnames)

    start = 0
    for split_name, split_root, fraction in split_plan:
        # Sizes are floored as before, so up to two files per class can remain unassigned.
        split_size = math.floor(len(fnames) * fraction)
        split_fnames = fnames[start:start + split_size]
        start += split_size

        print(split_name + " size(", cls, "): ", len(split_fnames))
        for fname in split_fnames:
            src = os.path.join(path, fname)
            dst = os.path.join(split_root, cls, fname)
            shutil.copyfile(src, dst)

Train size( Corn___Northern_Leaf_Blight ):  591
Validation size( Corn___Northern_Leaf_Blight ):  197
Test size( Corn___Northern_Leaf_Blight ):  197
Train size( Corn___healthy ):  697
Validation size( Corn___healthy ):  232
Test size( Corn___healthy ):  232
Train size( Apple___healthy ):  987
Validation size( Apple___healthy ):  329
Test size( Apple___healthy ):  329
Train size( Corn___Common_rust ):  715
Validation size( Corn___Common_rust ):  238
Test size( Corn___Common_rust ):  238
Train size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  307
Validation size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Test size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Train size( Apple___Black_rot ):  372
Validation size( Apple___Black_rot ):  124
Test size( Apple___Black_rot ):  124
Train size( Apple___Apple_scab ):  378
Validation size( Apple___Apple_scab ):  126
Test size( Apple___Apple_scab ):  126
Train size( Cherry___Powdery_mildew ):  631
Validation size( Cherry___Powdery_

### 베이스라인 모델 학습을 위한 준비

In [None]:
import time
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

import numpy as np
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names stopped working in later releases, so use
# whichever spelling this matplotlib build provides (falling back to the
# default style if neither is present).
_white_styles = ('seaborn-v0_8-white', 'seaborn-white')
plt.style.use(next((name for name in _white_styles if name in plt.style.available), 'default'))

In [None]:
# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Current cuda device is', device)

Current cuda device is cuda


In [None]:
# Training hyperparameters used by the data loaders, the optimizer and the training loop.
learning_rate = 1e-3  # step size handed to the optimizer
epochs = 30           # number of passes over the training set
batch_size = 256      # samples per mini-batch

In [None]:
# Baseline preprocessing: resize every image to 64x64, then convert it to a tensor.
_base_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
]
transform_base = transforms.Compose(_base_steps)

In [None]:
# Build the datasets from the split folders; both use the baseline transform.
train_dataset = ImageFolder('./splitted/train', transform=transform_base)
val_dataset = ImageFolder('./splitted/val', transform=transform_base)

In [None]:
# Mini-batch loaders.
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=4)

# Validation only aggregates loss and accuracy over the whole set, so
# shuffling adds nothing; a fixed order keeps evaluation deterministic.
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        num_workers=4)

### 베이스라인 모델 설계

In [None]:
class Net(nn.Module):
    """Baseline CNN: three conv -> ReLU -> max-pool -> dropout stages and two linear layers.

    ``fc1`` expects 4096 input features, i.e. 64 channels x 8 x 8, which is
    what three 2x2 poolings leave of a 64x64 input image.

    Args:
        num_classes: Number of output classes. Defaults to 33, the value that
            was previously hard-coded in the last layer.
    """

    def __init__(self, num_classes=33):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        # Flatten everything except the batch dimension. Unlike view(-1, 4096),
        # this never folds several samples into one row when the input size is
        # wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

# Instantiate the baseline network, move it to the selected device and show its layers.
model_base = Net()
model_base = model_base.to(device)
print(model_base)

Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=33, bias=True)
)


In [None]:
# Loss function and optimizer for the baseline model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_base.parameters(), lr=learning_rate)

### 모델 학습을 위한 함수

In [None]:
def train(model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode and, for every mini-batch, moves the batch
    to the module-level ``device``, computes the module-level ``criterion``
    loss, back-propagates it and takes one ``optimizer`` step.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

### 모델 평가를 위한 함수

In [None]:
def evaluate(model, test_loader):
    """Compute the average loss and the accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled.

    Returns:
        A ``(loss, accuracy)`` tuple: the summed cross-entropy divided by the
        number of samples in ``test_loader.dataset``, and the percentage
        (0-100) of correctly classified samples.
    """
    model.eval()
    n_samples = len(test_loader.dataset)
    loss_sum = 0
    n_correct = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)
            # Sum (not mean) per batch so the final division gives a true per-sample average.
            loss_sum += F.cross_entropy(scores, labels, reduction='sum').item()
            # Index of the highest score along dim 1 is the predicted class.
            _, predicted = scores.max(1, keepdim=True)
            n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    return loss_sum / n_samples, 100 * n_correct / n_samples

### 모델 학습 실행하기

In [None]:
def train_baseline(model, train_loader, val_loader, oprimizer, num_epochs=epochs):
    """Train ``model`` for ``num_epochs`` epochs and keep the best validation weights.

    After every epoch the model is evaluated on both loaders and the metrics
    plus the elapsed time (training and both evaluations) are printed. The
    weights with the highest validation accuracy seen so far are remembered
    and loaded back into the model before it is returned.

    Args:
        model: Network to train (modified in place).
        train_loader: Loader providing the training mini-batches.
        val_loader: Loader providing the validation mini-batches.
        oprimizer: Optimizer used for the updates. The name is misspelled in
            the original signature and is kept so keyword callers keep working.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object with the best-validation weights loaded. If
        validation accuracy never rises above 0, these are the weights the
        model had when the function was called.
    """
    # Previously the body ignored this argument and silently used the global
    # `optimizer`; use the optimizer that was actually passed in.
    optimizer = oprimizer

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        # Snapshot the weights whenever validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since

        print('---------- epoch {} ----------'.format(epoch))

        print('train Loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc))
        print('val Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss, val_acc))
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)

    return model

# Train the baseline network, then serialize the returned model object to disk.
base = train_baseline(model_base, train_loader, val_loader, optimizer, num_epochs=epochs)

torch.save(base, 'baseline.pt')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


---------- epoch 1 ----------
train Loss: 1.8567, Accuracy: 48.78%
val Loss: 1.8659, Accuracy: 48.55%
Completed in 0m 42s
---------- epoch 2 ----------
train Loss: 1.0138, Accuracy: 70.05%
val Loss: 1.0263, Accuracy: 69.05%
Completed in 0m 44s
---------- epoch 3 ----------
train Loss: 0.7153, Accuracy: 78.90%
val Loss: 0.7398, Accuracy: 77.45%
Completed in 0m 43s
---------- epoch 4 ----------
train Loss: 0.6222, Accuracy: 80.41%
val Loss: 0.6561, Accuracy: 79.44%
Completed in 0m 43s
---------- epoch 5 ----------
train Loss: 0.4895, Accuracy: 84.98%
val Loss: 0.5392, Accuracy: 82.74%
Completed in 0m 43s
---------- epoch 6 ----------
train Loss: 0.4225, Accuracy: 86.36%
val Loss: 0.4757, Accuracy: 84.42%
Completed in 0m 42s
---------- epoch 7 ----------
train Loss: 0.3640, Accuracy: 88.36%
val Loss: 0.4197, Accuracy: 86.27%
Completed in 0m 42s
---------- epoch 8 ----------
train Loss: 0.3286, Accuracy: 89.75%
val Loss: 0.4035, Accuracy: 86.75%
Completed in 0m 42s
---------- epoch 9 -----