In [1]:
# Mount Google Drive into the Colab filesystem so the dataset stored under
# /content/drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### 데이터 분할을 위한 폴더 생성

In [2]:
import os
import shutil
 
# Location of the original (unsplit) dataset.
original_dataset_dir = '/content/drive/MyDrive/Data/plat_leaf'
# Every sub-directory of the dataset root is treated as one class.
# Plain files that may sit next to the class folders are skipped (they would
# otherwise be turned into bogus class directories), and the names are sorted
# because os.listdir() returns entries in arbitrary order.
classes_list = sorted(
    entry for entry in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, entry))
)

# Root of the train/val/test copy of the dataset.
base_dir = './splitted'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# os.makedirs(..., exist_ok=True) creates missing parents and does not raise
# when the directory already exists, so this cell can be re-run safely
# (os.mkdir raised FileExistsError on the second run).
for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)
    for cls in classes_list:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

### 데이터 분할과 클래스별 데이터 수 확인

In [3]:
import math
import random

# Fixed seed so that the same files land in the same split on every run.
SPLIT_SEED = 42

# Copy each class's files into train / val / test using a 60 / 20 / 20 split.
for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)
    # os.listdir() returns names in arbitrary order, which made the split
    # depend on the filesystem. Sort first, then shuffle with a fixed seed so
    # the split is both reproducible and independent of file naming order.
    fnames = sorted(os.listdir(path))
    random.Random(SPLIT_SEED).shuffle(fnames)

    # Each size is floored independently, so up to two files per class may be
    # left out of every split (same counts as before).
    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)

    train_end = train_size
    validation_end = train_end + validation_size
    test_end = validation_end + test_size

    train_fnames = fnames[:train_end]
    validation_fnames = fnames[train_end:validation_end]
    test_fnames = fnames[validation_end:test_end]

    # One loop replaces the three copy-pasted copy loops.
    splits = (
        ("Train", train_dir, train_fnames),
        ("Validation", validation_dir, validation_fnames),
        ("Test", test_dir, test_fnames),
    )
    for label, split_dir, split_fnames in splits:
        print(label + " size(", cls, "): ", len(split_fnames))
        for fname in split_fnames:
            src = os.path.join(path, fname)
            dst = os.path.join(split_dir, cls, fname)
            shutil.copyfile(src, dst)

Train size( Corn___Northern_Leaf_Blight ):  591
Validation size( Corn___Northern_Leaf_Blight ):  197
Test size( Corn___Northern_Leaf_Blight ):  197
Train size( Corn___healthy ):  697
Validation size( Corn___healthy ):  232
Test size( Corn___healthy ):  232
Train size( Apple___healthy ):  987
Validation size( Apple___healthy ):  329
Test size( Apple___healthy ):  329
Train size( Corn___Common_rust ):  715
Validation size( Corn___Common_rust ):  238
Test size( Corn___Common_rust ):  238
Train size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  307
Validation size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Test size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Train size( Apple___Black_rot ):  372
Validation size( Apple___Black_rot ):  124
Test size( Apple___Black_rot ):  124
Train size( Apple___Apple_scab ):  378
Validation size( Apple___Apple_scab ):  126
Test size( Apple___Apple_scab ):  126
Train size( Cherry___Powdery_mildew ):  631
Validation size( Cherry___Powdery_

In [4]:
import time
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torchvision import models

import numpy as np
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in Matplotlib 3.6
# and the old names were later removed. Try the current name first and fall
# back to the legacy one on older Matplotlib (unknown styles raise OSError).
try:
    plt.style.use('seaborn-v0_8-white')
except OSError:
    plt.style.use('seaborn-white')

In [5]:
from torch.types import Device
# Device selection: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print('Current cuda device is', DEVICE)

Current cuda device is cuda


In [6]:
# Training hyperparameters shared by the cells below.
BATCH_SIZE = 256       # samples per mini-batch for the train/val/test loaders
EPOCHS = 30            # number of epochs passed to train_resnet()
LEARNING_RATE = 0.001  # initial learning rate handed to the Adam optimizer

### 모델 평가를 위한 함수

In [22]:
def evaluate(model, test_loader):
    """Evaluate ``model`` on every batch of ``test_loader``.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the module-level ``DEVICE`` before the forward pass.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Loader yielding ``(data, target)`` batches; its
            ``dataset`` attribute must support ``len()``.

    Returns:
        Tuple ``(loss, accuracy)``: the cross-entropy summed over all samples
        divided by the dataset size, and the percentage (0-100) of samples
        whose highest-scoring class equals the target.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    num_samples = len(test_loader.dataset)

    with torch.no_grad():
        for batch, labels in test_loader:
            batch = batch.to(DEVICE)
            labels = labels.to(DEVICE)
            logits = model(batch)
            # Sum (not mean) per batch so the final division by the dataset
            # size is exact even when the last batch is smaller.
            total_loss += F.cross_entropy(logits, labels, reduction='sum').item()
            _, predicted = torch.max(logits, dim=1)
            num_correct += (predicted == labels).sum().item()

    mean_loss = total_loss / num_samples
    accuracy_pct = 100 * num_correct / num_samples

    return mean_loss, accuracy_pct

### Transfer Learning을 위한 준비

In [7]:
# Per-split preprocessing pipelines, keyed by the split folder name.
#
# NOTE(review): the std values [0.299, 0.224, 0.255] differ from the commonly
# cited ImageNet std (0.229, 0.224, 0.225) and may be a digit transposition.
# They are left unchanged here because the test-time transform uses the same
# numbers; change all of them together if this is corrected.
data_transforms = {
    # Training: resize, random flips and a random 52x52 crop as augmentation.
    'train': transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.299, 0.224, 0.255]),
    ]),
    # Validation: same 52x52 input size but a deterministic center crop, so
    # the validation metric (and the best-checkpoint choice based on it) does
    # not fluctuate with random crop positions.
    'val': transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.CenterCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.299, 0.224, 0.255]),
    ]),
}

In [8]:
# Root folder that holds the 'train' and 'val' sub-folders.
data_dir = './splitted'

# One ImageFolder per split, each with its own preprocessing pipeline.
image_datasets = {
    phase: ImageFolder(
        root=os.path.join(data_dir, phase),
        transform=data_transforms[phase],
    )
    for phase in ('train', 'val')
}

# One shuffled DataLoader per split.
dataloaders = {
    phase: torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=4,
    )
    for phase, dataset in image_datasets.items()
}

# Number of samples per split; used to average the epoch loss and accuracy.
dataset_sizes = {phase: len(dataset) for phase, dataset in image_datasets.items()}

# Class names as discovered by ImageFolder in the training folder.
class_names = image_datasets['train'].classes

### Pre-Trained Model 불러오기

In [9]:
# Load ResNet-50 with pretrained weights.
# NOTE: torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
# the legacy argument is kept so the cell also runs on older torchvision.
resnet = models.resnet50(pretrained=True)

# Replace the pretrained classifier head with a fresh linear layer. Its output
# size is taken from the classes ImageFolder found in the training folder
# instead of a hard-coded 33, so it always matches the dataset.
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, len(class_names))
resnet = resnet.to(DEVICE)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [10]:
# Display the module tree to inspect the layer layout of the network.
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [11]:
# Multi-class classification loss applied to the raw model outputs.
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that currently require gradients.
# NOTE(review): this cell runs before the cell that freezes the early layers,
# so at this point the requires_grad filter does not exclude anything.
trainable_params = [p for p in resnet.parameters() if p.requires_grad]
optimizer_ft = optim.Adam(trainable_params, lr=LEARNING_RATE)

In [12]:
# Step decay: multiply the learning rate by 0.1 every 7 scheduler steps
# (train_resnet() calls scheduler.step() once per training epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

### Pre-Trained Model의 일부 Layer Freeze하기

In [13]:
# Freeze the first five top-level children of the network (per the printed
# architecture: conv1, bn1, relu, maxpool, layer1) so that only the later
# layers and the new classifier head are updated during fine-tuning.
ct = 0
for ct, child in enumerate(resnet.children(), start=1):
    if ct >= 6:
        continue
    for param in child.parameters():
        param.requires_grad = False

### Transfer Learning 모델 학습과 검증을 위한 함수

In [14]:
def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``; per-phase averages use the module-level
    ``dataset_sizes`` and batches are moved to the module-level ``DEVICE``.
    The weights with the highest validation accuracy seen so far are kept and
    restored into ``model`` before returning.

    Args:
        model: Network to train (modified in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler; stepped once after each train phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the best-validation weights.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('--------------------EPOCH {}--------------------'.format(epoch+1))
        since = time.time()

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by the batch size so the
                # epoch average is exact even with a smaller last batch.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate as a plain int (no deprecated `.data`), so the
                # accuracy below also works when a loader yields no batches.
                running_corrects += torch.sum(preds == labels).item()

            if is_train:
                scheduler.step()
                # Read the rate from the optimizer passed in, not from the
                # notebook-level `optimizer_ft` global. Because this runs after
                # scheduler.step(), it shows the rate for the next epoch.
                l_r = [group['lr'] for group in optimizer.param_groups]
                print('learning rate: ', l_r)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} ACC: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model

In [15]:
# Fine-tune the network for EPOCHS epochs; train_resnet() returns the model
# loaded with the weights that scored the best validation accuracy.
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=EPOCHS)

# Serialize the whole module object (not just its state_dict) to disk; the
# evaluation cell reads this file back with torch.load().
torch.save(model_resnet50, 'resnet50.pt')

--------------------EPOCH 1--------------------
learning rate:  [0.001]
train Loss: 0.6362 ACC: 0.8090
val Loss: 0.3040 ACC: 0.8978
Completed in 0m 25s
--------------------EPOCH 2--------------------
learning rate:  [0.001]
train Loss: 0.2393 ACC: 0.9212
val Loss: 0.2406 ACC: 0.9208
Completed in 0m 25s
--------------------EPOCH 3--------------------
learning rate:  [0.001]
train Loss: 0.1736 ACC: 0.9436
val Loss: 0.2060 ACC: 0.9347
Completed in 0m 25s
--------------------EPOCH 4--------------------
learning rate:  [0.001]
train Loss: 0.1445 ACC: 0.9532
val Loss: 0.1419 ACC: 0.9544
Completed in 0m 25s
--------------------EPOCH 5--------------------
learning rate:  [0.001]
train Loss: 0.1070 ACC: 0.9650
val Loss: 0.1768 ACC: 0.9471
Completed in 0m 25s
--------------------EPOCH 6--------------------
learning rate:  [0.001]
train Loss: 0.1097 ACC: 0.9645
val Loss: 0.1129 ACC: 0.9635
Completed in 0m 25s
--------------------EPOCH 7--------------------
learning rate:  [0.0001]
train Loss: 0.0

### Transfer Learning 모델 평가를 위한 전처리

In [18]:
# Test-time preprocessing. A deterministic center crop replaces the random
# crop so that the reported test accuracy is reproducible between runs; the
# 52x52 input size seen by the network is unchanged.
# NOTE(review): the std values [0.299, 0.224, 0.255] match the train/val
# pipelines in this notebook but differ from the commonly cited ImageNet std
# (0.229, 0.224, 0.225); change all of them together if this is corrected.
transform_resNet = transforms.Compose([
    transforms.Resize([64, 64]),
    transforms.CenterCrop(52),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.299, 0.224, 0.255]),
])

test_resNet = ImageFolder(root='./splitted/test',
                          transform=transform_resNet)

# Sample order does not affect the summed loss or accuracy, so the test set is
# read sequentially instead of being shuffled.
test_loader_resNet = torch.utils.data.DataLoader(test_resNet,
                                                 batch_size=BATCH_SIZE, shuffle=False,
                                                 num_workers=4)

### Transfer Learning 모델 성능 평가하기

In [23]:
# Reload the fine-tuned model saved by the training cell.
# The file holds a fully pickled nn.Module, so it must be loaded with
# weights_only=False (PyTorch >= 2.6 defaults to weights_only=True and rejects
# such files). Unpickling can execute arbitrary code: only load checkpoints
# produced by this notebook. map_location makes the load work on whichever
# device is active now.
try:
    resnet50 = torch.load('resnet50.pt', map_location=DEVICE, weights_only=False)
except TypeError:
    # Older PyTorch releases do not know the `weights_only` keyword.
    resnet50 = torch.load('resnet50.pt', map_location=DEVICE)
resnet50.eval()
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)

In [24]:
# test_accuracy is a percentage (evaluate() returns 100 * correct / total).
print('ResNet test acc: ', test_accuracy)

ResNet test acc:  98.86121887123014
