In [9]:
# zip 파일 압출 풀기
!unzip -qq '/content/drive/MyDrive/Colab Notebooks/Deep Learning/ML_DL_STUDY/deep_learning/data/dataset.zip' -d './dataset'

In [10]:
import os

original_dataset_dir = './dataset'
classes_list = os.listdir(original_dataset_dir)

base_dir = './splitted'
os.mkdir(base_dir)

In [11]:
import shutil

In [12]:
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)
validation_dir = os.path.join(base_dir, 'val')
os.mkdir(validation_dir)


In [13]:
for cls in classes_list:
  os.mkdir(os.path.join(train_dir, cls))
  os.mkdir(os.path.join(test_dir, cls))
  os.mkdir(os.path.join(validation_dir, cls))


In [14]:
import math

for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)
    fnames = os.listdir(path)

    # data를 6:2:2 로 나눈다
    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)


    train_fnames = fnames[:train_size]
    for fname in train_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(train_dir, cls), fname)
        shutil.copyfile(src, dst)

    validation_fnames = fnames[train_size:(validation_size + train_size)]
    print("Validation size((",cls,") :", len(validation_fnames))
    for fname in validation_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(validation_dir, cls), fname)
        shutil.copyfile(src, dst)

    test_fnames = fnames[(train_size + validation_size):(validation_size + train_size + test_size)]

    print("Test size((",cls,") :", len(test_fnames))
    for fname in test_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(test_dir, cls), fname)
        shutil.copyfile(src, dst)

Validation size(( Corn___healthy ) : 232
Test size(( Corn___healthy ) : 232
Validation size(( Tomato___Tomato_Yellow_Leaf_Curl_Virus ) : 1071
Test size(( Tomato___Tomato_Yellow_Leaf_Curl_Virus ) : 1071
Validation size(( Apple___healthy ) : 329
Test size(( Apple___healthy ) : 329
Validation size(( Grape___Black_rot ) : 236
Test size(( Grape___Black_rot ) : 236
Validation size(( Potato___Early_blight ) : 200
Test size(( Potato___Early_blight ) : 200
Validation size(( Tomato___Leaf_Mold ) : 190
Test size(( Tomato___Leaf_Mold ) : 190
Validation size(( Tomato___Septoria_leaf_spot ) : 354
Test size(( Tomato___Septoria_leaf_spot ) : 354
Validation size(( Peach___healthy ) : 72
Test size(( Peach___healthy ) : 72
Validation size(( Grape___healthy ) : 84
Test size(( Grape___healthy ) : 84
Validation size(( Cherry___healthy ) : 170
Test size(( Cherry___healthy ) : 170
Validation size(( Potato___healthy ) : 30
Test size(( Potato___healthy ) : 30
Validation size(( Cherry___Powdery_mildew ) : 210
Te

In [2]:
import torch
import os

In [7]:
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else "cpu")
BATCH_SIZE = 256
EPOCH = 30

#### DATA RESIZE

In [1]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

In [15]:
transforms_base = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
train_dataset = ImageFolder(root='./splitted/train', transform=transforms_base)
val_dataset = ImageFolder(root = './splitted/val', transform=transforms_base)

In [3]:
from torch.utils.data import DataLoader

In [16]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size = BATCH_SIZE,
                                           shuffle = True,
                                           num_workers=4)

val_loader = torch.utils.data.DataLoader(val_dataset,
                                           batch_size = BATCH_SIZE,
                                           shuffle = True,
                                           num_workers=4)



In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [17]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, 33)

    def forward(self, x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = x.view(-1, 4096)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training = self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)




In [18]:
model_base = Net().to(DEVICE)

In [19]:
optimizer = optim.Adam(model_base.parameters(), lr = 1e-3)

In [20]:
def train(model, train_loader, optimizer):
  model.train()

  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(DEVICE), target.to(DEVICE)
    optimizer.zero_grad()
    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()


In [21]:
def evaluate(model, test_loader):
  model.eval()
  test_loss = 0
  correct = 0


  with torch.no_grad(): #모델 평가 시 weight update 안함
      for data, target in test_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)
        output = model(data)

        test_loss += F.cross_entropy(output, target, reduction = 'sum').item() # reduction='sum'은 손실 값을 미니배치 단위가 아닌 전체 데이터셋에 대해 합산

        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(target.view_as(pred)).sum().item()

  test_loss /= len(test_loader.dataset)
  test_accuracy = 100. * correct / len(test_loader.dataset)

  return test_loss, test_accuracy


In [22]:
import time
import copy

In [23]:
def train_baseline(model, train_loader, val_loader, optimizer, num_epochs = 30):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict()) # 가장 정확도가 높은 weight 저장

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        if val_acc > best_acc:
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('----------------- epoch {} -----------------'.format(epoch))
        print('train Loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc))
        print('val Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss, val_acc))
        print('Complieted in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)
    return model


In [24]:
base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)
torch.save(base, 'baseline.pt')

----------------- epoch 1 -----------------
train Loss: 1.6853, Accuracy: 52.29%
val Loss: 1.7013, Accuracy: 50.77%
Complieted in 1m 47s
----------------- epoch 2 -----------------
train Loss: 1.0474, Accuracy: 68.80%
val Loss: 1.0744, Accuracy: 67.99%
Complieted in 1m 37s
----------------- epoch 3 -----------------
train Loss: 0.7981, Accuracy: 74.38%
val Loss: 0.8292, Accuracy: 73.45%
Complieted in 1m 57s
----------------- epoch 4 -----------------
train Loss: 0.6692, Accuracy: 79.22%
val Loss: 0.7114, Accuracy: 78.01%
Complieted in 1m 59s
----------------- epoch 5 -----------------
train Loss: 0.5392, Accuracy: 83.28%
val Loss: 0.5962, Accuracy: 81.60%
Complieted in 1m 55s
----------------- epoch 6 -----------------
train Loss: 0.4816, Accuracy: 84.61%
val Loss: 0.5520, Accuracy: 82.29%
Complieted in 1m 59s
----------------- epoch 7 -----------------
train Loss: 0.3727, Accuracy: 88.39%
val Loss: 0.4498, Accuracy: 85.61%
Complieted in 2m 3s
----------------- epoch 8 ----------------

In [25]:
data_transforms = {
    'train' : transforms.Compose([transforms.Resize([64, 64]),
                                  transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip(),
                                  transforms.RandomCrop(52), transforms.ToTensor(),
                                  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                  ]),
    'val' : transforms.Compose([transforms.Resize([64, 64]),
                              transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip(),
                              transforms.RandomCrop(52), transforms.ToTensor(),
                              transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                              ])
}

In [28]:
data_dir = './splitted'
image_datasets = {x : ImageFolder(root = os.path.join(data_dir, x),
                                  transform=data_transforms[x]) for x in ['train','val']}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                              batch_size = BATCH_SIZE,
                                              shuffle = True,
                                              num_workers=4) for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

class_names = image_datasets['train'].classes



In [29]:
from torchvision import models

In [30]:
resnet = models.resnet50(pretrained = True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 33)
resnet = resnet.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, resnet.parameters()), lr=0.001)

from torch.optim import lr_scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 309MB/s]


In [31]:
ct = 0

for child in resnet.children():
  ct +=1
  if ct<6:
    for param in child.parameters():
        param.requires_grad = False

In [36]:
def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('----------------- epoch {} -----------------'.format(epoch+1))
        since = time.time()
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            runnung_corrects = 0


            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                runnung_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss/dataset_sizes[phase]
            epoch_acc = runnung_corrects.double()/dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('Complieted in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model



In [None]:
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

torch.save(model_resnet50, 'resnet50.pt')


In [40]:
transform_resnet = transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ])

test_resNet = ImageFolder(root='./splitted/test', transform=transform_resnet)
test_loader_resNet = torch.utils.data.DataLoader(test_resNet,
                                              batch_size = BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=4)

In [41]:
resnet50 = torch.load('resnet50.pt')
resnet50.eval()
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)

print('ResNet test acc:  ', test_accuracy)

ResNet test acc:   98.93603705094505
