In [1]:
import os 
import shutil


In [None]:
# Don't run this again
original_dataset_dir = './dataset'
classes_list = os.listdir(original_dataset_dir)

base_dir = './splitted'
os.mkdir(base_dir)

train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'val')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

for clss in classes_list:
    os.mkdir(os.path.join(train_dir, clss))
    os.mkdir(os.path.join(validation_dir, clss))
    os.mkdir(os.path.join(test_dir, clss))

In [5]:
# Don't run this again
import math

for clss in classes_list:
    path = os.path.join(original_dataset_dir, clss)
    fnames = os.listdir(path)

    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)

    train_fnames = fnames[:train_size]
    print('Train size(',clss,'): ', len(train_fnames))
    for fname in train_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(train_dir, clss), fname)
        shutil.copyfile(src, dst)

    validation_fnames = fnames[train_size:(validation_size + train_size)]
    print('validation size(',clss,'):', len(train_fnames))
    for fname in validation_fnames:
        src = os.path.join(path,fname)
        dst = os.path.join(os.path.join(validation_dir, clss), fname)
        shutil.copyfile(src,dst)

    test_fnames = fnames[(train_size + validation_size):
                         (validation_size + train_size + test_size)]
    print('test size(',clss,'):', len(test_fnames))
    for fname in test_fnames:
        src = os.path.join(path,fname)
        dst = os.path.join(os.path.join(test_dir, clss),fname)
        shutil.copyfile(src,dst)


Train size( Apple___Apple_scab ):  378
validation size( Apple___Apple_scab ): 378
test size( Apple___Apple_scab ): 126
Train size( Apple___Black_rot ):  372
validation size( Apple___Black_rot ): 372
test size( Apple___Black_rot ): 124
Train size( Apple___Cedar_apple_rust ):  165
validation size( Apple___Cedar_apple_rust ): 165
test size( Apple___Cedar_apple_rust ): 55
Train size( Apple___healthy ):  987
validation size( Apple___healthy ): 987
test size( Apple___healthy ): 329
Train size( Cherry___healthy ):  512
validation size( Cherry___healthy ): 512
test size( Cherry___healthy ): 170
Train size( Cherry___Powdery_mildew ):  631
validation size( Cherry___Powdery_mildew ): 631
test size( Cherry___Powdery_mildew ): 210
Train size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  307
validation size( Corn___Cercospora_leaf_spot Gray_leaf_spot ): 307
test size( Corn___Cercospora_leaf_spot Gray_leaf_spot ): 102
Train size( Corn___Common_rust ):  715
validation size( Corn___Common_rust ): 715

In [4]:
print(DEVICE)

cuda


In [3]:
import torch

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')

BATCH_SIZE = 256
EPOCH = 30 

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

transform_base = transforms.Compose([transforms.Resize((64,64)),
                                     transforms.ToTensor()])
train_dataset = ImageFolder(root='./splitted/train',
                            transform=transform_base)
val_dataset = ImageFolder(root='./splitted/val',
                          transform = transform_base)

from torch.utils.data import DataLoader

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size = BATCH_SIZE, shuffle=True,
                                           num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)


In [5]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):

    def __init__(self):

        super(Net,self).__init__()

        self.conv1 = nn.Conv2d(3,32,3, padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(32,64,3, padding=1)
        self.conv3 = nn.Conv2d(64,64,3, padding=1)

        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512,33)

    def forward(self,x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training = self.training)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x,p=0.25, training = self.training)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training = self.training)

        x = x.view(-1, 4096) # flatten with feature map
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training = self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)
model_base = Net().to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr= 0.001)


In [6]:
def train(model, train_loader, optimizer):
    model.train()
    for batch_idx, (data,target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        

In [7]:
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0 

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            test_loss += F.cross_entropy(output, target, reduction = 'sum').item()

            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy


In [14]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs = 30):
    best_acc = 0.0 
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print(' ------ epoch{} ------'.format(epoch))
        print('train_loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc))
        print('val_loss : {:.4f}, Accuracy: {:2f}%'.format(val_loss, val_acc))
        print('completed in {:0f}m{:0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)
    return model



# base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)

# torch.save(base,'baseline.pt')

In [12]:

# prep for transfer learning 
data_transforms = {
    'train':transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

}
data_dir = './splitted'
image_datasets = {x: ImageFolder(root=os.path.join(data_dir, x),
                                 transform=data_transforms[x]) for x in ['train','val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                              batch_size = BATCH_SIZE, shuffle=True, num_workers=4)
                                              for x in ['train','val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','val']}

class_names = image_datasets['train'].classes


In [8]:
# importing pre-trained model

from torchvision import models

resnet = models.resnet50(pretrained=True)
num_ftrs = resnet.fc.in_features # resnet50 has different feature #, fetch it and make another layer
resnet.fc = nn.Linear(num_ftrs,33)
resnet = resnet.to(DEVICE)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad,
                                 resnet.parameters()), lr=0.001)

from torch.optim import lr_scheduler

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, 
                                       step_size = 7, gamma = 0.1)



In [9]:
# freeze pre-trained model's layers

ct = 0
for child in resnet.children():
    ct += 1 
    if ct<6:
        for param in child.parameters():
            param.required_grad = False # parameters not to be updated

In [16]:
# function for training & verifying resnet 

def train_resnet(model, criterion, optimizer, scheduler, num_epochs = 25):

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('-----epoch{}------'.format(epoch+1))
        since = time.time()

        for phase in ['train','val']:
            if phase =='train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                optimizer.zero_grad()

                with torch.set_grad_enabled (phase == 'train'): # not to update on 'eval'
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1 )
                    loss = criterion(outputs, labels) # loss is for each, cost is for entire set

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0) # inputs.size(0) == batch size
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()
                l_r = [x['lr'] for x in optimizer_ft.param_groups]
                print('learning rate:',l_r)

            epoch_loss = running_loss/dataset_sizes[phase]
            epoch_acc = running_corrects.double()/dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        
        time_elapsed = time.time() - since
        print('completed in {:0f}m {:0f}s'.format(time_elapsed//60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model


In [18]:
# Completed! don't run this again.
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=EPOCH)

torch.save(model_resnet50, 'resnet50.pt')

-----epoch1------
learning rate: [1.0000000000000002e-07]
train Loss: 0.0122 Acc: 0.9959
val Loss: 0.0279 Acc: 0.9914
completed in 0.000000m 57.273361s
-----epoch2------
learning rate: [1.0000000000000002e-07]
train Loss: 0.0110 Acc: 0.9965
val Loss: 0.0278 Acc: 0.9909
completed in 0.000000m 56.763905s
-----epoch3------
learning rate: [1.0000000000000002e-07]
train Loss: 0.0114 Acc: 0.9966
val Loss: 0.0273 Acc: 0.9912
completed in 0.000000m 56.774662s
-----epoch4------
learning rate: [1.0000000000000004e-08]
train Loss: 0.0121 Acc: 0.9962
val Loss: 0.0264 Acc: 0.9916
completed in 0.000000m 56.875372s
-----epoch5------
learning rate: [1.0000000000000004e-08]
train Loss: 0.0126 Acc: 0.9961
val Loss: 0.0280 Acc: 0.9905
completed in 1.000000m 1.055234s
-----epoch6------
learning rate: [1.0000000000000004e-08]
train Loss: 0.0117 Acc: 0.9963
val Loss: 0.0278 Acc: 0.9909
completed in 1.000000m 1.097868s
-----epoch7------
learning rate: [1.0000000000000004e-08]
train Loss: 0.0115 Acc: 0.9962
v

In [1]:
#preprocessing for eval baseline model
transform_base = transforms.Compose([transforms.Resize([64,64]),
                                     transforms.ToTensor()])
test_base = ImageFolder(root='./splitted/test',
                        transform = transform_base)
test_loader_base = torch.utils.data.DataLoader(test_base, 
                                               batch_size = BATCH_SIZE, shuffle= True,
                                               num_workers=4)

NameError: name 'transforms' is not defined

In [None]:
#preprocessing for eval transfer learning
transform_resNet = transforms.Compose([
    transforms.Resize([64,64]),
    transforms.RandomCrop(52),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

])

test_resNet = ImageFolder(root='.splitted/test',
                          transform = transform_resNet)

test_loader_resNet = torch.utils.data.DataLoader(test_resNet,
                                                 batch_size = BATCH_SIZE, shuffle = True, num_workers=4)

In [None]:
# Practiced with Resource from 
# book: "한 줄씩 따라 해보는 파이토치 딥러닝 프로젝트 모음집" by 이경택(KyungTaek Lee)
# book's link: https://www.yes24.com/Product/Goods/102911732
# data's link: https://data.mendeley.com/datasets/tywbtsjrjv/1
