In [None]:
# !sudo add-apt-repository ppa:ubuntu-toolchain-r/test 
# !sudo apt-get update
# !sudo apt-get upgrade
# !sudo apt-get dist-upgrade

In [None]:
# !pip install coremltools==5.0b2

# System details

In [None]:
# List the GPUs visible to this runtime, so the hardware used for the run is recorded.
!nvidia-smi -L

In [None]:
# Record the CPU model: keep only the 'Model name' line of the lscpu report.
!lscpu |grep 'Model name'

# Dataset setup

In [None]:
import random
import os
import shutil

## Dataset download

In [None]:
!git clone https://github.com/spMohanty/PlantVillage-Dataset.git 

In [None]:
# Build the Dataset/<variant>/Training/<class> directory tree (relative to the
# current working directory). os.makedirs creates the missing parents and, with
# exist_ok=True, does nothing for folders that already exist, so this cell can
# be re-run safely.
for variant in ("Color", "Grayscale", "Segmented"):
    for class_name in ("Black_rot", "Leaf_blight", "Esca", "Healthy"):
        os.makedirs(os.path.join("Dataset", variant, "Training", class_name), exist_ok=True)

In [None]:
def move_files(source_folder, destination_folder):
    """Move every regular file from `source_folder` into `destination_folder`.

    Sub-directories (and anything else that is not a regular file) are left
    in place. Prints "<moved>/<entries in source>" followed by "Images moved".

    Args:
        source_folder: directory whose files are moved.
        destination_folder: directory receiving the files; file names are kept.
    """
    entries = os.listdir(source_folder)
    moved = 0
    for name in entries:
        src_path = os.path.join(source_folder, name)
        # Skip directories and other non-file entries.
        if not os.path.isfile(src_path):
            continue
        shutil.move(src_path, os.path.join(destination_folder, name))
        moved += 1
    print(f"{moved}/{len(entries)}")
    print("Images moved")

In [None]:
# Move the grape images of every dataset variant (Color, Grayscale, Segmented)
# from the cloned repository into the matching Training/<class> folder.
grape_folders = [
    # (folder name in the repository, class folder under Training)
    ("Grape___Black_rot", "Black_rot"),
    ("Grape___Esca_(Black_Measles)", "Esca"),
    ("Grape___healthy", "Healthy"),
    ("Grape___Leaf_blight_(Isariopsis_Leaf_Spot)", "Leaf_blight"),
]
variant_folders = [("color", "Color"), ("grayscale", "Grayscale"), ("segmented", "Segmented")]

for raw_variant, variant in variant_folders:
    for raw_class, class_name in grape_folders:
        move_files(
            f"/content/PlantVillage-Dataset/raw/{raw_variant}/{raw_class}",
            f"/content/Dataset/{variant}/Training/{class_name}",
        )


In [None]:
# Delete the cloned repository checkout; the remaining cells only read /content/Dataset.
!rm -rf /content/PlantVillage-Dataset
# Delete /content/sample_data as well (presumably Colab's bundled sample files — confirm).
!rm -rf /content/sample_data

## Setting up training, validation and testing dataset

In [None]:
# Report the number of images per class in each variant's Training folder.
# Every variant enumerates its own class folders rather than assuming they
# match the ones found under Color.
for variant in ("Color", "Grayscale", "Segmented"):
    training_root = os.path.join("/content/Dataset", variant, "Training")
    num_images = [
        [class_name, len(os.listdir(os.path.join(training_root, class_name)))]
        for class_name in os.listdir(training_root)
    ]
    print(f"Number of training images in {variant} folder {num_images}")

In [None]:
def create_folder(source, fol_name, factor):
    """Move a random fraction of every class folder in `source` into `fol_name`.

    For each class sub-folder of `source`, a sub-folder with the same name is
    created under `fol_name` and `int(n * factor)` randomly chosen entries
    (n = number of entries in the class folder) are moved into it.

    A class whose destination sub-folder already exists is skipped, so calling
    the function again does not move a second batch of files.

    Args:
        source: directory containing one sub-folder per class.
        fol_name: directory receiving the sampled files; created if missing.
        factor: fraction (0..1) of each class folder to move.
    """
    os.makedirs(fol_name, exist_ok=True)

    for cl in sorted(os.listdir(source)):
        class_source = os.path.join(source, cl)
        class_destination = os.path.join(fol_name, cl)

        # Already split on a previous run: leave this class untouched.
        if os.path.exists(class_destination):
            continue
        os.mkdir(class_destination)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # selection depend only on the state of the `random` module, so a
        # seeded run picks the same files every time.
        candidates = sorted(os.listdir(class_source))
        sample_size = int(len(candidates) * factor)

        for file_name in random.sample(candidates, sample_size):
            shutil.move(os.path.join(class_source, file_name), class_destination)

In [None]:
# Carve the Validation and Test splits out of each variant's Training folder.
# create_folder moves files out of Training, so the Test call samples 20% of
# what is left after Validation took its 10% (roughly 18% of the original set).
SPLIT_SEED = 42
random.seed(SPLIT_SEED)  # repeatable sampling for a given directory listing order

for variant in ("Color", "Grayscale", "Segmented"):
    variant_root = os.path.join("/content/Dataset", variant)
    training_dir = os.path.join(variant_root, "Training")
    for split_name, fraction in (("Validation", 0.1), ("Test", 0.2)):
        split_dir = os.path.join(variant_root, split_name)
        # exist_ok=True keeps the cell re-runnable (a bare mkdir fails on existing folders).
        os.makedirs(split_dir, exist_ok=True)
        create_folder(training_dir, split_dir, fraction)

## Dataset info

In [None]:
# Confirm the training / validation / test division: for every dataset variant
# print the per-class image counts of each split and their total.
split_folders = [("Training", "training"), ("Validation", "validation"), ("Test", "testing")]

for position, variant in enumerate(("Color", "Grayscale", "Segmented")):
    if position > 0:
        print("----------------------")
    print(variant)
    for split_dir, split_label in split_folders:
        split_root = os.path.join("/content/Dataset", variant, split_dir)
        num_images = [
            [class_name, len(os.listdir(os.path.join(split_root, class_name)))]
            for class_name in os.listdir(split_root)
        ]
        total = sum(count for _, count in num_images)
        print(f"Number of {split_label} images in {variant} folder {num_images}")
        print(f"Total: {total}")

# Dataset preprocessing and augmentation

In [None]:
import torch
import numpy as np
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt

#### No augmentation

In [None]:
# def to_tensor(pil):
#     return torch.tensor(np.array(pil)).permute(2,0,1).float()
# t = transforms.Compose([
#                         transforms.Resize((256, 256)),
#                         transforms.Lambda(to_tensor)
#                         ])

# train_dataloader = torchvision.datasets.ImageFolder("/content/Dataset/Color/Training", t)
# train_dataloader = DataLoader(train_dataloader, batch_size = 32, shuffle=True)

# validation_dataloader = torchvision.datasets.ImageFolder("/content/Dataset/Color/Validation", t)
# validation_dataloader = DataLoader(validation_dataloader, batch_size = 1)

#### Augmentation

In [None]:
def to_tensor(pil):
    """Convert an image to a float tensor with its last axis moved to the front.

    The input is turned into a NumPy array, wrapped in a tensor, its axes are
    reordered (2, 0, 1) and the values are cast to float; no rescaling is done.
    Assumes the array is 3-D (height, width, channels), e.g. an RGB PIL image.
    """
    pixel_array = np.array(pil)
    image = torch.tensor(pixel_array)
    image = image.permute(2, 0, 1)
    return image.float()
# Training-time augmentation. RandomRotation(180) draws its angle from
# [-180, 180], which already covers every orientation, so one rotation is
# enough and the image is only resampled once.
t = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(180),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.Lambda(to_tensor),
])

# Validation must score the model on unmodified images, so it only resizes
# and converts (no random flips or rotations).
eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.Lambda(to_tensor),
])

train_dataset = torchvision.datasets.ImageFolder("/content/Dataset/Color/Training", t)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

validation_dataset = torchvision.datasets.ImageFolder("/content/Dataset/Color/Validation", eval_transform)
validation_dataloader = DataLoader(validation_dataset, batch_size=1)

In [None]:
# count = 0
# for inputs, labels in train_dataloader:
#     count += 1
# count*32

#### Dataloaders

In [None]:
# Loaders keyed by the phase names used by the training loop.
dataloaders = {'train': train_dataloader, 'val': validation_dataloader}
# Take the sample counts from the datasets themselves so they stay correct
# whenever the split or the source data changes.
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

In [None]:
# Pull one batch from the training loader and show the shapes of its parts.
batch_iterator = iter(train_dataloader)
train_features, train_labels = next(batch_iterator)
for caption, batch in (("Feature", train_features), ("Labels", train_labels)):
    print(f"{caption} batch shape: {batch.size()}")

# Classifier

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
import torchvision.models as models
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

## Models

### Resnet18

In [None]:
# Start from the pretrained ResNet18 and swap its final layer for a fresh
# 4-way classifier (one output per grape class). Assigning to
# `fc.out_features` would only change an attribute and leave the weight
# matrix, and therefore the number of logits, unchanged; the layer itself
# has to be replaced.
resnet18 = models.resnet18(pretrained=True)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 4)
# Move to the GPU after the head is replaced so the new layer is moved too.
resnet18.to('cuda')
print(resnet18)

### Mobilenet_v2

In [None]:
# mobilenet_v2 = models.mobilenet_v2(pretrained=True)
# mobilenet_v2.classifier[1] = nn.Linear(mobilenet_v2.classifier[1].in_features, 4)
# mobilenet_v2.to('cuda')
# print(mobilenet_v2)

## Optimiser and scheduler

In [None]:
# Cross-entropy loss for the multi-class problem, optimised with SGD + momentum
# over all ResNet18 parameters.
LEARNING_RATE = 0.001
MOMENTUM = 0.9

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    resnet18.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

In [None]:
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(mobilenet_v2.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=7,
    gamma=0.1,
)

## Training

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, device='cuda', loaders=None):
    """Train `model`, validate after every epoch and return it with the best weights.

    Each epoch runs a 'train' phase (gradient updates, then one scheduler step)
    followed by a 'val' phase (no gradients). The weights that reach the highest
    validation accuracy are restored into the model before it is returned.

    Args:
        model: network to train. Batches are moved to `device`, the model is
            not, so it must already be on that device.
        criterion: loss taking (outputs, labels).
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: number of epochs to run.
        device: device the input batches are moved to.
        loaders: optional dict with 'train' and 'val' iterables of
            (inputs, labels) batches. Defaults to the notebook-level
            `dataloaders` dict.

    Returns:
        The same `model` object, loaded with the best validation weights.
    """
    phase_loaders = dataloaders if loaders is None else loaders

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Count the samples actually seen instead of relying on a
            # separately maintained size table.
            samples_seen = 0

            for inputs, labels in phase_loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # track gradient history only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics (the loss is a batch mean, so weight it by batch size)
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                samples_seen += batch_size

            if is_training:
                scheduler.step()

            # Guard against an empty loader so the averages stay defined.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # keep a copy of the best-performing weights
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Fine-tune ResNet18 for 25 epochs; `model` ends up holding the best validation weights.
model = train_model(
    model=resnet18,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

In [None]:
# model = train_model(mobilenet_v2, criterion, optimizer, exp_lr_scheduler, num_epochs=25)

### Model testing

In [None]:
from sklearn.metrics import precision_score,recall_score, f1_score, accuracy_score

In [None]:
def to_tensor(pil):
    """Return `pil` as a float tensor with axes reordered from (0, 1, 2) to (2, 0, 1).

    Values are cast to float without any rescaling. Assumes `pil` converts to
    a 3-D (height, width, channels) array, e.g. an RGB PIL image.
    """
    channels_last = torch.tensor(np.array(pil))
    channels_first = channels_last.permute(2, 0, 1)
    return channels_first.float()
# Evaluation pipeline: resize and convert only, no random augmentation.
t = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.Lambda(to_tensor),
    ]
)

# One image per batch, drawn from the held-out Color test split.
test_dataset = torchvision.datasets.ImageFolder("/content/Dataset/Color/Test", t)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [None]:
def model_testing(model, dataloader, device=None):
    """Evaluate `model` on `dataloader` and print per-class and overall metrics.

    Prints per-class precision, recall and F1 (average=None) plus the overall
    accuracy, computed from the arg-max prediction of every sample.

    Args:
        model: classifier returning one score per class for each input.
        dataloader: iterable of (inputs, labels) batches; any batch size works.
        device: device the inputs are moved to. Defaults to the device of the
            model's parameters (CPU if the model has none).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    op = []  # predicted class indices
    gt = []  # ground-truth class indices
    with torch.no_grad():
        for inputs, labels in dataloader:
            output = model(inputs.to(device))
            _, preds = torch.max(output, 1)
            # Collect every sample of the batch, not just the first one.
            op.extend(preds.cpu().tolist())
            gt.extend(labels.tolist())

    prec = precision_score(gt, op, average=None)
    rec = recall_score(gt, op, average=None)
    f1 = f1_score(gt, op, average=None)
    acc = accuracy_score(gt, op)
    print(f"Precision: {prec}")
    print(f"Recall: {rec}")
    print(f"F1 Score: {f1}")
    print(f"Accuracy: {acc}")


In [None]:
# Show the class-name -> label-index mapping of the test dataset, which tells
# which class each position of the per-class metric arrays refers to.
print(test_dataloader.dataset.class_to_idx)

In [None]:
# Evaluate the trained model on the held-out test split and print the metrics.
model_testing(model, test_dataloader)


## Save model

In [None]:
# import coremltools as ct

In [None]:
# torch.save(model, "grape_classification.pt") # In PyTorch format

In [None]:
# for example_input, labels in dataloaders['val']:
#     example_input = example_input.to('cuda')
#     break

In [None]:
# model.eval()
# trace = torch.jit.trace(model, example_input)

In [None]:
# # Convert to Core ML using the Unified Conversion API
# model = ct.convert(
#     trace,
#     inputs=[ct.ImageType(name="input_1", shape=example_input.shape)],
# )


In [None]:
# model.save("GrapeDiseaseDetectionMNV2.mlmodel")