In [1]:
# Voeg imports toe als je ze nodig hebt
import random
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import ToTensor
import torch
import torchvision.io as tio
import matplotlib.pyplot as plt
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device} device")
%matplotlib inline
from torchensemble.utils.logging import set_logger

Using cuda device


In [2]:
# hyperparameters
mylearnrate = 1e-5
batchsize = 16
mymomentum = 0.9
epochs = 300
optchoice = 'adamw'
generator1 = torch.Generator().manual_seed(42)
inputdata = 'original_augm_cropped' 
test_batchsize = 16 #total pictures in test
trainingepochs = 40

### TRYING TO GENERALIZE SO THE VAL LOSS DROPS
resizevalue = 224 
dropout = 0.2 # added extra linear layer to resnet with dropout, transformers model has p = 0 standard, this will change that value
noise_factor = random.uniform(0, 0.4) # transformer noise 
myweight_decay=0.01  ##optimizer sgd parameter 
# L2 regularization, which is the sum of squares of all weights in the model, 
# and L1 regularization, which is the sum of the absolute values of all weights in the model.
# Both of them are scaled by a (small) factor, which is a hyperparameter we set prior to training.
# Typically, the parameter for weight decay is set on a logarithmic scale between 0 (overfitting) and 0.1(underftting) 
# (0.1, 0.01, 0.001, ...)
##############################################

In [3]:
if inputdata == 'apple_cropped_r224_augm':
    dataset_path = "./apple_cropped_r224_augm"
elif inputdata == 'resized224':
    dataset_path = './apple_resized_224/Train'
elif inputdata == 'resized224_augm':
    dataset_path = './apple_resized_224_augm/Train'
elif inputdata == 'original_augm':
    dataset_path = './apple_original_augm/Train'
elif inputdata == 'original_augm_cropped':
    dataset_path = './apple_original_augm_cropped/'


print(dataset_path)

./apple_original_augm_cropped/


In [4]:
# get length of each folder in imagefolder dataset
import os
imagefolder = dataset_path
for folder in os.listdir(imagefolder):
    print(folder, len(os.listdir(os.path.join(imagefolder, folder))))

Blotch_Apple 75
Normal_Apple 77
Rot_Apple 102
Scab_Apple 59


Transformer Choice


In [5]:
#Swin Transformer: image classification, 87.3 top-1 accuracy on ImageNet-1
#https://pytorch.org/vision/main/models.html
transformersmodel = torch.hub.load(
    "pytorch/vision", "vit_b_16", weights="IMAGENET1K_V1") #85% swin_t | 84% swin_v2_t  | 85% vit_b_16
    #"pytorch/vision", "swin_t", weights="IMAGENET1K_V1") #85% swin_t | 84% swin_v2_t  | 85% vit_b_16
    #"pytorch/vision", "swin_v2_t", weights="IMAGENET1K_V1") #85% swin_t | 84% swin_v2_t  | 85% vit_b_16

Using cache found in C:\Users\tinke/.cache\torch\hub\pytorch_vision_main


In [6]:

from torchvision import transforms

# https://towardsdatascience.com/data-augmentations-in-torchvision-5d56d70c372e
transform_img_normal = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize([resizevalue ,resizevalue ]),
    transforms.Normalize(mean = [0.6453, 0.4631, 0.3085],
                          std= [0.2000, 0.2238, 0.2254]),
    #transforms.Grayscale(3) #expected is 3 channels for the model, r == g == b
])

dataset = ImageFolder(dataset_path, transform=transform_img_normal)


In [7]:
dataset.class_to_idx

{'Blotch_Apple': 0, 'Normal_Apple': 1, 'Rot_Apple': 2, 'Scab_Apple': 3}

In [8]:
# split in train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size],generator=generator1)

In [9]:
#The data loader will apply the composed transform to the images as they are loaded and returned as batches 
# during training or evaluation. This allows you to perform data augmentation or other preprocessing steps on-the-fly 
# as the data is loaded, which is especially useful for training deep learning models.

trainloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batchsize, shuffle=True, num_workers=0)  # numworkers parallel/subprocesses
testloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batchsize, shuffle=False)  # no need to shuffle when evaluating


In [10]:
transform = transforms.Compose([
    #transforms.RandomCrop(224,224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(180),
    #adding noise because the train-loss was going down but the val-loss didnt = overfitting, need to regularize (dropout, more data, noise)
    transforms.Lambda(lambda x: x + torch.randn(x.size()) * noise_factor), 
    transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0)),
    #normalizing after adding noise
    transforms.Normalize(mean = [0.6453, 0.4631, 0.3085],
                          std= [0.2000, 0.2238, 0.2254]),
])

trainloader.dataset.transform = transform
testloader.dataset.transform = transform

In [11]:
transformersmodel.eval()

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [12]:
#in_features = transformersmodel.head.in_features #for transformer swin_t
in_features = transformersmodel.heads.head.in_features  #for vision transformer vit16
print(in_features)

768


In [13]:
# Here the size of each output sample is set to 4
#transformersmodel.heads = nn.Linear(in_features, 4)
transformersmodel.heads.head = nn.Linear(in_features, 4) #vit16

#set dropout rate in the entire model
for name, layer in transformersmodel.named_modules():
    if isinstance(layer, nn.Dropout):
         layer.p = dropout

In [14]:
transformersmodel.eval()

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.2, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.2, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.2, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [15]:
from torchensemble import VotingClassifier

transformersmodel1 = VotingClassifier(
    estimator=transformersmodel,
    n_estimators=7,
    cuda=True,
)

In [16]:
criterion = nn.CrossEntropyLoss()
transformersmodel1.set_criterion(criterion)
transformersmodel1.set_optimizer('AdamW',             # parameter optimizer
                    lr=mylearnrate,            # learning rate of the optimizer
                    weight_decay=myweight_decay)  # weight decay of the optimizer

#automatically gets saved in the directory this is run in *.pth

In [17]:
# Training
transformersmodel1.fit(train_loader=trainloader,  # training data
          epochs=trainingepochs)                 # the number of training epochs


  if not is_compiling() and torch.has_cuda and torch.cuda.is_available():


Estimator: 000 | Epoch: 000 | Batch: 000 | Loss: 1.52699 | Correct: 2/16
Estimator: 001 | Epoch: 000 | Batch: 000 | Loss: 1.41631 | Correct: 2/16
Estimator: 002 | Epoch: 000 | Batch: 000 | Loss: 1.45675 | Correct: 4/16
Estimator: 003 | Epoch: 000 | Batch: 000 | Loss: 1.47277 | Correct: 2/16


OutOfMemoryError: CUDA out of memory. Tried to allocate 28.00 MiB. GPU 0 has a total capacty of 8.00 GiB of which 0 bytes is free. Of the allocated memory 6.66 GiB is allocated by PyTorch, and 582.16 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Evaluating
transformersmodel1.evaluate(testloader, return_loss=False)

In [None]:
dataset_path_test = "D:/apple_disease_classification/Test/"
transform_img_normal = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize([resizevalue ,resizevalue]),
    transforms.Normalize(mean = [0.6453, 0.4631, 0.3085],
                          std= [0.2000, 0.2238, 0.2254]),
])
dataset_test = ImageFolder(dataset_path_test, transform=transform_img_normal)
dataset_test_loader = torch.utils.data.DataLoader(
    dataset_test, batch_size=test_batchsize, shuffle=False)

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

correct = 0
total = 0
confusion_matrix = np.zeros((4, 4))  # Initialize the confusion matrix

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    #for idx, data in enumerate(dataset_test_loader):
    for data in dataset_test_loader:
        inputs, labels = data
        
        # print(inputs.shape) = torch.Size([4, 3, 32, 32])

        # ---load data into GPU----
        inputs = inputs.to(device)
        labels = labels.to(device)
        # -------------------------

        # calculate outputs by running images through the network
        outputs =transformersmodel1.forward(inputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

            
        # Update confusion matrix
        for i in range(len(labels)):
            confusion_matrix[predicted[i]][labels[i]] += 1


print(f'Accuracy of the network on the test images: {100 * correct // total} %')
print('Confusion Matrix:')
print(confusion_matrix)
print(total)