In [1]:
import sys
sys.path.insert(0,"..")
from broncode.cnn import CNN
import torch.optim as optim
import torch
from torch import nn
from torchvision.datasets import ImageFolder
import torchvision.transforms.functional as F
import torchvision.transforms as T
from torch.utils.data import Subset, Dataset, DataLoader, random_split
from torchvision.io import read_image
import os

In [2]:

# The ImageFolder class in torchvision expects the data to be organized in separate folders,
# where each folder represents a different class
 

dataset_path = "../informatie/apple_disease_classification/images/Train/Dataset/"
transform = T.ToTensor()
dataset = ImageFolder(dataset_path, transform=transform)

 

dataset.class_to_idx
 

{'bad_apples': 0, 'good_apples': 1}

In [3]:
# build subset
idx = [i for i in range(len(dataset)) if (dataset.imgs[i][1] == dataset.class_to_idx['bad_apples'] or dataset.imgs[i][1] == dataset.class_to_idx['good_apples']) ]
subset = Subset(dataset, idx)
 


for sample_idx in range(len(subset)):
    sample = subset[sample_idx]
    
    # Access the image and label from the sample
    image, label = sample
    
    # Print or examine the sample
    if sample_idx % 1000 == 0:
        print("Sample:", sample_idx,"Image shape:", image.shape,"Label:", label)

Sample: 0 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 1000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 2000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 3000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 4000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 5000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 6000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 7000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 8000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 9000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 10000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 11000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 12000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 13000 Image shape: torch.Size([3, 128, 128]) Label: 0
Sample: 14000 Image shape: torch.Size([3, 128, 128]) Label: 0


In [4]:
counter = 0
bad_counter = 0
for i in range(len(subset)):
    if subset[i][1] == 1:
        counter += 1
    else:
        bad_counter += 1

print("good apples", counter)
print("bad apples", bad_counter)


good apples 208
bad apples 14544


In [5]:
# create a random generator
generator1 = torch.Generator().manual_seed(13)

# create a train test split with 70% train, 30% test
train_dataset, test_dataset = random_split(subset, [0.7, 0.3], generator=generator1)

# check length of train and test dataset
print(len(train_dataset), len(test_dataset))

# create train and test dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# train_features, train_labels = next(iter(train_dataloader))
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
test_features, test_labels = next(iter(test_dataloader))


print(test_features[0], test_labels[0])

10327 4425
tensor([[[0.5490, 0.4745, 0.4000,  ..., 0.5373, 0.5843, 0.6275],
         [0.6275, 0.5608, 0.4941,  ..., 0.5137, 0.5765, 0.6235],
         [0.6078, 0.5569, 0.5098,  ..., 0.4863, 0.5647, 0.6235],
         ...,
         [0.3569, 0.3882, 0.4235,  ..., 0.3490, 0.3490, 0.3373],
         [0.2510, 0.2941, 0.3294,  ..., 0.3373, 0.3333, 0.3176],
         [0.1804, 0.2235, 0.2667,  ..., 0.3216, 0.3098, 0.2980]],

        [[0.4118, 0.3373, 0.2667,  ..., 0.4706, 0.5098, 0.5490],
         [0.4745, 0.4078, 0.3490,  ..., 0.4549, 0.5098, 0.5529],
         [0.4275, 0.3843, 0.3412,  ..., 0.4431, 0.5137, 0.5686],
         ...,
         [0.3765, 0.4157, 0.4588,  ..., 0.3725, 0.3725, 0.3686],
         [0.2667, 0.3176, 0.3608,  ..., 0.3529, 0.3490, 0.3412],
         [0.1882, 0.2431, 0.2902,  ..., 0.3333, 0.3255, 0.3137]],

        [[0.3020, 0.2275, 0.1608,  ..., 0.3608, 0.4510, 0.5059],
         [0.3529, 0.2941, 0.2353,  ..., 0.3412, 0.4471, 0.5059],
         [0.2941, 0.2471, 0.2157,  ..., 0.3255,

In [6]:
def evaluate_accuracy(logits, y_true):
    
    # get index with the largest logit value PER OBSERVATION
    _, y_pred = torch.max(logits, dim=1)
    # calculate proportion of correct prediction
    correct_pred = (y_pred == y_true).float()
    acc = correct_pred.sum() / len(correct_pred)

    return acc * 100

net = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

running_loss = 0 
printfreq = 10
for epoch in range(2):
    for i, data in enumerate(train_dataloader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)  # forward pass 
        loss = criterion(outputs, labels) # calculate loss
        loss.backward() # calculate gradients (training)
        optimizer.step() # update weights of cnn

        running_loss += loss.item()
        if i % printfreq == printfreq-1:  
            print(epoch, i+1, running_loss / printfreq)
            running_loss = 0
    
    
    # disable gradient calculation
    with torch.no_grad():
        # initialize tracker for validation performance
        val_acc = 0
        val_loss = 0

        # prepare model for evaluation
        net.eval()

        # loop for each batch
        for data, target in test_dataloader:
            # STEP 1: forward pass
            output = net(data)
            # STEP 2: calculate the loss
            loss = criterion(output, target)
            # STEP 3: accumulate validation loss and accuracy
            acc = evaluate_accuracy(output, target)
        
        print(acc)

torch.Size([64, 16, 29, 29])


RuntimeError: shape '[-1, 2704]' is invalid for input of size 861184