<h1>Train and test the CNN</h1>

In [None]:
import sys
sys.path.append("../")
from broncode.apple_classifyer import AppleClassifyer
from sentence_transformers import SentenceTransformer, util

import torchvision.transforms as T
from torch.utils.data import Subset, Dataset, DataLoader, random_split
from torch import nn
import torch.nn.functional
from torchvision.datasets import ImageFolder
from torchvision.io import read_image, ImageReadMode
import torch
import pickle as pkl
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Root folder of the training images; it is handed to DatasetAppels to build the dataset.
dataset_path = "../informatie/apple_disease_classification/images/Train/Dataset/"

In [None]:
# Class to create a dataset.
class DatasetAppels(Dataset):
    """Dataset that keeps every image of an image folder in memory.

    The folder is read with torchvision's ``ImageFolder``; all transformed
    images are loaded eagerly at construction time and stored in ``images``.

    Attributes:
        images: list with the transformed image of every sample.
        labels: the ``targets`` reported by ``ImageFolder`` (one per sample).
        class_dict: the ``class_to_idx`` mapping reported by ``ImageFolder``.
    """

    def __init__(self, img_folder_path, transform):
        """Read all images below ``img_folder_path`` and apply ``transform``.

        Args:
            img_folder_path: folder passed on to ``ImageFolder``.
            transform: transform passed on to ``ImageFolder``.
        """
        # ImageFolder does the heavy lifting: finding files, loading, labelling.
        folder = ImageFolder(img_folder_path, transform=transform)
        print(folder.classes)

        # Keep only the image part of every (image, label) sample; the labels
        # are taken from ``targets`` below.
        loaded_images = []
        for sample in folder:
            loaded_images.append(sample[0])

        self.images = loaded_images
        self.labels = folder.targets
        self.class_dict = folder.class_to_idx

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``[image, label]`` for the sample at position ``idx``."""
        image = self.images[idx]
        label = self.labels[idx]
        return [image, label]
    


In [None]:
# Build the dataset from the image folder, turning every image into a tensor.
dataset = DatasetAppels(img_folder_path=dataset_path, transform=T.ToTensor())

# Peek at the first sample to see what the dataset yields.
next(iter(dataset))


In [None]:
# Show the mapping stored in class_dict (taken from ImageFolder's class_to_idx).
print(dataset.class_dict)

In [None]:
# Shrink every image to 64 by 64 for faster training and less memory usage.
resize_data = T.Resize((64, 64))

# Shape of the first image before resizing.
print(dataset.images[0].shape)

# Overwrite each stored image with its resized version, in place.
for i, image in enumerate(dataset.images):
    dataset.images[i] = resize_data(image)

# Shape of the first image after resizing.
print(dataset.images[0].shape)

In [None]:
# Seeded generator so random_split produces the same split on every run.
generator1 = torch.Generator().manual_seed(13)

# Split the dataset: 60% train, 20% test, 20% validation.
split_fractions = [0.6, 0.2, 0.2]
train_dataset, test_dataset, val_dataset = random_split(
    dataset,
    split_fractions,
    generator=generator1,
)
print(len(train_dataset), len(test_dataset), len(val_dataset))

# Data loaders for later use: only the training batches are shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=100)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=50)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=50)

In [None]:
# Time to train!
net = AppleClassifyer()

# After a lot of fiddling, this learning rate / number of epochs gave the best result.
history, acc = net.fit(
    train_loader,
    val_loader,
    test_loader,
    lr=0.0015,
    epochs=80,
)

In [None]:
# AppleClassifyer() has a function to predict a single image, so the model can be
# tried on different images; the same function is used for the aql function later on.

# Let's try with normal images.
normal = [32, 33, 34]

for i in normal:
    test_path = f"../informatie/apple_disease_classification/images/Test/Normal_Apple/{i}.jpg"

    # Read the file as RGB and divide the pixel values by 255.
    test_img = read_image(test_path, ImageReadMode.RGB) / 255

    # Resize like the training images, then unsqueeze(0) because the model
    # expects a batch rather than a single image.
    test_img = resize_data(test_img).unsqueeze(0)

    result = net.predict_image(test_img)
    print(result)
    

In [None]:
# Save the model with pickle (pkl), using acc (the accuracy percentage) in the
# filename. An older file with the same percentage is overwritten on purpose.
model_path = f"..//models/AppleClassifyer_{acc}"

# Create the target directory when it is missing; otherwise open() raises
# FileNotFoundError and the freshly trained model would be lost.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# NOTE: the whole model object is pickled, so the file should only ever be
# loaded again from a trusted location.
with open(model_path, 'wb') as f:
    pkl.dump(net, f)