# Terragon.de Image Classifier (TIC-Torch)
### with a PyTorch Convolutional Neural Network

## Why TIC-Torch?

TIC-Torch offers a convenient, fast and effective workflow to create, train and deploy individual and versatile image classifiers.

## What's the workflow?

1. Collect many jpg files of the objects you want to classify and arrange them in the "raw" directory. Create one subfolder for each object you want to classify and put the images inside. Example:
    - raw/flowers
    - raw/houses
    - raw/sea
2. Run TIC-Torch to resize and create variations of the images to train the Neural Network.
3. Run TIC-Torch to classify a new jpg.


# Import Python libraries for OS, PyTorch and Image manipulation

In [None]:
# System Libraries

# Standard Python libraries for operating system and system-related functionalities.
import os, sys 
# A function from distutils.dir_util to recursively copy directories.
from distutils.dir_util import copy_tree 
# Classes and functions from the Python Imaging Library (PIL) for working with images and image processing.
from PIL import Image, ImageEnhance 
import PIL.ImageOps
# module for generating random numbers.
import random 

# PyTorch Libraries
# Core libraries of the PyTorch framework.
import torch 
import torchvision
# Module from torchvision for data transformations on images.
import torchvision.transforms as transforms 
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
# Modules from PyTorch for defining neural networks, optimization, autograd, and functional operations.
import torch.nn.functional as F 

# Other Libraries we'll use
# A library for numerical computing in Python.
import numpy as np 
# A library for creating visualizations and plots.
import matplotlib.pyplot as plt 
# a magic command in Jupyter notebooks that ensures that plots are displayed within the notebook itself.
%matplotlib inline 

print("Libraries imported - ready to use PyTorch", torch.__version__)

# Define Variables

In [None]:
# Path to the folder where you put your source images (one subfolder per class)
path_images_raw = "raw/"
# Path where this notebook places the prepared images used for training
path_images_train = "train/"

# Training images are resized to 128 x 128 pixels.
# Keep this in sync with the input size the network below expects (128 x 128).
resize_x = 128
resize_y = 128

# Number of images that are processed together in one batch
batch_size = 50
# Passed to the data loaders: images are drawn in random order instead of one after the other
shuffle = True

# Number of training epochs; an epoch is one complete pass through the entire training dataset
epochs = 20

# list subdirs in raw/

In [None]:
# List the entries of the raw directory; each subfolder there stands for one class to be learned
path_images_raw_subdirs = os.listdir(path_images_raw)
print("raw/ subdirs: ",path_images_raw_subdirs)

# copy all images from raw/ to train/ 

In [None]:
# path for raw and train are put into function
def ImagesCopy(path_images_raw,path_images_train):
    """Copy the complete raw image tree into the training folder.

    The destination is created when it does not exist yet. When it already
    exists, the trees are merged and files with the same name are overwritten.

    Args:
        path_images_raw: Source folder containing one subfolder per class.
        path_images_train: Destination folder for the working copies.
    """
    # shutil.copytree is used instead of distutils.dir_util.copy_tree:
    # distutils is no longer part of the standard library in Python 3.12, and
    # copy_tree remembers directories it created, so copying again after the
    # train folder was deleted in the same session fails.
    import shutil

    shutil.copytree(path_images_raw, path_images_train, dirs_exist_ok=True)
    # list what ended up in the train directory (one entry per class subfolder)
    path_images_train_subdirs = os.listdir(path_images_train)
    print("copy from raw to train/ folder. done.")
    print("train subdirs: ",path_images_train_subdirs)
# execute the above ImagesCopy() function
ImagesCopy(path_images_raw,path_images_train) 

# resize all images in train/ to neural network size 

In [None]:
def ImagesResize(path=None, size=None):
    """Resize every image below the training folder and store it as an RGB JPEG.

    Each file ``<name>.<ext>`` is replaced by ``<name>_resized.jpg``; the
    source file is removed from the training folder afterwards.

    Args:
        path: Folder to process. Defaults to the notebook's ``path_images_train``.
        size: Target ``(width, height)`` in pixels. Defaults to
            ``(resize_x, resize_y)`` from the notebook configuration.
    """
    root = path_images_train if path is None else path
    target_size = (resize_x, resize_y) if size is None else size
    # walk through all subdirectories of the train directory
    for subdir, dirs, files in os.walk(root):
        for file in files:
            path_image = os.path.join(subdir, file)
            # skip anything that is not a regular file (e.g. a broken link)
            if not os.path.isfile(path_image):
                continue
            # split the path into the part before the extension (f) and the extension (e)
            f, e = os.path.splitext(path_image)
            # The context manager closes the file handle again before the
            # source file is deleted. Converting to RGB first keeps the JPEG
            # save from failing on images with an alpha channel or a palette
            # and gives every training image three colour channels.
            with Image.open(path_image) as im:
                imResize = im.convert('RGB').resize(target_size, Image.Resampling.LANCZOS)
            # save resized image as file with the name f + "_resized.jpg"
            imResize.save(f + '_resized.jpg', 'JPEG', quality=90)
            # delete the unresized copy from the train folder
            os.remove(path_image)
    print("resized all images in folder train/")
# execute the above ImagesResize() function
ImagesResize()

# create variations of all images in train/ 

In [None]:
def ImageVariations(path=None):
    """Write six augmented variants next to every image in the training folder.

    For each file ``<name>.<ext>`` the variants ``<name>_lighter.jpg``,
    ``_darker.jpg``, ``_inverted.jpg``, ``_rotated.jpg``, ``_mirrored.jpg`` and
    ``_greyscale.jpg`` are created. Every file found is treated as a source
    image, so running this a second time also creates variants of the variants.

    Args:
        path: Folder to process. Defaults to the notebook's ``path_images_train``.
    """
    root = path_images_train if path is None else path
    # walk through all subdirectories of the train directory
    for subdir, dirs, files in os.walk(root):
        for file in files:
            path_image = os.path.join(subdir, file)
            # skip anything that is not a regular file
            if not os.path.isfile(path_image):
                continue
            # The context manager closes the file handle; the RGB copy is
            # independent of the file and is a mode ImageOps.invert supports.
            with Image.open(path_image) as source:
                im = source.convert('RGB')
            # split the path into the part before the extension (f) and the extension (e)
            f, e = os.path.splitext(path_image)
            # lighter and darker versions
            enhancer = ImageEnhance.Brightness(im)
            imLighter = enhancer.enhance(1.8)
            imLighter.save(f + '_lighter.jpg', 'JPEG', quality=90)
            imDarker = enhancer.enhance(0.5)
            imDarker.save(f + '_darker.jpg', 'JPEG', quality=90)
            # inverted colours
            imInverted = PIL.ImageOps.invert(im)
            imInverted.save(f + '_inverted.jpg', 'JPEG', quality=90)
            # rotate by a random whole angle between -45 and +45 degrees
            # (randint includes both end points, randrange would never pick +45)
            randomRotate = random.randint(-45, 45)
            imRotated = im.rotate(randomRotate)
            imRotated.save(f + '_rotated.jpg', 'JPEG', quality=90)
            # mirrored left to right
            imMirror = im.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
            imMirror.save(f + '_mirrored.jpg', 'JPEG', quality=90)
            # greyscale
            imGrey = im.convert('L')
            imGrey.save(f + '_greyscale.jpg', 'JPEG', quality=90)
    print("added a mirror and greyscale images for all images in folder train/")
# execute the above ImageVariations() function
ImageVariations()

# Transfer all Images to Tensors

In [None]:
# Function to ingest data using training and test loaders
# Now load the images from the train folder

def load_dataset(path_images_train):
    """Build the training and test data loaders from an image folder.

    The folder must follow the torchvision ``ImageFolder`` layout, i.e. one
    subfolder per class (``root/<class>/<image>``). All images are converted
    to tensors, normalized, and split at random into 70% training and 30%
    test data.

    NOTE(review): the split is random over all files in the folder, so files
    derived from the same raw image can end up in both subsets.

    Args:
        path_images_train: Root folder with one subfolder per class.

    Returns:
        A tuple ``(train_loader, test_loader)`` of ``DataLoader`` objects that
        use the notebook's ``batch_size`` and ``shuffle`` settings.
    """
    # Image file -> tensor, then normalize each of the three channels with
    # mean 0.5 and standard deviation 0.5.
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # ImageFolder derives the label of every image from its subfolder.
    full_dataset = torchvision.datasets.ImageFolder(
        root=path_images_train,
        transform=to_normalized_tensor,
    )

    # 70% of the samples (rounded down) are used for training, the rest for testing.
    total = len(full_dataset)
    train_size = int(0.7 * total)
    split_sizes = [train_size, total - train_size]
    train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, split_sizes)

    # Both loaders share the same settings; num_workers=0 loads the data in
    # the main process without extra worker processes.
    loader_options = dict(batch_size=batch_size, num_workers=0, shuffle=shuffle)
    train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
    test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

    return train_loader, test_loader


# Get the class names: one class per subfolder of the train folder, sorted by name.
# Only directories are counted. ImageFolder (used in load_dataset) also builds its
# classes from the directories only, so stray files such as .DS_Store must not
# shift the class names or inflate the class count.
classes = sorted(
    entry for entry in os.listdir(path_images_train)
    if os.path.isdir(os.path.join(path_images_train, entry))
)
classes_amount = len(classes)                # count the classes
print(len(classes), 'classes are found in your train folder:')
print(classes)

# Get the iterative dataloaders for test and training data
train_loader, test_loader = load_dataset(path_images_train)

# Show the shape of every batch the train_loader delivers
print("Shape of the train_loader and test_loader (images per batch, RGB channels, width, height):")
for i, data in enumerate(train_loader, 0):
    # each batch is a pair of image data (inputs) and class indices (labels)
    inputs, labels = data
    # convert the image batch to a numpy array and show its shape
    inputs = np.array(inputs)
    print(inputs.shape)
    # Uncomment to print the full tensor contents (many numbers...):
    # print(f'Epoch: {i} | Inputs {inputs} | Labels {labels}')
        

# Define the Neural Net Model

In [None]:
# Create a neural net class
# When you define a class that inherits from nn.Module, you are creating a custom neural network architecture. 
# This class allows you to define the structure of your neural network, including its layers and operations.
class Net(nn.Module):
    """Small convolutional network for image classification.

    Layer sequence: conv(3->12) + max-pool + ReLU, conv(12->12) + max-pool +
    ReLU, conv(12->24) + dropout + ReLU, flatten, fully connected layer,
    log-softmax. All convolutions use a 3x3 kernel with stride 1 and padding 1,
    so only the two 2x2 max-pool steps shrink the image (each halves width and
    height).

    Args:
        num_classes: Number of output classes. When omitted, the notebook's
            ``classes_amount`` is looked up at construction time.
        input_size: ``(height, width)`` of the input images in pixels. It is
            only used to size the fully connected layer and defaults to the
            128 x 128 images produced by the notebook.
    """

    def __init__(self, num_classes=None, input_size=(128, 128)):
        super().__init__()

        # Resolve the default when the network is built, not when the class
        # is defined, so the current value of classes_amount is used.
        if num_classes is None:
            num_classes = classes_amount
        height, width = input_size

        # Our images are RGB, so input channels = 3. 12 filters produce 12 feature maps.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)

        # Max pooling keeps the largest value of every 2x2 window.
        self.pool = nn.MaxPool2d(kernel_size=2)

        # A second convolutional layer takes 12 input channels and generates 12 outputs
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)

        # A third convolutional layer takes 12 inputs and generates 24 outputs
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)

        # Dropout2d zeroes whole feature maps with a probability of 20% while
        # training, which helps to prevent overfitting.
        self.drop = nn.Dropout2d(p=0.2)

        # The image is pooled twice with a kernel size of 2, e.g. 128/2/2 = 32.
        # The 24 feature maps of that size are flattened and mapped to one
        # output value per class.
        self.fc = nn.Linear(in_features=(height // 4) * (width // 4) * 24, out_features=num_classes)

    def forward(self, x):
        """Return the log-probabilities of every class for a batch of images.

        Args:
            x: Image batch of shape ``(batch, 3, height, width)`` matching the
                ``input_size`` the network was built for.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with log-probabilities.
        """
        # Layer 1: convolution 1, pooling, ReLU
        x = F.relu(self.pool(self.conv1(x)))

        # Layer 2: convolution 2, pooling, ReLU
        x = F.relu(self.pool(self.conv2(x)))

        # Layer 3: convolution 3, feature-map dropout, ReLU
        x = F.relu(self.drop(self.conv3(x)))
        # Additional element-wise dropout (F.dropout default p=0.5), applied
        # only when the model is in training mode.
        x = F.dropout(x, training=self.training)

        # Flatten everything except the batch dimension. Unlike view(-1, n),
        # an input of the wrong size cannot be folded silently into the batch
        # dimension; it makes the fully connected layer raise instead.
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)

        # Log of the softmax over the class dimension (dim=1), i.e.
        # normalized log-probabilities, not plain probabilities.
        return F.log_softmax(x, dim=1)
    
print("CNN model class defined!")

# Build one instance with the default number of classes, only to print the layer structure
net = Net()
print(net)

# Train and Test the model

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Train the model for one epoch and return the average loss per sample.

    Uses the notebook-level ``loss_criteria`` as loss function.

    Args:
        model: The neural network model that will be trained.
        device: The device (CPU or GPU) on which the training is performed.
        train_loader: A data loader that provides batches of training data.
        optimizer: The optimizer used for updating the model's parameters.
        epoch: The current epoch number (only used for the progress output).

    Returns:
        The loss averaged over all training samples of this epoch.
    """
    # Training mode activates behaviors such as dropout.
    model.train()
    train_loss = 0.0
    print("Epoch:", epoch)
    # Process the images in batches of input images (data) and labels (target)
    for batch_idx, (data, target) in enumerate(train_loader):
        # Move inputs and labels to the device the model lives on
        data, target = data.to(device), target.to(device)
        # Reset the gradients left over from the previous batch
        optimizer.zero_grad()
        # Push the data forward through the model layers
        output = model(data)
        # Compare the prediction with the true labels
        loss = loss_criteria(output, target)
        # loss_criteria returns the mean loss of the batch (default reduction
        # of nn.CrossEntropyLoss). Weight it by the batch size so that the
        # division by the number of samples below yields a true per-sample
        # average, also when the last batch is smaller.
        train_loss += loss.item() * len(data)
        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()
        # Print metrics for every 10th batch so we see some progress
        if batch_idx % 10 == 0:
            print('Training set [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    # average loss per sample for the epoch
    return train_loss / len(train_loader.dataset)


def test(model, device, test_loader):
    """Evaluate the model and return the average loss per sample.

    Uses the notebook-level ``loss_criteria`` as loss function and prints the
    average loss and the accuracy.

    Args:
        model: The neural network model to evaluate.
        device: The device (CPU or GPU) on which the evaluation is performed.
        test_loader: A data loader that provides batches of test data.

    Returns:
        The loss averaged over all test samples.
    """
    # Evaluation mode switches off dropout, so the results are reproducible.
    model.eval()
    test_loss = 0.0
    correct = 0
    # No gradients are needed for evaluation; this saves memory and time.
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            # Get the model output for this batch
            output = model(data)
            # Batch mean weighted by batch size, same as in train()
            test_loss += loss_criteria(output, target).item() * len(data)
            # The predicted class is the index of the largest output value
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    # Average loss per sample and total accuracy
    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    # return average loss, useful for monitoring the model's performance over time
    return test_loss
    
    
# Now use the train and test functions to train and test the model

# Use the GPU if one is available (on a CPU, training will take a considerable length of time!)
device = "cuda" if torch.cuda.is_available() else "cpu"
print('Training on', device)

# Create an instance of the model with one output per class and move it to the device
model = Net(num_classes=len(classes)).to(device)

# Use an "Adam" optimizer to adjust weights
# (see https://pytorch.org/docs/stable/optim.html#algorithms for details of supported algorithms)
# The learning rate controls the step size of every update: smaller values converge
# more slowly but more stably, larger values converge faster but may overshoot.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Cross-entropy loss for classification where every sample belongs to exactly one class.
# It combines the softmax activation and the negative log likelihood loss.
loss_criteria = nn.CrossEntropyLoss()

# Track metrics per epoch to visualize how the loss develops and to spot overfitting
epoch_nums = []        # epoch numbers
training_loss = []     # average loss on the training data
validation_loss = []   # average loss on the held-out test data

# If a trained model was saved before, load it instead of training again.
# Delete model.pt if you want to train a new model!
if os.path.isfile("model.pt"):
    print("model.pt already exists and is now loaded, no training!")
    # map_location places the model on the current device, so a model saved on a
    # GPU machine also loads on a CPU-only machine.
    # weights_only=False is required because the file contains the whole pickled
    # model, not just its weights (needs PyTorch 1.13 or newer).
    # SECURITY: unpickling can execute arbitrary code - only load a model.pt
    # that you created yourself.
    model = torch.load("model.pt", map_location=device, weights_only=False)
else:
    # Train over the number of epochs defined in the configuration above
    for epoch in range(1, epochs + 1):
        # One training pass followed by one evaluation pass per epoch
        train_loss = train(model, device, train_loader, optimizer, epoch)
        test_loss = test(model, device, test_loader)
        epoch_nums.append(epoch)
        training_loss.append(train_loss)
        validation_loss.append(test_loss)

    # Plot the training and validation loss over the epochs
    plt.plot(epoch_nums, training_loss)
    plt.plot(epoch_nums, validation_loss)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['training', 'validation'], loc='upper right')
    plt.show()

    # Save the complete trained model so it can be loaded later without training again
    torch.save(model, "model.pt")

# Evaluate the training - Confusion Matrix

In [None]:
# PyTorch doesn't have a built-in confusion matrix metric, so we'll use SciKit-Learn
from sklearn.metrics import confusion_matrix

# Evaluation mode switches off dropout for inference.
# Remember to call model.train() to switch back when you resume training.
model.eval()
# The batches from the test_loader are CPU tensors, so the model is moved to the
# CPU once here instead of once per batch.
model.cpu()

# Get predictions for the test data as plain lists for use with SciKit-Learn
print("Getting predictions from test set...")
truelabels = []    # true class index of every test image
predictions = []   # class index predicted by the model for every test image
# Inference needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    for data, target in test_loader:
        # true labels of this batch
        truelabels.extend(target.cpu().numpy())
        # predicted label = index of the largest output value along the class dimension
        predictions.extend(model(data).argmax(dim=1).numpy())

# Compute the confusion matrix from the true and the predicted labels
cm = confusion_matrix(truelabels, predictions)
# the confusion_matrix function from scikit-learn is used to compute the confusion matrix. It takes two arguments: 
# truelabels (the true labels of the samples) and predictions (the predicted labels by the model). 
# The resulting cm is a 2D array where each element (i, j) represents the number of samples that belong to class i and were predicted to be in class j.
# The interpolation="nearest" argument ensures that the cells are shown with sharp boundaries. 
# cmap=plt.cm.Greys specifies the colormap to use for visualization (in this case, a grayscale colormap).
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Greys) 
plt.colorbar() # serves as a visual guide for the color mapping of the confusion matrix values
# tick_marks is an array containing the indices of the classes. It's used to position the ticks on the x and y axes of the plot.
tick_marks = np.arange(len(classes)) 
# set the tick marks on the x-axis using the class names (stored in classes) and rotates them by 45 degrees for better readability.
plt.xticks(tick_marks, classes, rotation=45) 
plt.yticks(tick_marks, classes) # sets the tick marks on the y-axis using the class names
plt.xlabel("Predicted Shape")
plt.ylabel("True Shape") # set the labels for the x and y axes of the plot to describe the meaning of each axis
plt.show() # displays the complete plot with the confusion matrix and the associated visualizations

# Predict a new image

In [None]:
# Function to predict the class of an image
def predict_image(classifier, image):
    """Return the index of the class with the highest score for a single image.

    Args:
        classifier: the model used for the prediction. It is switched to
            evaluation mode by this function.
        image: the image to classify, in any form accepted by
            transforms.ToTensor(). PIL images that are not in RGB mode are
            converted to RGB first. The image must already have the input
            size the classifier expects - no resizing happens here.

    Returns:
        The index (NumPy integer) of the highest value in the classifier output.
    """
    # Put the classifier model into evaluation mode
    classifier.eval()

    # The normalization below is defined for exactly 3 channels, so grayscale, RGBA or CMYK
    # PIL images would fail there - convert them to RGB up front. RGB images are left untouched.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # Convert the image to a tensor and normalize each of the 3 channels with mean 0.5 and std 0.5
    transformation = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Preprocess the image and add an extra batch dimension at position 0,
    # since pytorch treats all inputs as batches
    image_tensor = transformation(image).float().unsqueeze(0)

    # Run the prediction on whatever device the classifier lives on, so the function works
    # for a model on the GPU as well as for a model on the CPU.
    first_param = next(classifier.parameters(), None)
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    # Predict the class of the image. No gradients are needed for inference.
    with torch.no_grad():
        output = classifier(image_tensor)

    # Bring the scores back to the CPU, convert them to a NumPy array and take the
    # index of the class with the highest score.
    index = output.detach().cpu().numpy().argmax()
    return index


# Now let's try it with a new image
import os
import shutil
from random import randint

from PIL import Image

# Open the image you want to classify and scale it to the input size of the model given above
target_size = (resize_x, resize_y)
imgFile = Image.open("test.jpg").resize(target_size, Image.Resampling.LANCZOS)

# Display the image
plt.imshow(imgFile)

# Call the prediction function with the model from above and the image just opened,
# then print the name of the class that had the highest score
index = predict_image(model, imgFile)
predicted_class = classes[index]
print(predicted_class)

In [None]:
# Closing cell: print a pointer to the project's website.
print("Visit http://www.Terragon.de")