In [6]:
# Import necessary modules for defining the neural network architecture and data transformations
import torch.nn as nn
from torchvision.transforms import transforms

# Import the pathlib module for working with file paths
import pathlib

In [7]:
# Root directory of the training set; each immediate subdirectory is one class
root = pathlib.Path("Dataset/Train")

# Class names are the names of the subdirectories of `root`. Only directories
# are kept so that stray files living next to the class folders (e.g. .DS_Store)
# do not turn into bogus classes and inflate len(classes). Sorting keeps the
# class -> index mapping stable between runs. `Path.name` is already the final
# path component, so no further splitting is needed.
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

# Print the sorted list of class names
print(classes)

['None', 'Paper', 'Rock', 'Scissor']


In [8]:
# Preprocessing settings shared by both pipelines, defined once so the two
# pipelines cannot drift apart.
IMAGE_SIZE = (320, 320)                  # (height, width) passed to Resize
NORMALIZE_MEAN = [0.485, 0.456, 0.406]   # per-channel mean used by Normalize
NORMALIZE_STD = [0.229, 0.224, 0.225]    # per-channel std used by Normalize
# NOTE(review): these mean/std values match the commonly used ImageNet channel
# statistics -- confirm they are appropriate for this dataset.

# Pipeline for image inputs: resize -> tensor -> normalize
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),   # Resize the image to IMAGE_SIZE pixels
    transforms.ToTensor(),           # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),  # Normalize the tensor
])

# Pipeline for inputs that first have to be turned into a PIL image:
# to PIL -> resize -> tensor -> normalize. The result is a normalized tensor,
# exactly like the output of `transform`.
transform_PIL = transforms.Compose([
    transforms.ToPILImage(),         # Convert the input to a PIL image
    transforms.Resize(IMAGE_SIZE),   # Resize the PIL image to IMAGE_SIZE pixels
    transforms.ToTensor(),           # Convert the PIL image back to a tensor
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),  # Normalize the tensor
])


In [9]:
# Define a custom CNN model for image classification
class ConvNet(nn.Module):
    """Small three-convolution CNN mapping RGB images to class scores.

    Shapes through the network (N = batch size, S = ``input_size``):

        conv1 -> bn1 -> relu1 : (N, 3, S, S)          -> (N, 12, S, S)
        pool (2x2 max-pool)   : (N, 12, S, S)         -> (N, 12, S//2, S//2)
        conv2 -> relu2        : (N, 12, S//2, S//2)   -> (N, 20, S//2, S//2)
        conv3 -> bn3 -> relu3 : (N, 20, S//2, S//2)   -> (N, 32, S//2, S//2)
        flatten -> fc         : (N, 32 * (S//2) ** 2) -> (N, num_classes)

    All convolutions use kernel 3, stride 1, padding 1, so they keep the
    spatial size; only the max-pool halves it.

    Args:
        num_classes: Number of output classes (size of the final layer).
        input_size: Side length in pixels of the square input images. The
            default of 320 gives the fully connected layer
            32 * 160 * 160 input features.

    Raises:
        ValueError: If ``input_size`` is smaller than 2, because the 2x2
            max-pool would then leave no spatial positions.
    """

    def __init__(self, num_classes=len(classes), input_size: int = 320):
        super().__init__()

        if input_size < 2:
            raise ValueError(f"input_size must be at least 2, got {input_size}")

        # First convolutional layer: 3 -> 12 channels, spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # Output shape after this layer: (batch_size, 12, S, S)

        # Batch normalization for the first convolutional layer
        self.bn1 = nn.BatchNorm2d(num_features=12)

        # ReLU activation function after the first convolutional layer
        self.relu1 = nn.ReLU()

        # Max-pooling layer to reduce the image size by a factor of 2
        self.pool = nn.MaxPool2d(kernel_size=2)
        # Output shape after max-pooling: (batch_size, 12, S//2, S//2)

        # Second convolutional layer: 12 -> 20 channels
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        # Output shape after this layer: (batch_size, 20, S//2, S//2)

        # ReLU activation function after the second convolutional layer
        self.relu2 = nn.ReLU()

        # Third convolutional layer: 20 -> 32 channels
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Output shape after this layer: (batch_size, 32, S//2, S//2)

        # Batch normalization for the third convolutional layer
        self.bn3 = nn.BatchNorm2d(num_features=32)

        # ReLU activation function after the third convolutional layer
        self.relu3 = nn.ReLU()

        # The single max-pool floors the side length to input_size // 2 and
        # the last convolution emits 32 channels.
        pooled_size = input_size // 2

        # Fully connected (dense) layer for classification
        self.fc = nn.Linear(in_features=32 * pooled_size * pooled_size, out_features=num_classes)

    def forward(self, input):
        """Compute class scores for a batch of images.

        Args:
            input: Batch of images shaped (batch_size, 3, S, S), where S is
                the ``input_size`` the model was built with.

        Returns:
            Tensor of shape (batch_size, num_classes) with one raw score per
            class (no softmax is applied here).
        """
        # First convolutional block
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)

        # Apply max-pooling to halve the spatial dimensions
        output = self.pool(output)

        # Second convolutional block (no batch norm)
        output = self.conv2(output)
        output = self.relu2(output)

        # Third convolutional block
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, <fixed size>), this never merges or splits batch elements
        # when the input has an unexpected spatial size (the mismatch then
        # surfaces as a shape error in self.fc), and it also works when the
        # activations are not contiguous in memory.
        output = output.flatten(start_dim=1)

        # Fully connected layer for classification
        output = self.fc(output)

        return output

In [10]:
# Build the network with one output per class discovered in the training directory
model = ConvNet(len(classes))