In [15]:
import torch
import torch.nn as nn
# import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import EMNIST
# from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import numpy as np
# import random
import os

In [16]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [17]:
# Pipeline applied to EMNIST samples. A full horizontal flip followed by a fixed
# 90-degree anti-clockwise rotation swaps rows and columns (a transpose); the
# image is then converted to a tensor and normalised with mean 0.5 / std 0.5.
# NOTE(review): presumably this undoes the transposed layout of the raw EMNIST
# images -- confirm against the dataset.
emnist_steps = [
    transforms.RandomHorizontalFlip(p=1.0),   # p=1.0: always flip
    transforms.RandomRotation((90, 90)),      # degenerate range: always exactly 90 degrees
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(emnist_steps)

In [18]:
# Instantiate the EMNIST 'bymerge' training split (downloaded into data/ when
# missing) only to read its list of class labels; the dataset object itself is
# not kept.
classes = EMNIST(
    root='data/',
    split='bymerge',
    train=True,
    download=True,
    transform=transform,
).classes
print(classes)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'd', 'e', 'f', 'g', 'h', 'n', 'q', 'r', 't']


In [19]:
def imshow(img, label):
    """Show a normalised image tensor as a grayscale picture titled with its label.

    Args:
        img: Tensor to display. It is rescaled with ``img / 2 + 0.5`` (the
            inverse of a mean-0.5 / std-0.5 normalisation) and its first axis
            is moved last before plotting, so a channel-first 3-D tensor is
            expected.
        label: Value rendered in the figure title.
    """
    pixels = (img / 2 + 0.5).numpy()  # unnormalize
    # matplotlib wants channels last, so reorder (0, 1, 2) -> (1, 2, 0).
    plt.imshow(pixels.transpose(1, 2, 0), cmap='gray')
    plt.title(f'Label: {label}')
    plt.axis('off')
    plt.show()

# # Select 5 random images from the dataset
# random_indices = random.sample(range(len(train_dataset)), 5)

# for idx in random_indices:
#     img, label = train_dataset[idx]
#     imshow(img, classes[label])


In [20]:

# Train/validation split settings. None of the cells shown here reads them.
validation_split = 0.1   # fraction of samples held out for validation
shuffle_dataset = True
random_seed = 42

In [21]:
# Define a simple CNN architecture
class SimpleCNN(nn.Module):
    """Two-convolution CNN for 28x28 single-channel character images.

    Each 3x3 convolution (padding 1) keeps the spatial size and is followed by
    a 2x2 max-pool that halves it, so a 28x28 input reaches the classifier as
    64 feature maps of 7x7.

    Args:
        num_classes: Size of the output layer. Defaults to 47, the number of
            labels in the EMNIST 'bymerge' split loaded by this notebook.
    """

    def __init__(self, num_classes=47):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, 1, 28, 28). A single unbatched image of
                shape (1, 28, 28) is accepted and treated as a batch of one.
        """
        if x.dim() == 3:
            # Keep accepting an unbatched (C, H, W) image.
            x = x.unsqueeze(0)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 64 * 7 * 7),
        # this never folds surplus pixels into the batch dimension, so an input
        # of the wrong spatial size fails at fc1 instead of silently yielding
        # extra rows.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

In [22]:
# Resolve the device first so the checkpoint can be mapped onto it while loading.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SimpleCNN()
# Run code from here to use pretrained weights
# map_location lets a checkpoint that was saved from a GPU session load on a
# CPU-only machine instead of failing while restoring CUDA tensors.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load("./model3_weights.pth", map_location=device))

# Move the model to the appropriate device
model.to(device)

SimpleCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=47, bias=True)
)

In [23]:
def _show_grayscale(img, title):
    """Plot `img` in grayscale under `title` with the axes hidden."""
    plt.figure()
    plt.title(title)
    plt.imshow(img, cmap='gray')
    plt.axis('off')
    plt.show()


def preprocess_image(image_path, invert=False, resize=True, show_output=True):
    """Load an image file and turn it into a normalised single-image batch.

    Args:
        image_path: Path of the image file to read.
        invert: When True, invert the grayscale values of the image.
        resize: When True, resize the image to 28x28 pixels.
        show_output: When True, plot the image after each step that was
            actually applied (inversion and/or resize).

    Returns:
        Tuple ``(img, img_tensor)``: the processed grayscale PIL image, and the
        tensor obtained from it via ToTensor + Normalize(0.5, 0.5) with a
        leading batch axis added (shape (1, 1, 28, 28) when `resize` is True).
    """
    # Convert inside a context manager so the source file is released as soon
    # as the pixels are read; convert() returns an independent image.
    with Image.open(image_path) as source:
        img = source.convert('L')  # 'L' = 8-bit grayscale

    if invert:
        img = ImageOps.invert(img)
        if show_output:
            _show_grayscale(img, "Inverted Grayscale Image")

    if resize:
        img = img.resize((28, 28))
        if show_output:
            _show_grayscale(img, "Resized Image (28x28)")

    # Named differently from the notebook-level `transform` to avoid shadowing it.
    to_normalised_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Add the batch dimension expected by the model.
    img_tensor = to_normalised_tensor(img).unsqueeze(0)

    return img, img_tensor

In [24]:
# Inference only from here on: switch the model to evaluation mode before predicting.
model.eval()  # Set the model to evaluation mode

def predict_image(model, image_tensor):
    """Classify a single preprocessed image.

    Args:
        model: Module mapping an image batch to per-class logits.
        image_tensor: Batch containing exactly one image.

    Returns:
        Tuple ``(predicted_index, probabilities)``: the index of the
        highest-scoring class as a Python int, and the softmax probabilities
        as a NumPy array with the batch axis squeezed away.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # `device` variable, so the function works wherever the model lives.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    with torch.no_grad():
        logits = model(image_tensor)
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        predicted = logits.argmax(dim=1)

    # .item() requires a single prediction, i.e. a batch of one image.
    return predicted.item(), probabilities.squeeze().cpu().numpy()

In [25]:
def create_output_dirs(output_dir, class_names=None):
    """Create `output_dir` and one sub-directory per class label inside it.

    Directories that already exist are left untouched.

    Args:
        output_dir: Root directory to create (missing parents are created too).
        class_names: Iterable of sub-directory names. Defaults to the
            notebook-level `classes` list.
    """
    if class_names is None:
        class_names = classes

    # exist_ok=True replaces the check-then-create pattern: no error is raised
    # when the directory is already there, even if it appears between a check
    # and the creation.
    os.makedirs(output_dir, exist_ok=True)
    for class_name in class_names:
        os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

def get_image_output(image_path, output_dir, save_image=False, show_output=True):
    """Classify one image file and optionally save and/or report the result.

    The per-class output directories under `output_dir` are always created,
    even when nothing is saved.

    Args:
        image_path: Path of the image to classify.
        output_dir: Root folder holding one sub-directory per class.
        save_image: When True, save the processed image into the sub-directory
            of its predicted class, keeping the original file name.
        show_output: When True, show the preprocessing plots and print the
            predicted label followed by the five most probable classes.
    """
    create_output_dirs(output_dir)

    processed_image, image_tensor = preprocess_image(image_path, show_output=show_output)
    predicted_label, probabilities = predict_image(model, image_tensor)

    if save_image:
        destination = os.path.join(
            output_dir, classes[predicted_label], os.path.basename(image_path)
        )
        processed_image.save(destination)

    if not show_output:
        return

    print(f'Predicted Label: {classes[predicted_label]}')

    # argsort is ascending, so reverse it and keep the first five entries to
    # get the most probable classes in descending order.
    print('Top 5 Probabilities:')
    for idx in np.argsort(probabilities)[::-1][:5]:
        print(f'{classes[idx]}: {probabilities[idx]:.4f}')

## Testing ground

In [68]:
from collections import defaultdict

In [69]:
def load_dataset(folder, classes=None):
    """Collect file paths from a folder laid out as one sub-directory per class.

    Args:
        folder: Root directory containing the class sub-directories.
        classes: Names of the sub-directories to read. When None, every
            sub-directory found directly inside `folder` is used.

    Returns:
        A ``defaultdict(list)`` mapping each class name to the paths of all
        entries in its directory. A class whose directory is empty gets no key.
    """
    if classes is None:
        classes = [
            entry for entry in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, entry))
        ]

    paths_by_class = defaultdict(list)
    for class_name in classes:
        class_dir = os.path.join(folder, class_name)
        for entry in os.listdir(class_dir):
            # Append one at a time so that empty directories never create a key.
            paths_by_class[class_name].append(os.path.join(class_dir, entry))

    return paths_by_class

In [70]:
def get_total_accuracy(dataset, model):
    """Return the fraction of images whose predicted class matches their label.

    Args:
        dataset: Mapping of class name -> list of image paths.
        model: Model passed to `predict_image` for every image.

    Returns:
        Accuracy as a float between 0 and 1. An empty dataset (no images at
        all) yields 0.0 instead of raising ZeroDivisionError.
    """
    correct = 0
    total = 0
    for img_class, img_paths in dataset.items():
        for img_path in img_paths:
            _, img_tensor = preprocess_image(img_path, show_output=False)
            predicted_label, _ = predict_image(model, img_tensor)
            if img_class == classes[predicted_label]:
                correct += 1
            total += 1

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return correct / total

In [71]:
def get_mean_probability_per_class(dataset, model):
    """Average, per class, the probability the model assigns to the true class.

    Args:
        dataset: Mapping of class name -> list of image paths.
        model: Model passed to `predict_image` for every image.

    Returns:
        Dict mapping each class that has at least one image to the mean
        probability predicted for that (correct) class over its images.
    """
    probs_by_class = defaultdict(list)
    for img_class, img_paths in dataset.items():
        for img_path in img_paths:
            _, img_tensor = preprocess_image(img_path, show_output=False)
            _, class_probs = predict_image(model, img_tensor)
            # Keep only the probability given to the true class.
            probs_by_class[img_class].append(class_probs[classes.index(img_class)])

    return {name: np.mean(values) for name, values in probs_by_class.items()}


In [135]:
def get_image_statistics(parent_folder, images_names, model, classes):
    """Count true positives, false positives and false negatives per class.

    For every name in `images_names`, the dataset under
    ``<parent_folder>/<name>/letters`` is loaded and each image is classified.

    Args:
        parent_folder: Directory containing one folder per image name.
        images_names: Names whose ``letters`` datasets are evaluated.
        model: Model passed to `predict_image` for every image.
        classes: Class labels; used both as dataset folder names and to map a
            predicted index to a label.

    Returns:
        Dict mapping image name -> {class name -> {'tp', 'fp', 'fn'} counts}.
    """
    resultant_stats = {}

    for image_name in images_names:
        counters = defaultdict(
            dict,
            {name: {'tp': 0, 'fp': 0, 'fn': 0} for name in classes},
        )
        letters_dir = os.path.join(parent_folder, image_name, 'letters')

        for true_class, img_paths in load_dataset(letters_dir, classes).items():
            for img_path in img_paths:
                _, img_tensor = preprocess_image(img_path, show_output=False)
                predicted_label, _ = predict_image(model, img_tensor)
                predicted_class = classes[predicted_label]

                if predicted_class != true_class:
                    # A miss is a false negative for the true class and a
                    # false positive for the class that was predicted.
                    counters[true_class]['fn'] += 1
                    counters[predicted_class]['fp'] += 1
                    continue
                counters[true_class]['tp'] += 1

        resultant_stats[image_name] = counters

    return resultant_stats

### Test accuracy & probability

In [80]:

# Evaluation inputs: names are listed from <parent_folder>/test_images, and the
# per-class image folders for name X are read from <parent_folder>/X/letters.
parent_folder = '/home/yilliee/ArhamSoft/Contour_NN_test'
images_folder = os.path.join(parent_folder, 'test_images')
images_names = os.listdir(images_folder)

# Per-image results, keyed by image name.
accuracy_data, probabilities_data = {}, {}

for image_name in images_names:
    dataset_folder = os.path.join(parent_folder, image_name, 'letters')
    dataset = load_dataset(dataset_folder, classes)

    # Accuracy is stored as a percentage.
    accuracy = get_total_accuracy(dataset, model) * 100
    accuracy_data[image_name] = accuracy

    # Mean probability of the correct class, per class.
    mean_prob = get_mean_probability_per_class(dataset, model)
    probabilities_data[image_name] = mean_prob

    print(f'For {image_name}: Accuracy : {accuracy:.4f} | Mean Probability of correct_class : {np.mean(list(mean_prob.values())):.4f}')

# Unweighted mean over images (each image counts equally regardless of size).
mean_accuracy = np.mean(list(accuracy_data.values()))
print(f'\nMean accuracy: {mean_accuracy:.4f}')

For open2.jpeg: Accuracy : 71.5596 | Mean Probability of correct_class : 0.7741
For sample1.jpeg: Accuracy : 73.7968 | Mean Probability of correct_class : 0.7330
For blank.jpeg: Accuracy : 64.2105 | Mean Probability of correct_class : 0.6802
For open1.jpeg: Accuracy : 69.6809 | Mean Probability of correct_class : 0.7406

Mean accuracy: 69.8120


In [136]:
# Per-class tp / fp / fn counts for every test image.
per_image_stats = get_image_statistics(
    parent_folder=parent_folder,
    images_names=images_names,
    model=model,
    classes=classes,
)

In [83]:
import pandas as pd

In [137]:
# Write one worksheet per test image: after the transpose, rows are class
# labels and columns are the tp / fp / fn counters.
with pd.ExcelWriter('model3_stats.xlsx') as writer:
    for image_name, image_stats in per_image_stats.items():
        df = pd.DataFrame(image_stats).transpose()
        df.to_excel(writer, sheet_name=image_name, index=True)