In [1]:
import torch

import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import EMNIST
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import numpy as np
import os

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing pipeline shared by the train / validation / test datasets.
transform = transforms.Compose(
    [
        # p=1.0 makes the horizontal flip unconditional (not an augmentation).
        transforms.RandomHorizontalFlip(p=1.0),
        # A (90, 90) degree range pins the angle to exactly 90 degrees (anti-clockwise).
        transforms.RandomRotation((90, 90)),
        # The network below is sized for 10x10 inputs.
        transforms.Resize((10, 10)),
        transforms.ToTensor(),
        # Single-channel normalisation with mean 0.5 and std 0.5.
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

In [3]:
# Load the EMNIST 'balanced' split (downloaded into data/ on first use).
# Both the training and the test portion go through the same `transform`.
train_dataset = EMNIST(
    root='data/',
    split='balanced',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = EMNIST(
    root='data/',
    split='balanced',
    train=False,
    download=True,
    transform=transform,
)

In [4]:
# Split the training dataset into training (80%) and validation (20%) sets.
# A seeded generator keeps the split identical across kernel restarts, so
# validation numbers stay comparable between runs.
# NOTE: this rebinds `train_dataset` to a Subset; re-running this cell on its
# own would split the subset again, so re-run the loading cell first.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [5]:
# Batched loaders for the three datasets. Only the training loader shuffles;
# validation and test batches are served in dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

# `train_dataset` is the Subset produced by random_split, so the class names
# are read from the dataset it wraps.
classes = train_dataset.dataset.classes

In [7]:
# Define a simple CNN architecture for 10x10 images
class SimpleCNN(nn.Module):
    """Two-convolution classifier for single-channel 10x10 images.

    Each 3x3 convolution (padding 1) keeps the spatial size and is followed by
    a 2x2 max-pool, so a 10x10 input becomes 5x5 and then 2x2 with 64
    channels, i.e. 64 * 2 * 2 features going into the first linear layer.

    Args:
        num_classes: Number of output logits. Defaults to 47, the number of
            classes in the EMNIST 'balanced' split.
    """

    def __init__(self, num_classes=47):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 2 * 2, 128)  # Adjusted for 10x10 input size
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, 256), this keeps the batch
        # dimension fixed, so an input of the wrong spatial size fails loudly
        # in fc1 instead of being silently reshaped into extra batch rows.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Build the network, then move its parameters to the selected device.
model = SimpleCNN()
model = model.to(device)
# Echo the device as the cell output.
device

device(type='cuda')

In [8]:
# Cross-entropy over the raw logits, optimised with Adam at a 1e-3 learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [9]:
# Function to train the model
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    """Train `model` and evaluate it on the validation loader after every epoch.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Iterable of (inputs, labels) batches used for training.
        val_loader: Iterable of (inputs, labels) batches used for validation.
        criterion: Loss callable taking (outputs, labels). Its value is
            multiplied by the batch size before accumulation, which assumes
            it returns a per-batch mean.
        optimizer: Optimiser stepping the model parameters.
        device: Device the batches are moved to before the forward pass.
        num_epochs: Number of passes over `train_loader`.

    Returns:
        (train_losses, val_losses): two lists with one average per-sample loss
        per epoch. An epoch whose loader yields no samples records NaN.
    """
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, predicted = torch.max(outputs.detach(), 1)
            running_total += batch_size
            running_corrects += (predicted == labels).sum().item()

        # Normalise by the samples actually processed: len(loader.dataset)
        # over-counts when the loader drops the last batch or uses a sampler.
        if running_total:
            train_loss = running_loss / running_total
            train_accuracy = 100 * running_corrects / running_total
        else:
            train_loss = float('nan')
            train_accuracy = float('nan')
        train_losses.append(train_loss)

        model.eval()
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                batch_size = labels.size(0)
                total += batch_size
                correct += (predicted == labels).sum().item()
                val_loss += criterion(outputs, labels).item() * batch_size

        if total:
            val_accuracy = 100 * correct / total
            val_loss /= total
        else:
            val_accuracy = float('nan')
            val_loss = float('nan')
        val_losses.append(val_loss)

        print(f'Epoch {epoch + 1}, Train Acc: {train_accuracy:.3f}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}, Val Acc: {val_accuracy:.3f}')

    print('Finished Training')
    return train_losses, val_losses

In [12]:
# Train the model
# train_losses, val_losses = train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10)
# Training is skipped here; the previously saved weights are restored instead.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU still loads when the notebook runs on the CPU.
model.load_state_dict(torch.load('model4_10x10.pth', map_location=device, weights_only=True))


<All keys matched successfully>

In [13]:
# Save the model
# torch.save(model.state_dict(), "model4_10x10.pth")

In [14]:
def _show_grayscale(img, title):
    """Display `img` in its own matplotlib figure with a gray colormap and no axes."""
    plt.figure()
    plt.title(title)
    plt.imshow(img, cmap='gray')
    plt.axis('off')
    plt.show()


# Function to preprocess the image
def preprocess_image(image_path, invert=False, resize=True, show_output=True):
    """Load an image file and turn it into a normalised model input.

    Args:
        image_path: Path of the image file to read.
        invert: When True, invert the grayscale image before any resizing.
        resize: When True, resize the image to 10x10 pixels.
        show_output: When True, plot the image after each enabled step
            (inversion and/or resizing).

    Returns:
        (img, img_tensor): the processed PIL grayscale image and the tensor
        built from it (ToTensor + Normalize(0.5, 0.5)) with a leading batch
        dimension added.

    NOTE(review): unlike the training `transform`, no flip/rotation is applied
    here; presumably that step only corrects the orientation of the raw EMNIST
    images, but confirm that inference inputs match the training orientation.
    """
    # Image.open keeps the underlying file open; convert() yields a separate
    # grayscale image, so the source handle can be released immediately.
    with Image.open(image_path) as source:
        img = source.convert('L')

    if invert:
        img = ImageOps.invert(img)
        if show_output:
            _show_grayscale(img, "Inverted Grayscale Image")

    if resize:
        img = img.resize((10, 10))
        if show_output:
            _show_grayscale(img, "Resized Image (10x10)")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    img_tensor = transform(img)
    img_tensor = img_tensor.unsqueeze(0)  # add the batch dimension

    return img, img_tensor

# Put the network in evaluation mode before running predictions. As the last
# expression of the cell, the returned module is also displayed as its output.
model.eval()

SimpleCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=47, bias=True)
)

In [15]:
def predict_image(model, image_tensor):
    """Run `model` on a single-image batch without tracking gradients.

    The tensor is moved to the notebook-level `device` before the forward pass.

    Returns:
        (label, probabilities): the index of the largest logit as a Python
        int, and the softmax probabilities as a NumPy array with singleton
        dimensions squeezed out.
    """
    with torch.no_grad():
        logits = model(image_tensor.to(device))
        class_probs = torch.softmax(logits, dim=1)
        best_index = torch.max(logits, 1)[1]
    return best_index.item(), class_probs.squeeze().cpu().numpy()

In [16]:
def create_output_dirs(output_dir):
    """Create `output_dir` with one sub-folder per upper-cased class name.

    Class names come from the notebook-level `classes` list. Names that
    collapse to the same upper-case form (e.g. 'a' and 'A') share a folder.
    Directories that already exist are left untouched, so the function can be
    called repeatedly.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_dir, exist_ok=True)
    for upper_class in sorted({output_class.upper() for output_class in classes}):
        os.makedirs(os.path.join(output_dir, upper_class), exist_ok=True)

In [17]:
def get_image_output(image_path, output_dir, save_image=False, show_output=True):
    """Classify one image file with the notebook-level `model`.

    The per-class output folders are always created under `output_dir`.

    Args:
        image_path: Path of the image to classify.
        output_dir: Root folder holding one sub-folder per upper-cased class.
        save_image: When True, save the preprocessed image into the folder of
            its predicted (upper-cased) class, keeping the original file name.
        show_output: When True, show the preprocessing plots and print the
            predicted label together with the five most probable classes.
    """
    create_output_dirs(output_dir)

    processed_image, image_tensor = preprocess_image(image_path, show_output=show_output)
    predicted_label, probabilities = predict_image(model, image_tensor)

    class_name = classes[predicted_label]
    upper_class_label = class_name.upper()

    if save_image:
        destination = os.path.join(output_dir, upper_class_label, os.path.basename(image_path))
        processed_image.save(destination)

    if not show_output:
        return

    print(f'Predicted Label: {class_name} (Uppercase: {upper_class_label})')

    # argsort is ascending: keep the last five entries and reverse them so the
    # most probable class is printed first.
    print('Top 5 Probabilities:')
    for idx in np.argsort(probabilities)[-5:][::-1]:
        print(f'{classes[idx]}: {probabilities[idx]:.4f}')

In [18]:
# Classify every image in `image_folder` and file the preprocessed copy under
# its predicted class inside `output_folder`.
image_folder = "open1"
output_folder = 'open1_10x10'
# sorted() makes the processing order deterministic across platforms.
for img_filename in sorted(os.listdir(image_folder)):
    input_img = os.path.join(image_folder, img_filename)
    # Skip sub-folders and other non-file entries; only files can be opened as images.
    if not os.path.isfile(input_img):
        continue
    get_image_output(image_path=input_img, output_dir=output_folder, save_image=True, show_output=False)

FileNotFoundError: [Errno 2] No such file or directory: 'open1'

In [17]:
# Shell command: recursively archive the sorted output folder into open1_10x10.zip.
# NOTE(review): the absolute /content path presumably refers to the Colab working directory — confirm before running elsewhere.
!zip -r open1_10x10.zip /content/open1_10x10

  adding: content/open1_10x10/ (stored 0%)
  adding: content/open1_10x10/B/ (stored 0%)
  adding: content/open1_10x10/B/open1.jpeg_96.png (deflated 6%)
  adding: content/open1_10x10/B/open1.jpeg_113.png (deflated 6%)
  adding: content/open1_10x10/B/open1.jpeg_188.png (deflated 1%)
  adding: content/open1_10x10/B/open1.jpeg_85.png (deflated 6%)
  adding: content/open1_10x10/B/open1.jpeg_90.png (deflated 9%)
  adding: content/open1_10x10/Y/ (stored 0%)
  adding: content/open1_10x10/Y/open1.jpeg_160.png (deflated 1%)
  adding: content/open1_10x10/R/ (stored 0%)
  adding: content/open1_10x10/R/open1.jpeg_28.png (deflated 7%)
  adding: content/open1_10x10/R/open1.jpeg_120.png (deflated 7%)
  adding: content/open1_10x10/R/open1.jpeg_74.png (stored 0%)
  adding: content/open1_10x10/R/open1.jpeg_48.png (deflated 8%)
  adding: content/open1_10x10/R/open1.jpeg_205.png (deflated 12%)
  adding: content/open1_10x10/R/open1.jpeg_105.png (deflated 8%)
  adding: content/open1_10x10/R/open1.jpeg_50.png

In [18]:
# NOTE(review): google.colab is presumably only importable inside a Colab runtime,
# so this cell is expected to fail in a local Jupyter kernel.
from google.colab import files
# Hand the archive created by the zip cell to the browser for download.
files.download("open1_10x10.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Testing Ground

In [19]:
from collections import defaultdict

In [20]:
def load_dataset(folder, classes=None):
    """Collect image paths from a folder laid out as `<folder>/<class>/<file>`.

    Args:
        folder: Root directory containing one sub-directory per class.
        classes: Class (sub-directory) names to read. When None, every
            sub-directory of `folder` is used.

    Returns:
        A defaultdict(list) mapping each class name to the sorted list of
        paths found in its folder. Requested classes whose folder does not
        exist, or is empty, get no entry.
    """
    if classes is None:
        classes = sorted(
            entry for entry in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, entry))
        )

    dataset = defaultdict(list)
    for img_class in classes:
        class_folder = os.path.join(folder, img_class)
        # Callers pass the full class list, but a given dataset usually holds
        # only some of the classes; skip the missing ones instead of crashing.
        if not os.path.isdir(class_folder):
            continue
        for img_name in sorted(os.listdir(class_folder)):
            dataset[img_class].append(os.path.join(class_folder, img_name))

    return dataset

In [21]:
def get_total_accuracy(dataset, model):
    """Return the fraction of images whose predicted class equals their folder class.

    Args:
        dataset: Mapping of class name -> list of image paths.
        model: Network passed to `predict_image`.

    Returns:
        correct / total as a float in [0, 1]; 0.0 when the dataset contains
        no images (instead of raising ZeroDivisionError).
    """
    correct = 0
    total = 0
    for img_class, img_paths in dataset.items():
        for img_path in img_paths:
            _, img_tensor = preprocess_image(img_path, show_output=False)
            predicted_label, _ = predict_image(model, img_tensor)
            # The prediction index is translated through the notebook-level `classes` list.
            if img_class == classes[predicted_label]:
                correct += 1
            total += 1

    if total == 0:
        return 0.0
    return correct / total

In [22]:
def get_mean_probability_per_class(dataset, model):
    """Average the probability the model assigns to the true class, per class.

    Args:
        dataset: Mapping of class name -> list of image paths.
        model: Network passed to `predict_image`.

    Returns:
        A dict mapping each class that has at least one image to the mean
        probability predicted for that class over its images.
    """
    probs_by_class = defaultdict(list)
    for img_class, img_paths in dataset.items():
        for img_path in img_paths:
            _, img_tensor = preprocess_image(img_path, show_output=False)
            class_probs = predict_image(model, img_tensor)[1]
            # Position of the true class in the notebook-level `classes` list.
            true_index = classes.index(img_class)
            probs_by_class[img_class].append(class_probs[true_index])

    return {
        img_class: np.mean(collected)
        for img_class, collected in probs_by_class.items()
    }


In [23]:
# For each class, get the true positive count, false positive count, and false negative count
def get_image_statistics(parent_folder, images_names, model, classes):
    """Compute per-class tp / fp / fn counts for every named image dataset.

    Each dataset is read from `<parent_folder>/<image_name>/letters`.

    Args:
        parent_folder: Directory containing one folder per image name.
        images_names: Names of the image datasets to evaluate.
        model: Network passed to `predict_image`.
        classes: Class names; used both to select the class folders and to
            translate predicted indices into class names.

    Returns:
        A dict mapping each image name to a mapping of
        class name -> {'tp': int, 'fp': int, 'fn': int}.
    """
    resultant_stats = {}

    for image_name in images_names:
        # Start every class at zero so classes without samples still appear.
        counters = defaultdict(dict)
        for class_name in classes:
            counters[class_name] = {'tp': 0, 'fp': 0, 'fn': 0}

        letters_dir = os.path.join(parent_folder, image_name, 'letters')
        samples = load_dataset(letters_dir, classes)

        for true_class, img_paths in samples.items():
            for img_path in img_paths:
                img_tensor = preprocess_image(img_path, show_output=False)[1]
                predicted_class = classes[predict_image(model, img_tensor)[0]]

                if predicted_class != true_class:
                    # A miss is a false negative for the true class and a
                    # false positive for the class that was predicted.
                    counters[true_class]['fn'] += 1
                    counters[predicted_class]['fp'] += 1
                    continue
                counters[true_class]['tp'] += 1

        resultant_stats[image_name] = counters

    return resultant_stats

In [24]:

# Root of the evaluation data. The CONTOUR_NN_TEST_DIR environment variable
# overrides the original machine-specific location, which stays the default.
parent_folder = os.environ.get('CONTOUR_NN_TEST_DIR', '/home/yilliee/ArhamSoft/Contour_NN_test')
images_folder = os.path.join(parent_folder, 'test_images')
# sorted() keeps the report order stable between runs.
images_names = sorted(os.listdir(images_folder))

accuracy_data = {}
probabilities_data = {}

for image_name in images_names:
    # NOTE(review): the letters are read from <parent_folder>/<image_name>/letters,
    # not from inside `images_folder` — presumably intentional; confirm the layout.
    dataset_folder = os.path.join(parent_folder, image_name, 'letters')
    dataset = load_dataset(dataset_folder, classes)

    accuracy = get_total_accuracy(dataset, model) * 100
    accuracy_data[image_name] = accuracy

    mean_prob = get_mean_probability_per_class(dataset, model)
    probabilities_data[image_name] = mean_prob

    print(f'For {image_name}: Accuracy : {accuracy:.4f} | Mean Probability of correct_class : {np.mean(list(mean_prob.values())):.4f}')

# Unweighted mean of the per-image accuracies (in percent).
mean_accuracy = np.mean(list(accuracy_data.values()))
print(f'\nMean accuracy: {mean_accuracy:.4f}')

NameError: name 'get_accuracy' is not defined