In [1]:
from torchvision.datasets import EMNIST
from torchvision import transforms
from torch.utils.data import DataLoader

# Values a reader is most likely to tune, named once instead of repeated inline.
DATA_ROOT = './data'
EMNIST_SPLIT = 'byclass'
BATCH_SIZE = 64

# Transformation: convert to a tensor with pixel values in [0, 1], then swap the
# height and width axes (the orientation fix this notebook defines in its next
# cell). The pipeline has to be complete *before* the datasets are built: each
# dataset keeps the transform object it was handed, so rebinding the name
# `transform` afterwards does not change what the loaders produce.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.permute(0, 2, 1)),  # Rotate to fix orientation
])

# Download the EMNIST dataset (train and test parts share the same transform so
# training, evaluation and single-sample inference all see the same orientation)
train_dataset = EMNIST(root=DATA_ROOT, split=EMNIST_SPLIT, train=True, download=True, transform=transform)
test_dataset = EMNIST(root=DATA_ROOT, split=EMNIST_SPLIT, train=False, download=True, transform=transform)

# DataLoaders: shuffle only the training data; keep test order stable
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to ./data/EMNIST/raw/gzip.zip


100.0%


Extracting ./data/EMNIST/raw/gzip.zip to ./data/EMNIST/raw


In [2]:
# Orientation-correcting pipeline: tensor conversion followed by a swap of the
# height and width axes.
# NOTE: this statement only rebinds the name `transform`. Datasets that were
# constructed earlier keep the transform object they received at construction
# time, so they must be (re)built with this object for it to take effect.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # Rotate to fix orientation
        transforms.Lambda(lambda img: img.permute(0, 2, 1)),
    ]
)


In [4]:
import torch.nn as nn
import torch.nn.functional as F

# Step 2: CNN Model Definition
class EMNISTCNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    The network is a feature extractor (``conv_layers``) made of two
    conv -> ReLU -> 2x2 max-pool stages, followed by a two-layer fully
    connected head (``fc_layers``) that emits one raw score (logit) per class.

    Args:
        num_classes: Size of the output layer. Defaults to 62.
    """

    def __init__(self, num_classes=62):
        super().__init__()

        # Each 3x3 convolution uses padding=1, so only the pooling layers
        # shrink the spatial size: 28 -> 14 -> 7.
        feature_stages = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),   # -> (32, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                           # -> (32, 14, 14)
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # -> (64, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                           # -> (64, 7, 7)
        ]
        self.conv_layers = nn.Sequential(*feature_stages)

        # Flattened feature map size expected by the first linear layer.
        flat_features = 64 * 7 * 7
        classifier_stages = [
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        ]
        self.fc_layers = nn.Sequential(*classifier_stages)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        return self.fc_layers(self.conv_layers(x))

In [5]:
import torch

def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: ``nn.Module`` to optimise. It is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU for a parameter-free
            model), so the function does not rely on a notebook-level
            ``device`` global being defined.

    Returns:
        None. Progress is reported with one printed line per epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0  # counted here so loaders without __len__ also work
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # An empty loader has no meaningful loss; report NaN instead of
        # failing with a ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss:.4f}")

In [6]:
def evaluate_model(model, test_loader, device=None):
    """Print the top-1 accuracy of ``model`` over every batch of ``test_loader``.

    Args:
        model: ``nn.Module`` to evaluate. It is switched to evaluation mode.
        test_loader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU for a parameter-free
            model), so the function does not rely on a notebook-level
            ``device`` global being defined.

    Returns:
        None. The accuracy is printed as a percentage with two decimals.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            # Index of the highest score along the class dimension.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print("Accuracy: n/a (no samples evaluated)")
        return

    print(f"Accuracy: {100 * correct / total:.2f}%")

In [7]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network and place its parameters on the selected device.
model = EMNISTCNN(num_classes=62)
model = model.to(device)

# Cross-entropy over the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Ten passes over the training set, then one accuracy report on the test set.
train_model(model, train_loader, criterion, optimizer, num_epochs=10)
evaluate_model(model, test_loader)


Epoch 1/10, Loss: 0.5076
Epoch 2/10, Loss: 0.3883
Epoch 3/10, Loss: 0.3629
Epoch 4/10, Loss: 0.3478
Epoch 5/10, Loss: 0.3370
Epoch 6/10, Loss: 0.3278
Epoch 7/10, Loss: 0.3206
Epoch 8/10, Loss: 0.3139
Epoch 9/10, Loss: 0.3084
Epoch 10/10, Loss: 0.3029
Accuracy: 86.37%


In [8]:
# Persist only the learned parameters (the state_dict), not the pickled module.
MODEL_PATH = "emnist_cnn.pth"
torch.save(model.state_dict(), MODEL_PATH)

# Round-trip check: read the checkpoint back into the live model.
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state_dict holds, and avoids executing arbitrary pickled code.
model.load_state_dict(torch.load(MODEL_PATH, weights_only=True))
model.eval()

  model.load_state_dict(torch.load("emnist_cnn.pth"))


EMNISTCNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=62, bias=True)
  )
)

In [22]:
import random

# Example Inference
def predict(model, image):
    """Return the predicted class index for a single, un-batched ``image``.

    Args:
        model: ``nn.Module`` producing per-class scores. It is switched to
            evaluation mode.
        image: Tensor for one sample *without* a batch dimension; a leading
            batch axis of size 1 is added before the forward pass.

    Returns:
        int: Index of the highest-scoring class.
    """
    # Run on whatever device the model lives on (CPU for a parameter-free
    # model) instead of depending on a notebook-level ``device`` global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        batch = image.to(target_device).unsqueeze(0)  # Add batch dimension
        predicted = model(batch).argmax(dim=1)
    return predicted.item()

# Test the prediction with a single image from the test dataset
# Pick one of the first 101 test samples at random (randint is inclusive on
# both ends) and compare the model's answer with the stored label. Both values
# are printed as class indices.
sample_index = random.randint(0, 100)
sample_image, sample_label = test_dataset[sample_index]
predicted_label = predict(model, sample_image)
print(f"Ground Truth: {sample_label}, Predicted: {predicted_label}")

Ground Truth: 2, Predicted: 2


In [24]:
import torch
import torch.nn as nn
import numpy as np
from PIL import Image

# Define the model architecture (same as during training)
class EMNISTCNN(nn.Module):
    """Inference-side copy of the CNN used for training.

    The layer layout and the attribute names (``conv_layers``, ``fc_layers``)
    have to match the network that produced the checkpoint, because the saved
    ``state_dict`` is looked up by those names and positions.

    Args:
        num_classes: Size of the output layer. Defaults to 62.
    """

    def __init__(self, num_classes=62):
        super().__init__()
        # Two conv stages flattened into one Sequential so the sub-module
        # indices stay 0..5.
        self.conv_layers = nn.Sequential(
            *self._conv_stage(1, 32),
            *self._conv_stage(32, 64),
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 conv (padding 1) -> ReLU -> 2x2 max-pool stage."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        features = self.conv_layers(x)
        return self.fc_layers(features)

# Load the trained model
def load_trained_model(model_path="emnist_cnn.pth"):
    """Build an ``EMNISTCNN`` and load saved weights into it for CPU inference.

    Args:
        model_path: Path to a checkpoint holding the network's ``state_dict``.

    Returns:
        EMNISTCNN: The model with loaded weights, in evaluation mode.
    """
    model = EMNISTCNN(num_classes=62)
    # map_location puts every tensor on the CPU regardless of the device it was
    # saved from. weights_only=True limits unpickling to tensors and plain
    # containers, so a tampered checkpoint cannot execute arbitrary code.
    state_dict = torch.load(
        model_path,
        map_location=torch.device('cpu'),
        weights_only=True,
    )
    model.load_state_dict(state_dict)
    model.eval()  # Set model to evaluation mode
    return model

# Preprocess the input numpy array (30x30)
def preprocess_cell(cell):
    """Turn a 30x30 image array into a ``(1, 1, 28, 28)`` float32 model input.

    Args:
        cell: ``numpy.ndarray`` of shape ``(30, 30)`` holding one image.

    Returns:
        torch.Tensor: Batch of one single-channel 28x28 image with values
        scaled by 1/255.

    Raises:
        AssertionError: If ``cell`` is not a numpy array of shape ``(30, 30)``.
    """
    # NOTE(review): the result is only meaningful if `cell` uses the same
    # orientation and polarity as the images the model was trained on - confirm
    # against the training transform.
    # Ensure the input is a numpy array
    assert isinstance(cell, np.ndarray), "Input must be a numpy array"
    assert cell.shape == (30, 30), "Input array must be 30x30"

    # Convert numpy array to Pillow image
    image = Image.fromarray(cell).convert('L')  # Convert to grayscale if not already

    # Resize to 28x28 (EMNIST standard size).
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
    # and is available in old and new Pillow releases alike.
    image = image.resize((28, 28), Image.LANCZOS)

    # Convert to numpy array, normalize to [0, 1], and add channel dimension
    pixels = np.array(image, dtype=np.float32) / 255.0  # Normalize pixel values
    pixels = np.expand_dims(pixels, axis=0)  # Add channel dimension (1, 28, 28)

    # Convert to PyTorch tensor
    image_tensor = torch.tensor(pixels, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
    return image_tensor

# Function to predict the letter in the cell
def predict_letter_from_cell(cell, model):
    """Classify the glyph in ``cell`` and return it as a one-character string.

    Args:
        cell: Image array accepted by ``preprocess_cell``.
        model: Network returning one score per class for a batch of images.

    Returns:
        str: The character whose position in the digit / upper-case /
        lower-case table equals the highest-scoring class index.
    """
    # Class index -> character lookup: digits, then A-Z, then a-z.
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

    batch = preprocess_cell(cell)

    # Forward pass without gradient tracking; keep the best class per row.
    with torch.no_grad():
        logits = model(batch)
        best_index = logits.argmax(dim=1)

    return alphabet[best_index.item()]

# Usage example
if __name__ == "__main__":
    # Restore the trained network from its checkpoint.
    model = load_trained_model(model_path="emnist_cnn.pth")

    # Stand-in input: random uint8 noise in place of a real 30x30 cell
    # (the upper bound of randint is exclusive).
    cell = np.random.randint(low=0, high=255, size=(30, 30), dtype=np.uint8)

    # Classify the cell and report the resulting character.
    predicted_letter = predict_letter_from_cell(cell=cell, model=model)
    print(f"Predicted letter: {predicted_letter}")


True