In [118]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from PIL import Image

In [119]:
class CharacterCNN(nn.Module):
    """Three-stage convolutional classifier for single-channel character images.

    Each stage is a 3x3 convolution (padding 1, so spatial size is preserved),
    a ReLU, and a 2x2 max-pool that halves height and width. The pooled
    feature map is flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits produced by the final layer.
        input_size: Spatial size of the images the network will receive,
            either a single int for square inputs or a ``(height, width)``
            pair. The default of 32 yields the 2048 flattened features
            (128 channels * 4 * 4) the first linear layer was originally
            built with.

    Raises:
        ValueError: If ``input_size`` is too small to survive three
            halvings (any side below 8 pixels).
    """

    def __init__(self, num_classes=62, input_size=32):
        super(CharacterCNN, self).__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Three 2x2/stride-2 pools each floor the size in half, i.e. size // 8.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 8 pixels"
            )

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Layer names are kept as-is so previously saved state dicts still load.
        self.fc1 = nn.Linear(128 * feat_h * feat_w, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch ``x``.

        ``x`` must be ``(batch, 1, height, width)`` with the spatial size the
        model was constructed for; otherwise the flattened features will not
        match ``fc1``.
        """
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [120]:
# Data transformations: single-channel 32x32 tensors, normalised with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_path = '/Users/arya/Desktop/CVIP_Proj/OCR/Fnt'
dataset = ImageFolder(root=dataset_path, transform=transform)

# 80/20 train/validation split.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same images land in the validation set on every run;
# otherwise validation metrics and the early-stopping checkpoint are not
# comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [121]:
import torch.optim as optim
import numpy as np

def train_model(model, train_loader, val_loader, epochs, device, patience=10,
                lr=0.001,
                save_path="/Users/arya/Desktop/CVIP_Proj/OCR/character_cnn.pth",
                restore_best=True):
    """Train ``model`` with Adam and cross-entropy, early-stopping on validation loss.

    After every epoch the model is evaluated on ``val_loader``. Whenever the
    mean per-batch validation loss improves, the state dict is written to
    ``save_path``. Training stops once the loss has failed to improve for
    ``patience`` consecutive epochs, or after ``epochs`` epochs.

    Args:
        model: Network to train; moved to ``device`` in place.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        val_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        epochs: Maximum number of epochs to run.
        device: Device the model and every batch are moved to.
        patience: Number of consecutive non-improving epochs tolerated
            before stopping early.
        lr: Learning rate passed to the Adam optimizer.
        save_path: File the best-validation state dict is written to.
        restore_best: When True, reload the best checkpoint into ``model``
            before returning, so the in-memory weights match the saved file
            instead of being those of the last (possibly worse) epoch.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ZeroDivisionError: If either loader has length zero.
    """
    # Create the checkpoint folder up front so a missing directory cannot
    # abort the run after a full epoch of training has already been spent.
    checkpoint_dir = os.path.dirname(save_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_loss = float("inf")
    patience_counter = 0
    checkpoint_written = False

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}")

        # Validation phase: no gradients, dropout/batch-norm in eval mode.
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        accuracy = 100 * correct / total
        print(f"Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%")

        # Early-stopping check: checkpoint on improvement, otherwise count down.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), save_path)  # Save the best model
            checkpoint_written = True
            print("Validation loss improved. Model saved.")
        else:
            patience_counter += 1
            print(f"No improvement in validation loss. Patience counter: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print("Early stopping triggered. Stopping training.")
            break

    # Without this the caller is left holding the weights of the final epoch,
    # which after early stopping are `patience` epochs past the best ones.
    if restore_best and checkpoint_written:
        model.load_state_dict(torch.load(save_path, map_location=device))
        print("Restored best checkpoint weights.")

    print("Training complete.")


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 62-way classifier, trained for at most 400 epochs with an early-stopping patience of 10.
model = CharacterCNN(num_classes=62)
train_model(
    model,
    train_loader,
    val_loader,
    epochs=400,
    device=device,
    patience=10,
)


Epoch 1/400, Train Loss: 0.8928
Validation Loss: 0.4981, Accuracy: 82.96%
Validation loss improved. Model saved.
Epoch 2/400, Train Loss: 0.4252
Validation Loss: 0.3934, Accuracy: 85.73%
Validation loss improved. Model saved.
Epoch 3/400, Train Loss: 0.3352
Validation Loss: 0.3455, Accuracy: 86.93%
Validation loss improved. Model saved.
Epoch 4/400, Train Loss: 0.2806
Validation Loss: 0.3286, Accuracy: 87.48%
Validation loss improved. Model saved.
Epoch 5/400, Train Loss: 0.2463
Validation Loss: 0.2896, Accuracy: 88.63%
Validation loss improved. Model saved.
Epoch 6/400, Train Loss: 0.2254
Validation Loss: 0.3087, Accuracy: 88.41%
No improvement in validation loss. Patience counter: 1/10
Epoch 7/400, Train Loss: 0.2048
Validation Loss: 0.2648, Accuracy: 89.93%
Validation loss improved. Model saved.
Epoch 8/400, Train Loss: 0.1913
Validation Loss: 0.2744, Accuracy: 89.73%
No improvement in validation loss. Patience counter: 1/10
Epoch 9/400, Train Loss: 0.1820
Validation Loss: 0.2637, A

In [66]:
# --- Step 6: Save the Model ---
# train_model() writes its best-validation checkpoint to character_cnn.pth.
# Saving the in-memory weights to that same path could replace it with weights
# from a later, worse epoch, so this snapshot goes to its own file.
torch.save(model.state_dict(), '/Users/arya/Desktop/CVIP_Proj/OCR/character_cnn_final.pth')
print("Model saved!")

Model saved!
