In [1]:
# Step 1: Import Libraries
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torchvision import transforms
import matplotlib.font_manager as fm
from PIL import Image
import requests

In [2]:
# Step 2: Verify GPU Availability
# Prefer CUDA when PyTorch can see a GPU; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

Using device: cuda


In [3]:
# Step 3: Install a Devanagari Font
# Fetch Noto Sans Devanagari and register it with matplotlib so that it can be
# selected as the default plotting font family below.
url = "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSansDevanagari/NotoSansDevanagari-Regular.ttf"
font_path = "C:/Coding/ocr_model/NotoSansDevanagari-Regular.ttf"
if not os.path.exists(font_path):
    # Only touch the network when the font is not already cached on disk.
    r = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an HTML error page as a .ttf file.
    r.raise_for_status()
    os.makedirs(os.path.dirname(font_path), exist_ok=True)
    with open(font_path, "wb") as f:
        f.write(r.content)
fm.fontManager.addfont(font_path)
plt.rcParams['font.family'] = 'Noto Sans Devanagari'

In [4]:
# Step 4: Load Words and Images
# words.txt holds one label per line; the i-th line is paired (by position) with
# image "<i>.png" when labels and images are combined further down.
with open('/Coding/ocr_model/handwritten_Dataset/words.txt', 'r', encoding='utf-8') as f:
    words = f.read().splitlines()

image_dir = '/Coding/ocr_model/handwritten_Dataset/images'
images = []
for i in range(1, 36):
    img_path = os.path.join(image_dir, f'{i}.png')
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising, which would otherwise surface as an opaque resize error.
        raise FileNotFoundError(f"Could not read image {img_path}")
    # dsize is (width, height), so the resulting array has shape (32, 128).
    img = cv2.resize(img, (128, 32))
    # Scale to [0, 1] and keep float32 (the dtype the model and transforms consume).
    images.append(img.astype(np.float32) / 255.0)

# Stack to (N, 32, 128) and add a trailing channel axis -> (N, 32, 128, 1).
images = np.stack(images)[..., np.newaxis]

if len(words) != len(images):
    # Labels and images are matched by position, so a count mismatch would
    # silently mislabel samples or fail later during the train/test split.
    raise ValueError(
        f"Found {len(words)} labels in words.txt but loaded {len(images)} images."
    )

In [5]:
# Step 5: Encode Labels
# Fit the encoder on the word list, then map every word to its integer class id.
label_encoder = LabelEncoder()
label_encoder.fit(words)
labels = label_encoder.transform(words)

# Step 6: Data Augmentation and Dataset Class
# Random geometric jitter (rotation, translation, shear, scale) applied on a PIL
# image, then converted back to a tensor.
_augmentation_steps = [
    transforms.ToPILImage(),
    transforms.RandomRotation(degrees=10),
    transforms.RandomAffine(
        degrees=0,
        translate=(0.1, 0.1),
        shear=10,
        scale=(0.9, 1.1),
    ),
    transforms.ToTensor(),
]
transform = transforms.Compose(_augmentation_steps)

class OCRDataset(Dataset):
    """Dataset pairing word images with integer class labels.

    Args:
        images: Indexable collection of image arrays. Without a transform each
            item is treated as channels-last (H, W, C) and permuted to (C, H, W).
        labels: Indexable collection of integer class ids, aligned with ``images``.
        transform: Optional callable applied to the float32 image array; when
            given, its return value is used as the image as-is.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The label is returned as a 0-dim ``torch.long`` tensor so that batches
        are valid ``CrossEntropyLoss`` targets regardless of the dtype of the
        underlying label array.
        """
        img = self.images[idx].astype(np.float32)
        label = torch.tensor(int(self.labels[idx]), dtype=torch.long)
        if self.transform:
            img = self.transform(img)
        else:
            # Channels-last array -> channels-first tensor.
            img = torch.from_numpy(img).permute(2, 0, 1)
        return img, label

# Augment data (5x)
# Every original image contributes 5 randomly transformed copies that share its label.
augmented_imgs, augmented_lbls = [], []
for i in range(len(images)):
    # ToPILImage maps single-channel float32 arrays to PIL mode "F"; float64 input
    # is rejected by some torchvision releases, so cast explicitly before transforming.
    img = images[i].astype(np.float32)
    for _ in range(5):
        transformed = transform(img)
        augmented_imgs.append(transformed.numpy())
        augmented_lbls.append(labels[i])

augmented_imgs = np.array(augmented_imgs)
augmented_lbls = np.array(augmented_lbls)

# ToTensor yields channels-first (N, C, H, W); transpose back to channels-last so the
# augmented samples can be concatenated with the original (N, H, W, C) array.
X = np.concatenate([images, augmented_imgs.transpose(0, 2, 3, 1)], axis=0)
y = np.concatenate([labels, augmented_lbls], axis=0)

In [6]:
# Step 7: Train-test split
# Hold out 20% of the samples; the fixed random_state keeps the split reproducible.
# NOTE(review): X already mixes each original with its augmented copies, so variants
# of one source image can land on both sides of this split — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training samples pass through the random augmentation on every access;
# test samples are served without any transform.
train_dataset = OCRDataset(X_train, y_train, transform)
test_dataset = OCRDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Step 8: Build CNN Model in PyTorch
class CNNModel(nn.Module):
    """Three conv/pool stages followed by a two-layer classifier head.

    The first linear layer expects 128 * 16 * 4 flattened features, which is what
    three 2x poolings produce from a 1 x 32 x 128 input.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Layer order is kept fixed so state_dict keys (net.0, net.3, ...) stay stable.
        layers = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(128 * 16 * 4, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a batch of images."""
        logits = self.net(x)
        return logits

# Build the 35-class network, then move its parameters to the selected device.
model = CNNModel(num_classes=35)
model = model.to(device)

In [None]:
# Step 9: Train the Model
# Cross-entropy on the class logits, optimised with Adam for 20 epochs. Accuracy on
# the training and test loaders is recorded once per epoch.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_acc, val_acc = [], []
for epoch in range(20):
    model.train()
    correct, total = 0, 0
    # Batch variables are deliberately NOT named `images`/`labels`: those names hold
    # the dataset arrays built in earlier cells, and overwriting them would break
    # re-running those cells.
    for batch_imgs, batch_lbls in train_loader:
        batch_imgs = batch_imgs.to(device)
        # CrossEntropyLoss requires int64 class indices.
        batch_lbls = batch_lbls.to(device).long()
        outputs = model(batch_imgs)
        loss = criterion(outputs, batch_lbls)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # argmax is not differentiable, so no detach / .data access is needed here.
        predicted = outputs.argmax(dim=1)
        total += batch_lbls.size(0)
        correct += (predicted == batch_lbls).sum().item()
    train_acc.append(correct / total)

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_imgs, batch_lbls in test_loader:
            batch_imgs = batch_imgs.to(device)
            batch_lbls = batch_lbls.to(device).long()
            outputs = model(batch_imgs)
            predicted = outputs.argmax(dim=1)
            total += batch_lbls.size(0)
            correct += (predicted == batch_lbls).sum().item()
    val_acc.append(correct / total)

    print(f"Epoch {epoch+1}: Train Acc = {train_acc[-1]:.4f}, Val Acc = {val_acc[-1]:.4f}")


In [None]:
# Step 10: Evaluate the Model
# Final accuracy over the whole test loader, with dropout disabled (eval mode)
# and gradient tracking switched off.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    # Batch variables avoid the names `images`/`labels`, which hold the dataset
    # arrays built in earlier cells and must not be overwritten.
    for batch_imgs, batch_lbls in test_loader:
        batch_imgs = batch_imgs.to(device)
        batch_lbls = batch_lbls.to(device).long()
        outputs = model(batch_imgs)
        predicted = outputs.argmax(dim=1)
        total += batch_lbls.size(0)
        correct += (predicted == batch_lbls).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Step 11: Plot Training History
# One curve per recorded accuracy series, indexed by epoch.
fig, ax = plt.subplots()
ax.plot(train_acc, label='Training Accuracy')
ax.plot(val_acc, label='Validation Accuracy')
ax.set_title('Model Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

# Step 12: Save Model
# Only the parameters (state_dict) are written, not the pickled module object.
torch.save(model.state_dict(), 'hindi_ocr_model.pth')

In [None]:
# Step 13: Predict a Sample
# Run the first test sample through the network and decode the arg-max class
# back to its word via the label encoder.
sample_img, true_label = test_dataset[0]
model.eval()
with torch.no_grad():
    batch_of_one = sample_img.unsqueeze(0).to(device)
    pred = model(batch_of_one)
    best_class = torch.argmax(pred, dim=1).item()
    pred_label = label_encoder.inverse_transform([best_class])[0]

# Drop the singleton channel axis so imshow receives a 2-D array.
plt.imshow(sample_img.squeeze(), cmap='gray')
plt.title(f"Predicted: {pred_label}")
plt.show()

In [10]:
# Persist the fitted label encoder so class ids can be mapped back to words later.
import pickle

with open('label_encoder.pkl', mode='wb') as f:
    pickle.dump(label_encoder, f)

In [None]:
# Step 1: There is no need to install the font again; it was already installed in an earlier cell.
# Step 2: Load Trained PyTorch Model
class CNNModel(nn.Module):
    """CNN classifier used for inference; must mirror the architecture that was trained.

    Args:
        num_classes: Size of the output layer. It has to equal the value used at
            training time (35 for the saved checkpoint) or ``load_state_dict``
            will fail with a shape mismatch.
    """

    def __init__(self, num_classes):
        super(CNNModel, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            # 128 channels * 16 * 4 spatial positions left after three 2x poolings
            # of a 1 x 32 x 128 input.
            nn.Linear(128 * 16 * 4, 256), nn.ReLU(),
            nn.Dropout(0.5),
            # Honour the constructor argument instead of a hard-coded 35.
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        return self.net(x)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNNModel(num_classes=35)
# Raw string: sequences such as "\C", "\o" and "\h" are invalid escapes in a normal
# literal (SyntaxWarning on Python 3.12+) and only worked by accident.
# NOTE(review): the training cell saves to the relative path 'hindi_ocr_model.pth';
# this absolute path matches only if that cell ran from C:\Coding\ocr_model — confirm.
model_path = r'C:\Coding\ocr_model\hindi_ocr_model.pth'
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model weights {model_path} not found.")
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
print("Model loaded successfully from ocr_model\\hindi_ocr_model.pth")

# Step 3: Load Label Encoder
# Raw string: "\C", "\o" and "\l" are invalid escape sequences in a normal literal
# (SyntaxWarning on Python 3.12+); the resulting path is unchanged.
encoder_path = r'C:\Coding\ocr_model\label_encoder.pkl'
if not os.path.exists(encoder_path):
    raise FileNotFoundError(f"Label encoder {encoder_path} not found.")
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only load
# an encoder pickle that this notebook (or another trusted source) produced.
with open(encoder_path, 'rb') as f:
    label_encoder = pickle.load(f)
print("Label encoder loaded successfully.")

# Step 4: Load and Preprocess Image
# Same preprocessing as the training data: grayscale, resize to 128x32, scale to [0, 1].
image_path = 'C:/Coding/ocr_model/handwritten_Dataset/images/2.png'
if not os.path.exists(image_path):
    raise FileNotFoundError(f"Image {image_path} not found. Ensure it's in ocr_model/handwritten_Dataset/images.")

img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None (no exception) when the file exists but cannot be decoded.
    raise ValueError(f"Image {image_path} exists but could not be decoded.")
# dsize is (width, height), so the array comes out with shape (32, 128).
img = cv2.resize(img, (128, 32))
img = img / 255.0

# Convert to tensor: (1, 1, 32, 128)
img_tensor = torch.tensor(img, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)

# Step 5: Predict
# Forward pass without gradient tracking, then decode the arg-max class id to its word.
with torch.no_grad():
    output = model(img_tensor)
pred_idx = torch.argmax(output, dim=1).item()
decoded = label_encoder.inverse_transform([pred_idx])
pred_label = decoded[0]

# Step 6: Show Result
# Report the prediction as text and as the title of the (axis-free) input image.
print(f"Predicted Word for 2.png: {pred_label}")
fig, ax = plt.subplots()
ax.imshow(img, cmap='gray')
ax.set_title(f"Predicted: {pred_label}")
ax.axis('off')
plt.show()