<a href="https://colab.research.google.com/github/Aniket21628/OCR/blob/main/OCR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python numpy pandas scikit-learn tensorflow keras imutils



In [16]:
import kagglehub

# Fetch the latest version of the A-Z handwritten-alphabets dataset and keep
# the location reported by kagglehub in `path`.
path = kagglehub.dataset_download(
    "sachinpatel21/az-handwritten-alphabets-in-csv-format"
)
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/az-handwritten-alphabets-in-csv-format


In [17]:
import os
import cv2
import numpy as np
import pandas as pd
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

In [32]:
def process_kaggle_dataset(
    csv_path="/kaggle/input/az-handwritten-alphabets-in-csv-format/A_Z Handwritten Data.csv",
):
    """Load the Kaggle A-Z handwritten-letter CSV as 28x28 images plus labels.

    The file is read positionally: column 0 holds the class index and the
    remaining 784 columns hold one flattened 28x28 image per row.
    ``pd.read_csv`` is called with its defaults, so the first line of the file
    is treated as the header row.

    Args:
        csv_path: Location of the CSV file. Defaults to the path used on
            Kaggle/Colab when the dataset is mounted under ``/kaggle/input``.

    Returns:
        A ``(data, labels)`` tuple where ``data`` is a float32 array of shape
        ``(n_rows, 28, 28)`` and ``labels`` is an integer array of shape
        ``(n_rows,)`` holding the CSV label shifted by 10 (A=10, ..., Z=35).
    """
    df = pd.read_csv(csv_path)

    # Convert the whole frame at once instead of building one Series per row;
    # the per-row loop is orders of magnitude slower on a file of this size.
    labels = df.iloc[:, 0].to_numpy().astype("int") + 10  # Offset: A=10, ..., Z=35

    # Keep the original uint8 -> float32 conversion chain so pixel values are
    # identical to the row-by-row implementation.
    pixels = df.iloc[:, 1:].to_numpy().astype("uint8")
    # Using len(df) (not -1) keeps the result 3-D even when the file has no rows.
    data = pixels.reshape((len(df), 28, 28)).astype("float32")

    return data, labels

In [30]:
def process_mnist():
    """Return every MNIST digit (train and test splits merged) with its label.

    Returns:
        A ``(images, digits)`` tuple: the images cast to float32 and the
        labels exactly as provided by ``mnist.load_data()``.
    """
    train_split, test_split = mnist.load_data()

    # Each split is an (images, labels) pair; join both along the sample axis.
    images = np.concatenate([train_split[0], test_split[0]], axis=0)
    digits = np.concatenate([train_split[1], test_split[1]], axis=0)

    return images.astype("float32"), digits

In [9]:
# Create the extraction target for the Chars74K archive. -p also creates the
# parent "data" directory and does not fail if the directories already exist.
!mkdir -p data/surrey

In [10]:
# Download the Chars74K "EnglishImg" archive without progress output, then
# unpack it under data/surrey/.
!wget --quiet --output-document=data/english.tgz http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/EnglishImg.tgz
!tar --extract --file=data/english.tgz --directory=data/surrey/

In [12]:
# Unpack data/english.tgz into data/surrey/ again (-z: gzip), this time
# dropping the first path component of every archive member.
# NOTE(review): the notebook reads images from data/surrey/English/..., which
# presumably comes from an extraction without --strip-components - confirm
# whether this stripped copy is still needed.
!tar -xzf data/english.tgz -C data/surrey/ --strip-components=1

In [20]:
import os

# Check if files exist
surrey_path = "data/surrey/English/Img/GoodImg/Bmp"

# List the directory once. A missing path, a path that is not a directory and
# an existing-but-empty directory all mean the dataset is unavailable; indexing
# [0] on an empty listing would otherwise raise IndexError.
character_folders = os.listdir(surrey_path) if os.path.isdir(surrey_path) else []

if character_folders:
    print(f"Found {len(character_folders)} character folders")
    sample_folder = os.path.join(surrey_path, character_folders[0])
    print(f"Sample folder contains {len(os.listdir(sample_folder))} images")
else:
    print("Dataset not found. Manual download required.")
    print("Please download from: https://github.com/sachinpatel21/Chars74k_Dataset")

Found 62 character folders
Sample folder contains 64 images


In [28]:
def process_surrey_dataset(surrey_path="data/surrey/English/Img/GoodImg/Bmp"):
    """Load the University of Surrey (Chars74K) images as 28x28 grayscale arrays.

    The dataset directory contains one ``Sample<NNN>`` folder per character
    class. The label of an image is its folder number minus one
    (``Sample001`` -> 0, ``Sample002`` -> 1, ...), which is intended to follow
    the character order "0-9", "A-Z", "a-z". Taking the label from the folder
    name rather than from the position in the listing keeps labels correct
    when a class folder is missing.

    Args:
        surrey_path: Directory holding the ``Sample<NNN>`` folders.

    Returns:
        A ``(data, labels)`` tuple: float32 array of shape ``(n, 28, 28)`` and
        integer array of shape ``(n,)``. With no readable image the arrays are
        empty but keep those ranks.
    """
    prefix = "Sample"

    # Collect (label, directory) pairs for every well-formed class folder.
    # Entries that are not directories, or whose suffix is not a number, are
    # skipped instead of crashing the numeric sort.
    class_dirs = []
    for name in os.listdir(surrey_path):
        number = name[len(prefix):]
        if not name.startswith(prefix) or not number.isdecimal():
            continue
        char_path = os.path.join(surrey_path, name)
        if os.path.isdir(char_path):
            class_dirs.append((int(number) - 1, char_path))
    class_dirs.sort()

    data = []
    labels = []
    for label, char_path in class_dirs:
        # os.listdir order is arbitrary; sorting makes the sample order (and
        # therefore any seeded split downstream) reproducible.
        for img_file in sorted(os.listdir(char_path)):
            if img_file.startswith("."):
                continue

            img = cv2.imread(os.path.join(char_path, img_file))
            if img is None:
                # Not an image OpenCV can decode; skip it as before.
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            data.append(cv2.resize(gray, (28, 28)))
            labels.append(label)

    # The explicit reshape keeps the (n, 28, 28) rank even when n == 0.
    images = np.array(data, dtype="float32").reshape((len(data), 28, 28))
    return images, np.array(labels, dtype="int")

In [33]:
# Run the three loaders in turn; each returns an (images, labels) pair.
print("Processing MNIST dataset...")
mnist_data, mnist_labels = process_mnist()

print("Processing Kaggle A-Z dataset...")
kaggle_data, kaggle_labels = process_kaggle_dataset()

print("Processing Surrey dataset...")
surrey_data, surrey_labels = process_surrey_dataset()

# Verify datasets: report how many images each source contributed.
for source_name, images in (
    ("MNIST", mnist_data),
    ("Kaggle", kaggle_data),
    ("Surrey", surrey_data),
):
    print(f"{source_name}: {len(images)} images")


Processing MNIST dataset...
Processing Kaggle A-Z dataset...
Processing Surrey dataset...
MNIST: 70000 images
Kaggle: 372450 images
Surrey: 7705 images


In [34]:
# Join the three sources along the sample axis. Images and labels use the same
# source order (MNIST, Kaggle, Surrey) so rows stay aligned.
combined_data = np.concatenate((mnist_data, kaggle_data, surrey_data), axis=0)
combined_labels = np.concatenate((mnist_labels, kaggle_labels, surrey_labels), axis=0)

In [35]:
# Write the merged images and labels to .npy files in the working directory.
for filename, array in (
    ("combined_data.npy", combined_data),
    ("combined_labels.npy", combined_labels),
):
    np.save(filename, array)
print(f"Combined dataset size: {len(combined_data)} images")

Combined dataset size: 450155 images


In [36]:
# Read the merged images and labels back from the .npy files.
data, labels = (
    np.load(filename)
    for filename in ("combined_data.npy", "combined_labels.npy")
)

In [37]:
# Resize every image to the 32x32 input size used by the ResNet, append a
# trailing channel axis, and divide by 255 to normalize.
resized_data = (
    np.array([cv2.resize(image, (32, 32)) for image in data])[..., np.newaxis]
    / 255.0
)

In [38]:
# Fit the binarizer on the integer class labels, then one-hot encode them.
label_bin = LabelBinarizer().fit(labels)
bin_labels = label_bin.transform(labels)

In [39]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split
# repeatable across runs.
trainX, testX, trainY, testY = train_test_split(
    resized_data,
    bin_labels,
    random_state=42,
    test_size=0.25,
)

print(f"Train shape: {trainX.shape}, Test shape: {testX.shape}")

Train shape: (337616, 32, 32, 1), Test shape: (112539, 32, 32, 1)


In [40]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, BatchNormalization, Activation,
    Add, AveragePooling2D, Flatten, Dense
)
from tensorflow.keras.regularizers import l2

In [41]:
def residual_module(x, num_filters, stride, reduce=False):
    """Pre-activation bottleneck residual block (BN -> ReLU -> Conv, three times).

    The main path is a 1x1 conv to ``num_filters / 4`` channels, a 3x3 conv
    (carrying ``stride``) and a 1x1 conv back up to ``num_filters`` channels.
    Its output is added to the shortcut branch.

    The shortcut is the unchanged input only when that input already has the
    shape of the main-path output. Otherwise it is passed through a strided
    1x1 convolution so both operands of the addition match. Without that
    projection a single-channel input would be combined with the
    ``num_filters``-channel output by broadcasting, and any other mismatch
    would fail inside ``Add``.

    Args:
        x: Input feature map; assumed channels-last (channel count on the
            last axis).
        num_filters: Number of output channels of the block.
        stride: Stride of the 3x3 convolution (int or pair of ints).
        reduce: Force a projection shortcut even when the shapes match.

    Returns:
        The output tensor of the block.
    """
    shortcut = x
    bottleneck_filters = int(num_filters * 0.25)

    # Block 1: 1x1 conv
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(bottleneck_filters, (1, 1), use_bias=False)(x)

    # Block 2: 3x3 conv (the only layer on the main path that downsamples)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(bottleneck_filters, (3, 3), strides=stride,
               padding="same", use_bias=False)(x)

    # Block 3: 1x1 conv
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(num_filters, (1, 1), use_bias=False)(x)

    # Shortcut connection: project whenever the caller asks for it or the
    # main path changed the channel count / spatial size.
    strides = (stride, stride) if isinstance(stride, int) else tuple(stride)
    shape_changes = (
        shortcut.shape[-1] != num_filters or any(s != 1 for s in strides)
    )
    if reduce or shape_changes:
        shortcut = Conv2D(num_filters, (1, 1), strides=stride,
                          use_bias=False)(shortcut)

    x = Add()([x, shortcut])
    return x


In [42]:
def build_resnet(input_shape=(32, 32, 1), num_classes=62):
    """Build the three-stage ResNet classifier used for OCR.

    Layout: input batch normalization, three residual modules (64, 128 and
    256 filters, the last two halving the spatial size), then BN -> ReLU ->
    average pooling over the remaining feature map -> softmax classifier.

    Args:
        input_shape: Shape of one input image, channels last.
        num_classes: Number of output classes of the softmax layer.

    Returns:
        The Keras ``Model`` mapping an image batch to class probabilities
        (not compiled).
    """
    inputs = Input(shape=input_shape)

    # Initial processing
    x = BatchNormalization()(inputs)

    # Stage 1: the channel count grows from the input depth to 64, so the
    # shortcut has to be projected (reduce=True) rather than added as-is.
    x = residual_module(x, 64, (1, 1), reduce=True)

    # Stage 2 (reduce spatial size)
    x = residual_module(x, 128, (2, 2), reduce=True)

    # Stage 3 (reduce spatial size)
    x = residual_module(x, 256, (2, 2), reduce=True)

    # Final layers
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    # Pool over the whole remaining feature map: 8x8 for the default 32x32
    # input, and the matching size for other inputs. Fall back to the original
    # fixed window when the spatial size is not known statically.
    spatial = tuple(x.shape[1:3])
    pool_size = spatial if all(spatial) else (8, 8)
    x = AveragePooling2D(pool_size)(x)
    x = Flatten()(x)
    x = Dense(num_classes, activation="softmax")(x)

    return Model(inputs, x)

In [43]:
# Take the model dimensions from the prepared data rather than the function
# defaults: the input shape from the training images and the class count from
# the fitted label binarizer, so the softmax width always matches trainY.
model = build_resnet(
    input_shape=trainX.shape[1:],
    num_classes=len(label_bin.classes_),
)
model.summary()