In [None]:
import gradio as gr
import torch 
import torchvision
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
import time
import torch.nn as nn


from models import build_model
from models.backbone import Backbone, Joiner
from models.detr import DETR, PostProcess
from models.transformer import Transformer
from models.position_encoding import PositionEmbeddingSine

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


# Character Recognition Model

In [2]:
class CharacterNetwork(nn.Module):
    """Fully connected classifier for flattened 28x28 single-channel images.

    The layer widths are ``[784] + hidden_dims + [10]``. Every Linear layer
    except the last is followed by a ReLU, and the last Linear's output is
    passed through a softmax, so ``forward`` returns per-class probabilities.
    """

    def __init__(self, hidden_dims=[128,64,32,16]):
        """Build the Linear/ReLU stack.

        Args:
            hidden_dims: list of hidden-layer widths (never mutated here).
        """
        super().__init__()
        self.network_dims = [28 * 28] + hidden_dims + [10]
        self.layers = nn.ModuleList()

        widths = self.network_dims
        final_linear = len(widths) - 2  # position of the output Linear
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            self.layers.append(nn.Linear(fan_in, fan_out))
            # Only hidden layers get a non-linearity; the output layer does not.
            if position != final_linear:
                self.layers.append(nn.ReLU())

    def forward(self, x):
        """Return class probabilities of shape [B, 10] for a batch x of B images."""
        # Collapse everything after the batch dimension into one feature vector.
        features = x.view(x.size(0), -1)
        *hidden_stack, output_layer = self.layers
        for module in hidden_stack:
            features = module(features)
        # Softmax over the class dimension turns the final scores into probabilities.
        return nn.functional.softmax(output_layer(features), dim=1)

In [3]:
# 10-class character classifier with three hidden layers; it is handed to
# detect_characters() by performDetection().
model2 = CharacterNetwork(hidden_dims=[128, 64, 32])
# Module-level CUDA availability flag.
cuda = torch.cuda.is_available()

In [4]:
def nms(boxes, scores, threshold):
    """Greedy non-maximum suppression.

    Args:
        boxes: sequence of [x1, y1, x2, y2] boxes (inclusive pixel coordinates,
            hence the ``+ 1`` in the width/height computations).
        scores: one score per box; higher scores are kept first.
        threshold: boxes whose IoU with an already kept box is strictly
            greater than this value are dropped.

    Returns:
        List of indices into ``boxes`` of the kept boxes, in descending score
        order. An empty list when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return []

    rects = np.array(boxes)
    left, top = rects[:, 0], rects[:, 1]
    right, bottom = rects[:, 2], rects[:, 3]
    box_areas = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, best score first.
    order = np.argsort(scores)[::-1]
    selected = []

    while len(order) > 0:
        best, rest = order[0], order[1:]
        selected.append(best)

        # Overlap rectangle between the best box and every remaining candidate.
        overlap_w = np.maximum(
            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1
        )
        overlap_h = np.maximum(
            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1
        )
        intersection = overlap_w * overlap_h
        iou = intersection / (box_areas[best] + box_areas[rest] - intersection)

        # Keep only the candidates that do not overlap the best box too much.
        order = rest[~(iou > threshold)]

    return selected

def find_contours(dimensions, img):
    """Extract candidate character crops from a binary image.

    Args:
        dimensions: indexable of four numbers
            ``[min_width, max_width, min_height, max_height]``; a contour is
            kept only when its bounding box is strictly inside these limits.
        img: single-channel image passed to ``cv2.findContours``.

    Returns:
        ``(crops, boxes)``: ``crops`` are 60x40 arrays holding the character
        resized to 20x40 and centred on a zero background; ``boxes`` are the
        matching ``[x1, y1, x2, y2]`` rectangles. Both are filtered by ``nms``
        (IoU threshold 0.1, bounding-box area as the score) and returned in
        the order ``nms`` yields them.
    """
    contours, _ = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    min_w, max_w = dimensions[0], dimensions[1]
    min_h, max_h = dimensions[2], dimensions[3]

    # Only the 15 largest contours (by area) are considered.
    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:15]

    crops, rects, areas = [], [], []
    for contour in largest:
        left, top, width, height = cv2.boundingRect(contour)

        # Skip anything whose bounding box is not character-sized.
        if not (min_w < width < max_w and min_h < height < max_h):
            continue

        # Cut the character out, normalise it to 20x40 and pad to 60x40.
        glyph = cv2.resize(img[top:top + height, left:left + width], (20, 40))
        canvas = np.zeros((60, 40))
        canvas[10:50, 10:30] = glyph

        rects.append([left, top, left + width, top + height])
        areas.append(width * height)
        crops.append(canvas)

    rects = np.array(rects)
    areas = np.array(areas)
    kept = nms(rects, areas, 0.1)

    return [crops[k] for k in kept], [rects[k] for k in kept]

In [5]:
def detect_characters(image, model):
    """Locate and classify the characters on a cropped licence-plate image.

    NOTE: ``detect_characters`` is defined again in a later cell; that later
    definition shadows this one once the whole notebook has been run.

    The crop is resized to 333x75, binarised with Otsu's threshold, split into
    character candidates by ``find_contours`` and each candidate is classified
    by ``model``. A green box and the predicted class index are drawn for every
    candidate.

    Args:
        image: colour image array of the plate (converted with
            ``cv2.COLOR_BGR2GRAY``, so three channels are expected).
        model: classifier whose weights are (re)loaded from
            ``CharacterModel.pth`` on every call.

    Returns:
        The annotated image resized back to the input's width and height.
        An empty or ``None`` input is returned unchanged.
    """
    # OpenCV cannot resize an empty array, so hand such input straight back.
    if image is None or image.size == 0:
        return image

    cuda = torch.cuda.is_available()
    if cuda:
        model.cuda()
    model.eval()
    # map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
    # machine; load_state_dict then copies into the model's own device.
    model.load_state_dict(
        torch.load("CharacterModel.pth", map_location="cpu", weights_only=True)
    )

    # Store original image dimensions
    original_height, original_width = image.shape[:2]

    image = img_lp = cv2.resize(image, (333, 75))

    # Convert to grayscale and preprocess the license plate image
    img_gray_lp = cv2.cvtColor(img_lp, cv2.COLOR_BGR2GRAY)
    _, img_binary_lp = cv2.threshold(img_gray_lp, 200, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # NOTE(review): the kernel argument below is the tuple (3, 3), not a 3x3
    # array such as np.ones((3, 3)); confirm this is the intended kernel.
    img_binary_lp = cv2.erode(img_binary_lp, (3, 3))
    img_binary_lp = cv2.dilate(img_binary_lp, (3, 3))

    LP_HEIGHT = img_binary_lp.shape[0]
    LP_WIDTH = img_binary_lp.shape[1]

    # Make a 3-pixel border white on all four sides
    img_binary_lp[0:3, :] = 255
    img_binary_lp[:, 0:3] = 255
    img_binary_lp[72:75, :] = 255
    img_binary_lp[:, 330:333] = 255

    # Estimations of character contours sizes of cropped license plates
    dimensions = [0, LP_WIDTH / 2, LP_HEIGHT / 6, 3 * LP_HEIGHT / 3]

    # Get contours within cropped license plate
    char_list, boxes = find_contours(dimensions, img_binary_lp)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for char, box in zip(char_list, boxes):
            char = cv2.resize(char, (28, 28))
            char = torch.from_numpy(char).reshape(1, 1, 28, 28).float()
            if cuda:
                char = char.cuda()

            outputs = model(char)
            pred = torch.argmax(outputs, 1)[0].item()

            # Plain Python ints keep the OpenCV drawing calls independent of
            # the numpy integer type the boxes are stored in.
            x1, y1, x2, y2 = (int(v) for v in box)
            # Draw bounding box and predicted character on the image
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 1)
            image = cv2.putText(image, f'{pred}', (x1 + 2, y1 + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1, cv2.LINE_AA)

    # Resize the processed image back to the original size
    resized_image = cv2.resize(image, (original_width, original_height))

    # Return the modified image as a NumPy array
    return resized_image


In [13]:
@dataclass
class Arguments:
    """Hyper-parameters used by the following statements to assemble the DETR model."""
    # Not read by any of the visible cells.
    device: str = 'cpu'
    # Forwarded to DETR as num_classes.
    num_classes: int = 1
    # Backbone name handed to Backbone(...).
    backbone: str = "resnet50"
    # Transformer d_model; half of it is the positional-embedding size.
    hidden_dim: int = 256
    # Transformer dropout probability.
    dropout: float = 0.1
    # Forwarded to Transformer as nhead.
    nheads: int = 8
    # Forwarded to Transformer as dim_feedforward.
    dim_feedforward: int = 2048
    # Forwarded to Transformer as num_encoder_layers.
    enc_layers: int = 6
    # Forwarded to Transformer as num_decoder_layers.
    dec_layers: int = 6
    # Forwarded to Transformer as normalize_before.
    pre_norm: bool = False
    # Forwarded to DETR as num_queries.
    num_queries: int = 10

# Single shared configuration instance, using the defaults above.
args = Arguments()

# Sine positional embedding sized to half the transformer width.
N_steps = args.hidden_dim // 2

position_embedding = PositionEmbeddingSine(N_steps, normalize=True)
backbone_ = Backbone(args.backbone, False, False, False)
backbone = Joiner(backbone_, position_embedding)
backbone.num_channels = backbone_.num_channels
transformer = Transformer(
        d_model=args.hidden_dim,
        dropout=args.dropout,
        nhead=args.nheads,
        dim_feedforward=args.dim_feedforward,
        num_encoder_layers=args.enc_layers,
        num_decoder_layers=args.dec_layers,
        normalize_before=args.pre_norm,
        return_intermediate_dec=True,
    )

model = DETR(
        backbone,
        transformer,
        num_classes=args.num_classes,
        num_queries=args.num_queries,
)

ckpt_path = "checkpoint0199.pth"
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
checkpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))
msg = model.load_state_dict(checkpoint['model'])
print(f"Loaded Model From {ckpt_path}: {msg}")

# A freshly built nn.Module is in training mode, and load_state_dict does not
# change that. Switch to eval mode so the transformer's dropout is disabled
# and predictions are deterministic.
model.eval()

# standard PyTorch mean-std input image normalization
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(800),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (centre_x, centre_y, width, height) to (x1, y1, x2, y2).

    ``x`` is unbound along dimension 1 into exactly four components and the
    converted components are stacked back along dimension 1.
    """
    centre_x, centre_y, width, height = x.unbind(1)
    half_w = 0.5 * width
    half_h = 0.5 * height
    corners = (
        centre_x - half_w,
        centre_y - half_h,
        centre_x + half_w,
        centre_y + half_h,
    )
    return torch.stack(corners, dim=1)

def rescale_bboxes(out_bbox, size):
    """Turn cxcywh boxes into xyxy boxes scaled by an image size.

    Args:
        out_bbox: boxes in (centre_x, centre_y, width, height) form.
        size: ``(width, height)`` pair; x coordinates are multiplied by the
            width and y coordinates by the height.
    """
    width, height = size
    scale = torch.tensor([width, height, width, height], dtype=torch.float32)
    return box_cxcywh_to_xyxy(out_bbox) * scale

def detect(im, model, transform, threshold=0.7):
    """Run the detector on one image and return the confident predictions.

    Args:
        im: image accepted by ``transform``; its ``.size`` attribute is used as
            the (width, height) the boxes are scaled to.
        model: callable returning a dict with ``'pred_logits'`` and
            ``'pred_boxes'``.
        transform: callable turning ``im`` into an unbatched image tensor.
        threshold: minimum class probability (over all classes except the last
            one) for a prediction to be kept.

    Returns:
        ``(probas, bboxes)``: the kept class probabilities and the matching
        boxes as xyxy coordinates in the scale of ``im.size``.

    Raises:
        AssertionError: if the transformed image exceeds 1600 pixels on a side.
    """
    # mean-std normalize the input image (batch-size: 1)
    img = transform(im).unsqueeze(0)

    # demo model only support by default images with aspect ratio between 0.5 and 2
    # if you want to use images with an aspect ratio outside this range
    # rescale your image so that the maximum size is at most 1333 for best results
    assert img.shape[-2] <= 1600 and img.shape[-1] <= 1600, 'demo model only supports images up to 1600 pixels on each side'

    # Inference only: skip building the autograd graph so the activations are
    # freed immediately and the returned tensors do not track gradients.
    with torch.no_grad():
        outputs = model(img)

    pred_logits = outputs['pred_logits'].cpu()
    pred_boxes = outputs['pred_boxes'].cpu()

    # keep only predictions with threshold+ confidence; the last logit column
    # is dropped before taking the maximum
    probas = pred_logits.softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(pred_boxes[0, keep], im.size)
    return probas[keep], bboxes_scaled

def detect_characters(image, model):
    """Locate and classify the characters on a cropped licence-plate image.

    NOTE: an earlier cell also defines ``detect_characters``; this later
    definition shadows it. Keep only one of the two definitions.

    The crop is resized to 333x75, binarised with Otsu's threshold, split into
    character candidates by ``find_contours`` and each candidate is classified
    by ``model``. A green box and the predicted class index are drawn for every
    candidate.

    Args:
        image: colour image array of the plate (converted with
            ``cv2.COLOR_BGR2GRAY``, so three channels are expected).
        model: classifier whose weights are (re)loaded from
            ``CharacterModel.pth`` on every call.

    Returns:
        The annotated image resized back to the input's width and height.
        An empty or ``None`` input is returned unchanged.
    """
    # OpenCV cannot resize an empty array, so hand such input straight back.
    if image is None or image.size == 0:
        return image

    cuda = torch.cuda.is_available()
    if cuda:
        model.cuda()
    model.eval()
    # map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
    # machine; load_state_dict then copies into the model's own device.
    model.load_state_dict(
        torch.load("CharacterModel.pth", map_location="cpu", weights_only=True)
    )

    # Store original image dimensions
    original_height, original_width = image.shape[:2]

    image = img_lp = cv2.resize(image, (333, 75))

    # Convert to grayscale and preprocess the license plate image
    img_gray_lp = cv2.cvtColor(img_lp, cv2.COLOR_BGR2GRAY)
    _, img_binary_lp = cv2.threshold(img_gray_lp, 200, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # NOTE(review): the kernel argument below is the tuple (3, 3), not a 3x3
    # array such as np.ones((3, 3)); confirm this is the intended kernel.
    img_binary_lp = cv2.erode(img_binary_lp, (3, 3))
    img_binary_lp = cv2.dilate(img_binary_lp, (3, 3))

    LP_HEIGHT = img_binary_lp.shape[0]
    LP_WIDTH = img_binary_lp.shape[1]

    # Make a 3-pixel border white on all four sides
    img_binary_lp[0:3, :] = 255
    img_binary_lp[:, 0:3] = 255
    img_binary_lp[72:75, :] = 255
    img_binary_lp[:, 330:333] = 255

    # Estimations of character contours sizes of cropped license plates
    dimensions = [0, LP_WIDTH / 2, LP_HEIGHT / 6, 3 * LP_HEIGHT / 3]

    # Get contours within cropped license plate
    char_list, boxes = find_contours(dimensions, img_binary_lp)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for char, box in zip(char_list, boxes):
            char = cv2.resize(char, (28, 28))
            char = torch.from_numpy(char).reshape(1, 1, 28, 28).float()
            if cuda:
                char = char.cuda()

            outputs = model(char)
            pred = torch.argmax(outputs, 1)[0].item()

            # Plain Python ints keep the OpenCV drawing calls independent of
            # the numpy integer type the boxes are stored in.
            x1, y1, x2, y2 = (int(v) for v in box)
            # Draw bounding box and predicted character on the image
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 1)
            image = cv2.putText(image, f'{pred}', (x1 + 2, y1 + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1, cv2.LINE_AA)

    # Resize the processed image back to the original size
    resized_image = cv2.resize(image, (original_width, original_height))

    # Return the modified image as a NumPy array
    return resized_image

def performDetection(image):
    """Detect licence plates in an image and annotate them with their characters.

    Each detected plate is cropped, run through ``detect_characters`` and the
    annotated crop is written back into its place in a copy of the full image.
    The plate's bounding box and confidence are then drawn on that copy.

    Args:
        image: image array as delivered by the Gradio image input
            (presumably H x W x 3 uint8 — confirm against the UI config), or
            ``None`` when no image was supplied.

    Returns:
        ``(annotated_image, label)``: the annotated copy of the full image and
        a summary string with the number of boxes and the detection time.
        ``(None, message)`` when ``image`` is ``None``.
    """
    if image is None:
        return None, "No image provided"

    tic = time.time()
    probs, bboxes = detect(Image.fromarray(image), model, transform, threshold=0.9)
    toc = time.time()

    # Draw on a private copy so the caller's array is left untouched and every
    # plate is cropped from the unannotated pixels.
    annotated = image.copy()
    img_h, img_w = annotated.shape[:2]

    # Process each license plate region
    for c, (xmin, ymin, xmax, ymax) in zip(probs, bboxes.tolist()):

        # Clamp the box to the image: a negative coordinate would otherwise be
        # interpreted as "count from the end" by numpy slicing.
        x0 = min(max(int(xmin), 0), img_w)
        y0 = min(max(int(ymin), 0), img_h)
        x1 = min(max(int(xmax), 0), img_w)
        y1 = min(max(int(ymax), 0), img_h)

        # Crop the plate, recognise its characters and paste the annotated
        # crop back into the full image (it has the same size as the crop).
        if x1 > x0 and y1 > y0:
            cropped_lp = image[y0:y1, x0:x1]
            annotated[y0:y1, x0:x1] = detect_characters(cropped_lp, model2)

        text = f"License-Plate: {c.item():0.2f}"
        font_scale = 0.5
        txt_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, 1)[0]
        color = (0, 0, 255)
        txt_bk_color = [int(v * 0.7) for v in color]
        txt_color = (255, 255, 255)
        cv2.rectangle(annotated, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 3)
        cv2.rectangle(annotated, (int(xmin), int(ymin)), (int(xmin + txt_size[0] + 1),
                                            int(ymin + 1.5 * txt_size[1])), txt_bk_color, -1, )
        cv2.putText(annotated, text, (int(xmin), int(ymin + txt_size[1])),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, txt_color, 1, cv2.LINE_AA,)

    label = f"Detecting {len(bboxes)} Bounding Box(es) Take {toc-tic:0.2f} Seconds"
    return annotated, label

# Build and launch the Gradio UI: an input image and a button on the left,
# the annotated output image and a summary label on the right.
if __name__ == "__main__":
    css = """
        h1 {
            text-align: center;
            display:block;
        }
        """
    demo = gr.Blocks(css = css)

    with demo:
        gr.Markdown(
                    """
                    # Character Recognition
                    """
            )

        # Define Layout
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label = "Input Image")
                button = gr.Button("Detect Characters")
            with gr.Column():
                output_image = gr.Image(label = "Output Image")
                text_label = gr.Label()

        # Define Event
        button.click(performDetection, inputs = [input_image], outputs=[output_image, text_label])

    # Launch the interface. This sits inside the __main__ guard because `demo`
    # only exists there; at module level it would raise NameError on import.
    demo.launch()

  checkpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))


Loaded Model From checkpoint0199.pth: <All keys matched successfully>
Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\gradio\queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\gradio\blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\gradio\blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "c:\Users\Simon\anaconda3\envs\env2\lib\site-packages\anyio\_backends\_asyncio.py", line 2470, in run_sync_in_worker_thread
    return await future
