In [None]:
# !pip install ultralytics -q

In [None]:
# # For cloud instances
# !pip uninstall -y opencv-python opencv-python-headless  # -y: the uninstall confirmation prompt cannot be answered from a notebook cell
# !pip install opencv-python-headless

In [None]:
# !pip install transformers # hf transformers

In [None]:
import torch
import glob
import cv2
import os
import random
import numpy as np
import matplotlib.pyplot as plt

from urllib.request import urlretrieve
from zipfile import ZipFile
from ultralytics import YOLO
from tqdm.auto import tqdm
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

In [None]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# # Trained TrOCR - as of now 
# !wget -q "https://www.dropbox.com/s/3t5b01alpw446p4/lpr_ocr_base.zip?dl=1" -O lpr_ocr_base.zip
# # Inference Data
# !wget -q "https://www.dropbox.com/s/clseshw93iinsff/alpr_image_inference_data.zip?dl=1" -O alpr_image_inference_data.zip

In [None]:
# !unzip lpr_ocr_base 
# !unzip alpr_image_inference_data

In [None]:
# Detector weights. NOTE: this is an absolute, machine-specific location —
# adjust it to wherever `best.pt` lives in your environment.
yolo_weights_path = '/home/jaykumaran/Vision_Projects/Vision_Projects/ANPR-OCR/YOLO11/runs/detect/yolo11m-license/weights/best.pt'
model = YOLO(yolo_weights_path)

In [None]:
# The processor comes from the TrOCR Large Stage 1 checkpoint, i.e. the one that
# has not been fine-tuned for a downstream task (neither the printed nor the
# handwritten variant). The recognition weights are loaded from the local
# `lpr_ocr_base/` directory.
trocr_name = 'microsoft/trocr-large-stage1'
ocr_weights_dir = 'lpr_ocr_base/'

processor = TrOCRProcessor.from_pretrained(trocr_name)
ocr_model = VisionEncoderDecoderModel.from_pretrained(ocr_weights_dir)
ocr_model = ocr_model.to(device)

In [None]:
def ocr(image, processor, model, print_tokens = False):
    """Recognize the text shown in a single cropped image.

    Args:
        image: Image accepted by ``processor`` — a PIL Image or an image array
            (``draw_box`` passes NumPy crops).
        processor: Callable that turns ``image`` into an object exposing
            ``pixel_values`` and that provides ``batch_decode``.
        model: Recognition model exposing ``generate`` and ``device``.
        print_tokens: Whether to print the generated integer tokens or not.

    Returns:
        generated_text: OCR text string decoded from the first (only) sequence.
    """
    pixel_values = processor(image, return_tensors='pt').pixel_values
    # Follow the device of the model that was actually passed in rather than a
    # notebook-level global, so the inputs always land where the weights are.
    pixel_values = pixel_values.to(model.device)

    generated_ids = model.generate(pixel_values)
    if print_tokens:
        print(generated_ids)

    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens = True
    )[0]

    return generated_text

In [None]:
def draw_box(output, frame, processor, ocr_model, print_tokens = False, show = True):
    """Run OCR on every detected box, annotate the frame and optionally plot it.

    Args:
        output: Iterable of detection results; each item must expose
            ``boxes.xyxy`` (rows of ``x1, y1, x2, y2``).
        frame: Image array whose last axis holds the color channels. It is
            never modified: cropping and drawing happen on a channel-reversed
            copy.
        processor: Processor forwarded to ``ocr``.
        ocr_model: Recognition model forwarded to ``ocr``.
        print_tokens: Forwarded to ``ocr``; prints the generated token ids.
        show: When True (default), display the annotated image with matplotlib
            once per detection result.

    Returns:
        The annotated working copy. Its channel order is the reverse of the
        input ``frame`` (e.g. BGR input -> RGB output).
    """
    # Contiguous, channel-reversed copy that OpenCV can draw on in place.
    # NOTE(review): the crops given to OCR therefore have the reverse channel
    # order of whatever the caller passed in — confirm which order the
    # recognition model expects.
    canvas = np.array(frame[..., ::-1])
    line_width = max(round(sum(canvas.shape) / 2 * 0.003), 2)
    font_thickness = max(line_width - 1, 1)

    for out in output:
        for box in out.boxes.xyxy:
            x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            point1 = (x1, y1)  # top-left corner
            point2 = (x2, y2)  # bottom-right corner

            # Crop the plate region and read it
            license_plate_roi = canvas[y1:y2, x1:x2]
            if license_plate_roi.size == 0:
                # Truncating the coordinates to int can collapse a box to zero
                # area; there is nothing to read or draw for it.
                continue
            extracted_text = ocr(
                license_plate_roi, processor, ocr_model, print_tokens=print_tokens
            )

            # Box outline. The color tuple is interpreted in the working copy's
            # channel order (red when that copy is BGR).
            cv2.rectangle(
                canvas,
                point1, point2,
                color = (0, 0, 255),
                thickness=3
            )

            w, h = cv2.getTextSize(
                extracted_text,
                cv2.FONT_HERSHEY_SIMPLEX,
                fontScale = line_width / 3,
                thickness=font_thickness
            )[0]  # text width and height

            w = int(w - (0.20 * w))
            # Put the label above the box when there is room, otherwise inside it
            outside = y1 - h >= 3
            label_corner = (x1 + w, y1 - h - 3 if outside else y1 + h + 3)

            # Filled background for the label
            cv2.rectangle(
                canvas,
                point1, label_corner,
                color = (0, 0, 255),
                thickness=-1,
                lineType=cv2.LINE_AA
            )
            cv2.putText(
                canvas,
                extracted_text,
                (x1, y1 - 5 if outside else y1 + h + 2),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                color = (255, 255, 255),
                fontScale= line_width / 3.8,
                thickness=2,
                lineType=cv2.LINE_AA
            )

        if show:
            plt.figure(figsize=(30,27))
            plt.subplot(1, 2, 1)
            # Reverse the channels again so the plot uses the input's channel order
            plt.imshow(canvas[..., ::-1])
            plt.axis('off')
            plt.show()

    # Callers such as process_video assign the result, so it must be returned
    return canvas

In [None]:
def crop_and_ocr(all_images, processor, ocr_model, print_tokens = False):
    """Detect, read and plot the license plates of every image in a list.

    Args:
        all_images: Iterable of image file paths.
        processor: Processor forwarded to ``draw_box``.
        ocr_model: Recognition model forwarded to ``draw_box``.
        print_tokens: Forwarded to ``draw_box``.

    Returns:
        None. Results are displayed by ``draw_box``.
    """
    for image_name in all_images:
        image_bgr = cv2.imread(image_name)
        if image_bgr is None:
            # cv2.imread returns None instead of raising for files it cannot decode
            print(f"Skipping unreadable image: {image_name}")
            continue

        # Ultralytics documents NumPy inputs as BGR, which is what cv2.imread yields
        output = model.predict(image_bgr)
        # draw_box keeps receiving the RGB view, exactly as before
        draw_box(
            output, image_bgr[..., ::-1], processor, ocr_model,
            print_tokens=print_tokens
        )

In [None]:
# Collect every file in the inference-data folder and run detection + OCR on each
inference_pattern = os.path.join('alpr_image_inference_data', '*')
all_images = glob.glob(inference_pattern)

crop_and_ocr(all_images, processor, ocr_model)

### Video


In [None]:
import cv2
import numpy as np
import os

def process_video(video_path, processor, ocr_model, output_path=None):
    """Detect and read license plates on every frame of a video.

    Each frame is passed to the global detector ``model`` and then to
    ``draw_box``. When ``output_path`` is given, the frames returned by
    ``draw_box`` are converted with ``COLOR_RGB2BGR`` and appended to an
    ``mp4v`` encoded video.

    Args:
        video_path: Path of the input video file.
        processor: Processor forwarded to ``draw_box``.
        ocr_model: Recognition model forwarded to ``draw_box``.
        output_path: File path of the video to write, or None to skip writing.

    Returns:
        None. Problems and the final status are reported with ``print``.
    """
    if not os.path.exists(video_path):
        print("❌ Error: Video file not found!")
        return

    cap = cv2.VideoCapture(video_path)
    out = None
    frames_written = 0

    try:
        if not cap.isOpened():
            print("❌ Error: Cannot open video file!")
            return

        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # `not fps > 0` also rejects NaN; the None test comes first so the
        # comparison can never be evaluated against None.
        if fps is None or not fps > 0:
            fps = 30  # Default FPS

        if width <= 0 or height <= 0:
            print("❌ Error: Invalid video dimensions!")
            return

        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            # A writer that failed to open (e.g. the path is a directory)
            # discards every frame, so stop instead of pretending to record.
            if not out.isOpened():
                print(f"❌ Error: Cannot open video writer for {output_path}")
                return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                print("⚠️ Warning: Empty frame detected, stopping.")
                break  # Stop when video ends or frame is empty

            output = model.predict(frame)
            frame = draw_box(output, frame, processor, ocr_model)

            if out is None:
                continue  # Nothing to write, so nothing to validate either

            # Only array-like, non-empty frames can be written
            if frame is not None and frame.shape[0] > 0 and frame.shape[1] > 0:
                out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                frames_written += 1
            else:
                print("⚠️ Skipping frame: Invalid or empty")
    finally:
        # Release the handles on every exit path, including exceptions
        cap.release()
        if out is not None:
            out.release()

    if out is not None:
        if frames_written > 0:
            print(f"✅ Video saved successfully at {output_path}")
        else:
            print(f"❌ Error: No frames were written to {output_path}")

# Annotate the sample clip and write the result into the current working directory
video_path = "/home/jaykumaran/Vision_Projects/ANPR-OCR/TrOCR/mycarplate.mp4"
output_video_path = "indian_license_output.mp4"
process_video(
    video_path=video_path,
    processor=processor,
    ocr_model=ocr_model,
    output_path=output_video_path,
)


In [None]:
# IPython automagic (same as `%ls`): lists the current working directory.
# NOTE(review): presumably used to check that the output video exists — confirm,
# or remove this cell from the final notebook.
ls

In [None]:
video_path = "/home/jaykumaran/Vision_Projects/ANPR-OCR/TrOCR/mycarplate.mp4"
# cv2.VideoWriter needs a file name with a container extension; a bare
# directory such as "./" cannot be opened for writing. Derive the file name
# from the input video and place it in the current directory.
video_stem = os.path.splitext(os.path.basename(video_path))[0]
output_video_path = os.path.join(".", f"{video_stem}_output.mp4")
process_video(video_path, processor, ocr_model, output_video_path)