# Installing Libraries

In [None]:
# --- Core YOLO + OCR libraries ---
!pip install ultralytics==8.3.203  # stable YOLOv8
!pip install easyocr               # OCR model
!pip install pytesseract           # wrapper for Tesseract OCR
!pip install opencv-python         # OpenCV
!apt-get install -y tesseract-ocr libtesseract-dev  # system Tesseract engine

# --- Extra utils ---
!pip install filterpy              # Kalman filter (tracking)
!pip install roboflow              # for Roboflow dataset download

# --- Fix sympy bug with torch.compile ---
!pip install sympy==1.12

# --- Download the SORT tracker source (sort.py) into the working directory ---
!wget https://raw.githubusercontent.com/abewley/sort/master/sort.py -O sort.py

# --- Strip the plotting imports from sort.py (matplotlib / skimage / pylab are not needed here) ---
!sed -i '/import matplotlib/d' sort.py
!sed -i '/matplotlib.use/d' sort.py
!sed -i '/from skimage/d' sort.py
!sed -i '/import pylab/d' sort.py

In [None]:
import os
from getpass import getpass

from roboflow import Roboflow

# Never store the API key in the notebook: read it from the environment, or
# prompt for it when the variable is not set. A key that was previously
# committed in this cell should be revoked and regenerated.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("roboflow-universe-projects").project("license-plate-recognition-rxg4e")
# Download version 11 of the project in YOLOv8 format
dataset = project.version(11).download("yolov8")

In [None]:
# Disk usage of each entry in the downloaded dataset folder
!du -sh /content/License-Plate-Recognition-11/*

In [None]:
# Count the image files in each split (the trailing numbers are the counts noted by the author)
!find /content/License-Plate-Recognition-11/train/images -type f | wc -l    # Training Images  ---- 7057
!find /content/License-Plate-Recognition-11/valid/images -type f | wc -l    # Validation Images --- 2048
!find /content/License-Plate-Recognition-11/test/images -type f | wc -l     # Test Images       --- 1020


# Train Model (70% train / 20% validation / 10% test)

In [None]:
from ultralytics import YOLO

# Load the pretrained YOLOv8 nano checkpoint (yolov8n.pt)
model = YOLO("yolov8n.pt")

# Fine-tune on the license-plate dataset.
# Author's note: these settings fit on a Colab T4/A100.
# The run is written under project="/content/Yolo_project", name="lp_exp1".
model.train(
    data="License-Plate-Recognition-11/data.yaml",
    epochs=80,
    imgsz=960,
    batch=16,
    freeze=4,
    patience=15,
    workers=2,
    cache=False,
    amp=True,
    project="/content/Yolo_project",
    name="lp_exp1")

## Loading Best Trained Model and Predict on Test Dataset

In [None]:
# Load best model
# NOTE(review): 'best.pt' is resolved relative to the working directory, while the
# training cell writes its run under project="/content/Yolo_project", name="lp_exp1".
# Confirm the checkpoint was copied here, or point this at the training run's weights.
model = YOLO('best.pt')

# Run inference on test images. The evaluation cell below reads the resulting
# label files from <project>/<name>/labels, one "cls xc yc w h conf" row per box.
# NOTE(review): the dataset cells above use /content/License-Plate-Recognition-11,
# not /content/datasets -- confirm this image folder exists. The project folder is
# also spelled "YOLO_project" here but "Yolo_project" in the training cell.
results = model.predict('/content/datasets/test/images', save=True,
                        save_txt=True,
                        save_conf=True,
                        project='/content/YOLO_project',
                        name='predict')



## Utility Functions for Evaluation

In [None]:
import glob, os, cv2

def compute_iou(box1, box2):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes.

    A small epsilon in the denominator keeps the division defined when both
    boxes have zero area.
    """
    # Overlap rectangle: the innermost edges of the two boxes.
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
    overlap = max(0, right - left) * max(0, bottom - top)

    area_first = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_second = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area_first + area_second - overlap + 1e-6
    return overlap / union


def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Convert a normalised YOLO box (centre x/y, width, height) to pixel corners.

    Returns [x1, y1, x2, y2] scaled by the image width and height.
    """
    half_w, half_h = w / 2, h / 2
    return [
        (xc - half_w) * img_w,
        (yc - half_h) * img_h,
        (xc + half_w) * img_w,
        (yc + half_h) * img_h,
    ]

# ---------------------------------------------------------------
# Evaluate saved YOLO predictions against ground-truth labels.
# Each ground-truth box is matched to at most one prediction by IoU.
# ---------------------------------------------------------------
true_dir = "/content/datasets/test/labels"  # GT labels
pred_dir = "/content//YOLO_project/predict/labels"  # YOLO predictions
img_dir  = "/content/datasets/test/images"

iou_threshold = 0.5
summary = []          # one [image_id, TP, FN, FP] row per evaluated image
skipped_images = []   # label files whose image could not be found/read

# The image that belongs to a label file is not guaranteed to be a .jpg
image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".JPG", ".JPEG", ".PNG")

for true_file in sorted(glob.glob(os.path.join(true_dir, "*.txt"))):
    # splitext strips only the final extension; .replace(".txt", "") would also
    # remove a ".txt" that happens to occur inside the file stem.
    image_id = os.path.splitext(os.path.basename(true_file))[0]
    pred_file = os.path.join(pred_dir, image_id + ".txt")

    img = None
    for ext in image_exts:
        img_path = os.path.join(img_dir, image_id + ext)
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            break
    if img is None:
        # Record the skip instead of silently shrinking the test set
        skipped_images.append(image_id)
        continue
    img_h, img_w = img.shape[:2]

    # -------- Ground truth --------
    true_boxes = []
    with open(true_file, "r") as f:
        for line in f:
            vals = line.strip().split()
            if len(vals) == 5:  # cls xc yc w h
                cls, xc, yc, w, h = map(float, vals)
                x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, w, h, img_w, img_h)
                true_boxes.append([int(cls), x1, y1, x2, y2])

    # -------- Predictions --------
    pred_boxes = []
    if os.path.exists(pred_file):
        with open(pred_file, "r") as f:
            for line in f:
                vals = line.strip().split()
                if len(vals) == 6:  # cls xc yc w h conf
                    cls, xc, yc, w, h, conf = map(float, vals)
                    x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, w, h, img_w, img_h)
                    pred_boxes.append([int(cls), x1, y1, x2, y2, conf])

    # -------- IoU Matching --------
    matched, missed, false_pos = 0, 0, 0
    used_preds = set()

    for tb in true_boxes:
        # Take the best-overlapping unused prediction, not merely the first
        # one that clears the threshold.
        best_iou, best_idx = -1.0, None
        for i, pb in enumerate(pred_boxes):
            if i in used_preds:
                continue
            iou = compute_iou(tb[1:], pb[1:5])
            if iou >= iou_threshold and iou > best_iou:
                best_iou, best_idx = iou, i
        if best_idx is None:
            missed += 1
        else:
            matched += 1
            used_preds.add(best_idx)

    # Predictions that matched no ground-truth box are false positives
    false_pos = len(pred_boxes) - len(used_preds)
    summary.append([image_id, matched, missed, false_pos])

# Totals
total_TP = sum(r[1] for r in summary)
total_FN = sum(r[2] for r in summary)
total_FP = sum(r[3] for r in summary)

# The epsilon keeps the ratios defined when a denominator is zero
overall_precision = total_TP / (total_TP + total_FP + 1e-6)
overall_recall    = total_TP / (total_TP + total_FN + 1e-6)
# "Accuracy" here is TP / (TP + FP + FN); true negatives are not defined for detection
overall_accuracy  = total_TP / (total_TP + total_FP + total_FN + 1e-6)

print("\n📊 Final Totals Across Test Set:")
print(f"TP={total_TP}, FN={total_FN}, FP={total_FP}")
print(f"Precision={overall_precision:.3f}, Recall={overall_recall:.3f}, Accuracy={overall_accuracy:.3f}")
if skipped_images:
    print(f"Skipped {len(skipped_images)} label file(s) with no readable image in {img_dir}")


## Main Pipeline For Plate Detection From Video



### Indian Number Plate Recognition

In [None]:
import cv2
import sqlite3
import os, re
import numpy as np
import pytesseract
from sort import Sort
from ultralytics import YOLO
from google.colab.patches import cv2_imshow  # for Colab debugging


# ---------------------------
# Utility functions
# ---------------------------
def compute_iou(boxA, boxB):
    """Return the IoU of two [x1, y1, x2, y2] boxes.

    Negative extents are clamped to zero, so an inverted box counts as zero area.
    """
    def _area(box):
        return max(0, (box[2] - box[0])) * max(0, (box[3] - box[1]))

    overlap_w = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    overlap_h = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    overlap = overlap_w * overlap_h
    # The epsilon keeps the ratio defined when both boxes are empty
    return overlap / (_area(boxA) + _area(boxB) - overlap + 1e-6)


def preprocess_plate(plate_crop):
    """Turn a BGR plate crop into a binarised image for OCR.

    Returns None when the crop is missing or empty.
    """
    if plate_crop is None or plate_crop.size == 0:
        return None

    # Upscale 3x, then convert to grayscale
    enlarged = cv2.resize(plate_crop, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    grey = cv2.cvtColor(enlarged, cv2.COLOR_BGR2GRAY)

    # Gentle denoising followed by CLAHE contrast enhancement
    smoothed = cv2.bilateralFilter(grey, d=5, sigmaColor=40, sigmaSpace=40)
    equalised = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)).apply(smoothed)

    # Otsu threshold (inverted), then a 2x2 opening to remove small dots
    binary = cv2.threshold(equalised, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8))

    # Invert back before handing the image to OCR
    return cv2.bitwise_not(opened)


# Indian number plate recognition
def read_plate_text(img, easy_reader=None, conf_threshold=0.5):
    """
    Read license-plate text from a *preprocessed* plate image.

    Strategy:
      1) Run Tesseract with several page-segmentation modes (PSMs).
      2) Optionally add the first EasyOCR result.
      3) Clean each candidate and correct look-alike characters by slot
         (letter slots vs digit slots).
      4) Score candidates (OCR confidence + length bonus + plate-format bonus)
         and keep the best one.

    Args:
        img: Preprocessed plate image exposing ``.size``, or None.
        easy_reader: Optional EasyOCR reader; only used when provided.
        conf_threshold: Currently unused; kept so existing callers keep working.

    Returns:
        (plate_text, score). plate_text is "UNKNOWN" when nothing usable was
        read. score is the winning candidate's score clamped to [0, 1]; it
        includes the length/format bonuses, so it is not a raw OCR confidence.
    """

    if img is None or img.size == 0:
        return "UNKNOWN", 0.0

    whitelist = "-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    # Accepted plate formats
    patterns = [
        re.compile(r'^[A-Z]{2}\d{2}[A-Z]{2}\d{4}$'),   # KA02MN1826
        re.compile(r'^[A-Z]{2}\d{2}[A-Z]{1}\d{4}$'),   # KA02M1826
        re.compile(r'^[A-Z]{2}\d{1}[A-Z]{2}\d{4}$')    # older variants
    ]

    # Map digits<->letters only where they don't belong
    letter_map = {'0':'O','1':'I','2':'Z','5':'S','8':'B','6':'G','4':'A'}
    digit_map  = {'O':'0','D':'0','Q':'0','I':'1','L':'1','Z':'2','S':'5','B':'8','G':'6','T':'7'}

    # -------- helpers --------
    def run_tess(psm):
        """Run Tesseract in one PSM; return (raw_text, mean word confidence in [0..1])."""
        cfg = f"--psm {psm} --oem 3 {whitelist}"
        d = pytesseract.image_to_data(img, config=cfg, output_type=pytesseract.Output.DICT)
        raw = "".join(t for t in d["text"] if t.strip()).upper()
        confs = []
        for c in d["conf"]:
            # Entries without a confidence are reported as -1 (int, float or str
            # depending on the pytesseract version); parse numerically and drop them.
            try:
                value = float(c)
            except (TypeError, ValueError):
                continue
            if value >= 0:
                confs.append(value)
        avg = (np.mean(confs) / 100.0) if confs else 0.0
        return raw, avg

    def clean(s):  # remove junk and clamp length
        s = re.sub(r"[^A-Z0-9]", "", s.upper())
        return s[:12]

    def generic_layout(n):
        """Slot layout for an n-character string: letters at 0-1, digits at 2-3,
        letters at 4-5, digits in the last four slots from index 6 on.
        '?' marks slots that are left untouched."""
        tail_start = max(6, n - 4)
        slots = []
        for i in range(n):
            if i < 2:
                slots.append("L")
            elif i < 4:
                slots.append("D")
            elif i < 6:
                slots.append("L")
            elif i >= tail_start:
                slots.append("D")
            else:
                slots.append("?")
        return "".join(slots)

    def apply_layout(chars, layout):
        """Swap look-alike characters so each slot holds the expected kind."""
        fixed = []
        for ch, slot in zip(chars, layout):
            if slot == "L" and ch.isdigit():
                ch = letter_map.get(ch, ch)
            elif slot == "D" and ch.isalpha():
                ch = digit_map.get(ch, ch)
            fixed.append(ch)
        return "".join(fixed)

    def fix_by_slots(s):
        chars = clean(s)
        n = len(chars)
        if n == 0:
            return ""

        if n == 9:
            # Both 9-character formats end in four digits, so index 5 is a digit.
            # Treating it as a letter slot (as the 10-character layout does)
            # rewrites valid digits such as '1' -> 'I'.
            layouts = ("LLDDLDDDD", "LLDLLDDDD")
        else:
            layouts = (generic_layout(n),)

        options = [apply_layout(chars, layout) for layout in layouts]

        def rank(option):
            # Prefer a result that matches a known format, then the fewest substitutions
            is_valid = any(p.fullmatch(option) for p in patterns)
            changed = sum(1 for a, b in zip(chars, option) if a != b)
            return (not is_valid, changed)

        best = min(options, key=rank)
        # Clamp over-long reads to the maximum plate length
        return best[:10]

    def score(s, base_conf):
        s = clean(s)
        sc = base_conf + 0.05 * min(len(s), 10)
        if any(p.fullmatch(s) for p in patterns):
            sc += 0.5     # strong format bonus
        return s, sc

    # -------- collect candidates --------
    cands = []
    for psm in (8, 7, 13):                      # 8=word, 7=single line, 13=raw line
        raw, conf = run_tess(psm)
        cands.append((raw, conf))

    if easy_reader is not None:
        try:
            r = easy_reader.readtext(img, detail=1, paragraph=False)
            if r:
                cands.append( (r[0][1].upper(), float(r[0][2])) )
        except Exception as e:
            print(f"[EasyOCR error] {e}")

    # -------- normalize, correct, choose best --------
    best_text, best_score = "UNKNOWN", 0.0
    for raw, conf in cands:
        fixed = fix_by_slots(raw)               # O→0, Z→2 etc by position
        s, sc = score(fixed, conf)
        if 7 <= len(s) <= 10 and sc > best_score:
            best_text, best_score = s, sc

    # As a last resort, try image_to_string (sometimes returns a cleaner line)
    if best_score < 0.25:
        fallback = pytesseract.image_to_string(
            img,
            config=f"--psm 7 --oem 3 {whitelist}"
        ).upper()
        fixed = fix_by_slots(fallback)
        s, sc = score(fixed, 0.25)
        # An empty read must not replace "UNKNOWN" with an empty plate string
        if s and sc > best_score:
            best_text, best_score = s, sc

    return best_text, float(max(0.0, min(1.0, best_score)))



def save_crop(crop, frame_id, output_dir):
    """Save a cropped plate image for debugging.

    Args:
        crop: Image array to write. None or an empty array is skipped.
        frame_id: Identifier embedded in the file name (``frame{frame_id}_plate.jpg``).
            The name depends only on this value, so callers saving several crops
            per frame must pass a value that is unique per crop.
        output_dir: Directory under which the ``crops_4`` folder is created.

    Returns:
        Path of the written file, or None when nothing was written.
    """
    # cv2.imwrite raises on an empty image, which would abort the whole video run
    if crop is None or crop.size == 0:
        return None

    crops_dir = os.path.join(output_dir, "crops_4")
    os.makedirs(crops_dir, exist_ok=True)
    path = os.path.join(crops_dir, f"frame{frame_id}_plate.jpg")
    # imwrite reports failure through its return value; don't hand back a path
    # for a file that was not written
    if not cv2.imwrite(path, crop):
        return None
    return path


def insert_db(cursor, frame_id, track_id, vlabel, vx1, vy1, vx2, vy2,
              x1, y1, x2, y2, plate_text, ocr_confidence):
    """Insert one plate reading into the ``plates`` table.

    The vehicle and plate boxes are stored as comma-separated "x1,y1,x2,y2" text.
    The caller is responsible for committing.
    """
    vehicle_bbox = "{},{},{},{}".format(vx1, vy1, vx2, vy2)
    plate_bbox = "{},{},{},{}".format(x1, y1, x2, y2)
    sql = "INSERT INTO plates (plate_text, ocr_confidence, frame_id, vehicle_id, vehicle_type, vehicle_bbox, plate_bbox) VALUES (?, ?, ?, ?, ?, ?, ?)"
    row = (plate_text, ocr_confidence, frame_id, track_id, vlabel, vehicle_bbox, plate_bbox)
    cursor.execute(sql, row)


# ---------------------------
# Main ANPR pipeline
# ---------------------------
def _match_plate_to_vehicle(plate_box, tracked_objects, vehicle_data):
    """Find the tracked vehicle that best overlaps a plate box.

    Args:
        plate_box: [x1, y1, x2, y2] of the plate.
        tracked_objects: Rows of (x1, y1, x2, y2, track_id) from the tracker.
        vehicle_data: (x1, y1, x2, y2, label) tuples for this frame's detections.

    Returns:
        (vehicle_bbox, track_id, vehicle_label), or (None, -1, "UNKNOWN") when
        no track overlaps the plate.
    """
    best_iou, best_track = 0, None
    for track in tracked_objects:
        iou = compute_iou(plate_box, track[:4])
        if iou > best_iou:
            best_iou, best_track = iou, track
    if best_track is None:
        return None, -1, "UNKNOWN"

    # Tracker rows carry only a box and an id. Take the bbox and label from the
    # detection that overlaps the chosen track most, so they belong to the same vehicle.
    vehicle_bbox = tuple(int(v) for v in best_track[:4])
    vehicle_label, best_overlap = "UNKNOWN", 0
    for (vx1, vy1, vx2, vy2, vlabel) in vehicle_data:
        overlap = compute_iou(best_track[:4], [vx1, vy1, vx2, vy2])
        if overlap > best_overlap:
            best_overlap = overlap
            vehicle_bbox, vehicle_label = (vx1, vy1, vx2, vy2), vlabel
    return vehicle_bbox, int(best_track[4]), vehicle_label


# ---------------------------
# Main ANPR pipeline
# ---------------------------
def run_npr(video_path, output_dir, conf=0.25,
            plate_weights="/content/drive/MyDrive/YOLO_project/output/best.pt"):
    """Detect vehicles and plates in a video, OCR the plates, and log the results.

    Writes an annotated video (``annotated_video_4.mp4``), a SQLite database
    (``plates_4.db``) and debug plate crops under ``output_dir``.

    Args:
        video_path: Path of the input video.
        output_dir: Directory for all outputs (created if missing).
        conf: Detection confidence threshold passed to both YOLO models.
        plate_weights: Path of the trained plate-detector weights.

    Raises:
        IOError: If the video cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)

    db_path = os.path.join(output_dir, "plates_4.db")
    out_path = os.path.join(output_dir, "annotated_video_4.mp4")
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS plates
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
     plate_text TEXT,
     ocr_confidence REAL,
     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
     frame_id INTEGER,
     vehicle_id INTEGER,
     vehicle_type TEXT,
     vehicle_bbox TEXT,
     plate_bbox TEXT)
    '''

    conn = sqlite3.connect(db_path)
    cap, out = None, None
    frame_count, db_inserts = 0, 0
    try:
        # --- Setup DB ---
        c = conn.cursor()
        c.execute(create_table_sql)
        conn.commit()

        # --- Load models ---
        vehicle_model = YOLO("yolov8s.pt")   # vehicles
        plate_model   = YOLO(plate_weights)  # license plates

        # --- Video setup ---
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            # Without this check a bad path produced an empty video and a "saved" message
            raise IOError(f"Could not open video: {video_path}")
        fps = cap.get(cv2.CAP_PROP_FPS) or 25
        width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"Video resolution: {width}x{height}, FPS={fps}")

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))

        tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.1)

        # --- Processing loop ---
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            # --- Detect vehicles ---
            results_vehicle = vehicle_model(frame, imgsz=960, conf=conf)[0]
            detections, vehicle_data = [], []
            for box in results_vehicle.boxes:
                cls_id = int(box.cls[0])
                vlabel = vehicle_model.names[cls_id]
                if vlabel in ["car", "bus", "motorcycle", "truck"]:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    score = float(box.conf[0])
                    detections.append([x1, y1, x2, y2, score])
                    vehicle_data.append((x1, y1, x2, y2, vlabel))
            tracked_objects = tracker.update(np.array(detections) if len(detections) > 0 else np.empty((0, 5)))

            # --- Detect plates ---
            results_plate = plate_model(frame, imgsz=960, conf=conf)[0]
            print(f"[Frame {frame_count}] Vehicles={len(vehicle_data)}, Plates={len(results_plate.boxes)}")

            # OCR crops come from an untouched copy, so boxes/text drawn for
            # earlier plates in this frame never end up inside a later crop.
            clean_frame = frame.copy()

            for plate_idx, box in enumerate(results_plate.boxes):
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                # Keep the crop inside the frame (negative indices would wrap around)
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(frame_w, x2), min(frame_h, y2)

                # --- Match plate with vehicles ---
                matched_vehicle, matched_track_id, matched_vlabel = _match_plate_to_vehicle(
                    [x1, y1, x2, y2], tracked_objects, vehicle_data)

                # --- OCR ---
                plate_crop = clean_frame[y1:y2, x1:x2]
                if plate_crop.size == 0:
                    continue  # degenerate box: nothing to save or read
                # Include the plate index so several plates in one frame don't
                # overwrite each other's debug crop
                save_crop(plate_crop, f"{frame_count}_{plate_idx}", output_dir)
                preprocessed = preprocess_plate(plate_crop)
                plate_text, ocr_conf = read_plate_text(preprocessed)

                # --- Save to DB (only if valid plate found) ---
                if plate_text != "UNKNOWN" and ocr_conf > 0.3:
                    vx1, vy1, vx2, vy2 = matched_vehicle if matched_vehicle else (-1, -1, -1, -1)
                    insert_db(c, frame_count, matched_track_id, matched_vlabel,
                              vx1, vy1, vx2, vy2, x1, y1, x2, y2,
                              plate_text, ocr_conf)
                    conn.commit()
                    db_inserts += 1

                # --- Draw annotations ---
                cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0), 2)
                cv2.putText(frame, f"{plate_text} ({ocr_conf:.2f})", (x1, y1-5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)
                if matched_vehicle:
                    vx1, vy1, vx2, vy2 = matched_vehicle
                    cv2.rectangle(frame, (vx1,vy1), (vx2,vy2), (255,0,0), 2)
                    cv2.putText(frame, f"{matched_vlabel} ID {matched_track_id}", (vx1, vy1-5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2)

            # Show every 30th frame in Colab
            if frame_count % 30 == 0:
                cv2_imshow(frame)

            out.write(frame)
    finally:
        # Release everything even when a frame fails mid-run
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
        conn.close()
        cv2.destroyAllWindows()

    print(f"✅ Video saved: {out_path}")
    print(f"✅ Database saved: {db_path} with {db_inserts} valid rows")

### UK Number Plate Recognition

In [None]:
import cv2
import sqlite3
import os, re
import numpy as np
import pytesseract
from sort import Sort
from ultralytics import YOLO
from google.colab.patches import cv2_imshow  # for Colab debugging


# ---------------------------
# Utility functions
# ---------------------------
def compute_iou(boxA, boxB):
    """IoU of two [x1, y1, x2, y2] boxes; negative extents are clamped to zero."""
    inter_left = max(boxA[0], boxB[0])
    inter_top = max(boxA[1], boxB[1])
    inter_right = min(boxA[2], boxB[2])
    inter_bottom = min(boxA[3], boxB[3])
    shared = max(0, inter_right - inter_left) * max(0, inter_bottom - inter_top)

    sizes = []
    for box in (boxA, boxB):
        sizes.append(max(0, (box[2] - box[0])) * max(0, (box[3] - box[1])))

    # The epsilon keeps the ratio defined when both boxes are empty
    return shared / (sizes[0] + sizes[1] - shared + 1e-6)


def preprocess_plate(plate_crop):
    """Prepare a BGR plate crop for OCR; returns None for a missing or empty crop.

    Steps: 3x cubic upscale, grayscale, bilateral denoise, CLAHE, inverted Otsu
    threshold, 2x2 morphological opening, final inversion.
    """
    if plate_crop is None or plate_crop.size == 0:
        return None

    upscaled = cv2.resize(plate_crop, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    grey = cv2.bilateralFilter(
        cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY), d=5, sigmaColor=40, sigmaSpace=40
    )
    contrast = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    grey = contrast.apply(grey)

    binary = cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    despeckled = cv2.morphologyEx(binary, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8))
    return cv2.bitwise_not(despeckled)


def correct_uk_plate(raw_text: str) -> str:
    """
    Correct OCR look-alike errors for UK number plates (AA NN AAA).

    The text is upper-cased and stripped of everything except A-Z and 0-9.
    When exactly 7 characters remain, digits found in letter slots and letters
    found in digit slots are swapped for their look-alikes. Any other length is
    returned cleaned but otherwise unchanged.

    Args:
        raw_text: Raw OCR output.

    Returns:
        The cleaned (and, for 7-character reads, slot-corrected) plate string.
    """
    # One letter map for every letter slot: previously '6' and '8' were only
    # corrected in the last three characters, not in the first two.
    to_letter = {"0": "O", "1": "I", "5": "S", "6": "G", "8": "B"}
    to_digit = {"O": "0", "I": "1", "S": "5", "B": "8", "G": "6"}
    layout = "LLDDLLL"  # L = letter slot, D = digit slot

    plate = re.sub(r"[^A-Z0-9]", "", raw_text.upper())
    if len(plate) != len(layout):
        return plate  # skip correction if length not 7

    corrected = []
    for ch, slot in zip(plate, layout):
        if slot == "L" and ch.isdigit():
            ch = to_letter.get(ch, ch)
        elif slot == "D" and ch.isalpha():
            ch = to_digit.get(ch, ch)
        corrected.append(ch)
    return "".join(corrected)


def read_plate_text(img):
    """Read a UK-format plate with Tesseract (single-line mode).

    Returns (text, confidence). confidence is the highest word confidence scaled
    to [0..1]. ("UNKNOWN", 0.0) is returned when there is no image, no text, or
    Tesseract fails.
    """
    if img is None:
        return "UNKNOWN", 0.0

    plate_text, confidence = "UNKNOWN", 0.0
    tess_config = "--psm 7 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    try:
        ocr = pytesseract.image_to_data(img, config=tess_config, output_type=pytesseract.Output.DICT)

        words = []
        for token in ocr["text"]:
            if token.strip():
                words.append(token.strip().upper())
        # Entries reported as -1 carry no confidence
        word_confs = [int(value) for value in ocr["conf"] if str(value) != '-1']

        if words:
            plate_text = correct_uk_plate("".join(words))
            confidence = max(word_confs) / 100 if word_confs else 0.0
    except Exception as e:
        # Best effort: report the failure and fall through to the current values
        print(f"[Tesseract error] {e}")
    return plate_text, confidence


def save_crop(crop, frame_id, output_dir):
    """Save a cropped plate image for debugging.

    Args:
        crop: Image array to write. None or an empty array is skipped.
        frame_id: Identifier embedded in the file name (``frame{frame_id}_plate.jpg``).
            The name depends only on this value, so callers saving several crops
            per frame must pass a value that is unique per crop.
        output_dir: Directory under which the ``crops_final`` folder is created.

    Returns:
        Path of the written file, or None when nothing was written.
    """
    # cv2.imwrite raises on an empty image, which would abort the whole video run
    if crop is None or crop.size == 0:
        return None

    crops_dir = os.path.join(output_dir, "crops_final")
    os.makedirs(crops_dir, exist_ok=True)
    path = os.path.join(crops_dir, f"frame{frame_id}_plate.jpg")
    # imwrite reports failure through its return value; don't hand back a path
    # for a file that was not written
    if not cv2.imwrite(path, crop):
        return None
    return path


def insert_db(cursor, frame_id, track_id, vlabel, vx1, vy1, vx2, vy2,
              x1, y1, x2, y2, plate_text, ocr_confidence):
    """Insert one plate reading into the ``plates`` table (caller commits).

    Both bounding boxes are serialised as comma-separated "x1,y1,x2,y2" text.
    """
    boxes_as_text = [
        "{},{},{},{}".format(*corners)
        for corners in ((vx1, vy1, vx2, vy2), (x1, y1, x2, y2))
    ]
    cursor.execute(
        "INSERT INTO plates (plate_text, ocr_confidence, frame_id, vehicle_id, vehicle_type, vehicle_bbox, plate_bbox) VALUES (?, ?, ?, ?, ?, ?, ?)",
        (plate_text, ocr_confidence, frame_id, track_id, vlabel,
         boxes_as_text[0], boxes_as_text[1])
    )


# ---------------------------
# Detection helpers
# ---------------------------
def detect_vehicles(frame, vehicle_model, tracker, conf=0.25):
    """Detect vehicles in a frame and feed them to the tracker.

    Only the classes car, bus, motorcycle and truck are kept.

    Returns:
        (tracked_objects, vehicle_data): the tracker's output for this frame and
        a list of (x1, y1, x2, y2, label) tuples for the kept detections.
    """
    wanted_labels = ["car", "bus", "motorcycle", "truck"]
    prediction = vehicle_model(frame, imgsz=640, conf=conf)[0]

    tracker_rows, vehicle_data = [], []
    for det in prediction.boxes:
        label = vehicle_model.names[int(det.cls[0])]
        if label not in wanted_labels:
            continue
        left, top, right, bottom = (int(v) for v in det.xyxy[0])
        tracker_rows.append([left, top, right, bottom, float(det.conf[0])])
        vehicle_data.append((left, top, right, bottom, label))

    # The tracker is always given a 5-column array, even when there are no detections
    if len(tracker_rows) > 0:
        tracker_input = np.array(tracker_rows)
    else:
        tracker_input = np.empty((0, 5))
    return tracker.update(tracker_input), vehicle_data


def detect_plates(frame, plate_model, conf=0.25):
    """Run the plate detector on a frame and return integer corner tuples, one per box."""
    prediction = plate_model(frame, imgsz=1280, conf=conf)[0]
    plate_boxes = []
    for det in prediction.boxes:
        plate_boxes.append(tuple(int(v) for v in det.xyxy[0]))
    return plate_boxes


# ---------------------------
# Main ANPR pipeline
# ---------------------------
def _match_plate_to_vehicle(plate_box, tracked_objects, vehicle_data):
    """Find the tracked vehicle that best overlaps a plate box.

    Args:
        plate_box: [x1, y1, x2, y2] of the plate.
        tracked_objects: Rows of (x1, y1, x2, y2, track_id) from the tracker.
        vehicle_data: (x1, y1, x2, y2, label) tuples for this frame's detections.

    Returns:
        (vehicle_bbox, track_id, vehicle_label), or (None, -1, "UNKNOWN") when
        no track overlaps the plate.
    """
    best_iou, best_track = 0, None
    for track in tracked_objects:
        iou = compute_iou(plate_box, track[:4])
        if iou > best_iou:
            best_iou, best_track = iou, track
    if best_track is None:
        return None, -1, "UNKNOWN"

    # Tracker rows carry only a box and an id. Take the bbox and label from the
    # detection that overlaps the chosen track most, so they belong to the same vehicle.
    vehicle_bbox = tuple(int(v) for v in best_track[:4])
    vehicle_label, best_overlap = "UNKNOWN", 0
    for (vx1, vy1, vx2, vy2, vlabel) in vehicle_data:
        overlap = compute_iou(best_track[:4], [vx1, vy1, vx2, vy2])
        if overlap > best_overlap:
            best_overlap = overlap
            vehicle_bbox, vehicle_label = (vx1, vy1, vx2, vy2), vlabel
    return vehicle_bbox, int(best_track[4]), vehicle_label


def run_npr(video_path, output_dir, conf=0.25, plate_weights="/content/best.pt"):
    """Detect vehicles and plates in a video, OCR the plates, and log the results.

    Writes an annotated video (``annotated_video_final.mp4``), a SQLite database
    (``plates_final.db``) and debug plate crops under ``output_dir``, then prints
    min/max/mean OCR confidence over all plate reads.

    Args:
        video_path: Path of the input video.
        output_dir: Directory for all outputs (created if missing).
        conf: Detection confidence threshold passed to both YOLO models.
        plate_weights: Path of the trained plate-detector weights.

    Raises:
        IOError: If the video cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)

    db_path = os.path.join(output_dir, "plates_final.db")
    out_path = os.path.join(output_dir, "annotated_video_final.mp4")
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS plates
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
     plate_text TEXT,
     ocr_confidence REAL,
     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
     frame_id INTEGER,
     vehicle_id INTEGER,
     vehicle_type TEXT,
     vehicle_bbox TEXT,
     plate_bbox TEXT)
    '''

    conn = sqlite3.connect(db_path)
    cap, out = None, None
    frame_count, db_inserts = 0, 0
    ocr_confidences = []
    try:
        # --- Setup DB ---
        c = conn.cursor()
        c.execute(create_table_sql)
        conn.commit()

        # --- Load models ---
        vehicle_model = YOLO("yolov8s.pt")   # vehicles
        plate_model   = YOLO(plate_weights)  # license plates

        # --- Video setup ---
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            # Without this check a bad path produced an empty video and a "saved" message
            raise IOError(f"Could not open video: {video_path}")
        fps = cap.get(cv2.CAP_PROP_FPS) or 25
        width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"Video resolution: {width}x{height}, FPS={fps}")

        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))

        tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.1)

        # --- Processing loop ---
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            tracked_objects, vehicle_data = detect_vehicles(frame, vehicle_model, tracker, conf=conf)
            plate_boxes = detect_plates(frame, plate_model, conf=conf)
            print(f"[Frame {frame_count}] Vehicles={len(vehicle_data)}, Plates={len(plate_boxes)}")

            # OCR crops come from an untouched copy, so boxes/text drawn for
            # earlier plates in this frame never end up inside a later crop.
            clean_frame = frame.copy()

            for plate_idx, (x1, y1, x2, y2) in enumerate(plate_boxes):
                # Keep the crop inside the frame (negative indices would wrap around)
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(frame_w, x2), min(frame_h, y2)

                # Match plate with vehicles
                matched_vehicle, matched_track_id, matched_vlabel = _match_plate_to_vehicle(
                    [x1, y1, x2, y2], tracked_objects, vehicle_data)

                # OCR
                plate_crop = clean_frame[y1:y2, x1:x2]
                if plate_crop.size == 0:
                    continue  # degenerate box: nothing to save or read
                # Include the plate index so several plates in one frame don't
                # overwrite each other's debug crop
                save_crop(plate_crop, f"{frame_count}_{plate_idx}", output_dir)
                preprocessed = preprocess_plate(plate_crop)
                plate_text, ocr_conf = read_plate_text(preprocessed)
                ocr_confidences.append(ocr_conf)

                # Save if valid
                if plate_text != "UNKNOWN" and ocr_conf > 0.3:
                    vx1, vy1, vx2, vy2 = matched_vehicle if matched_vehicle else (-1, -1, -1, -1)
                    insert_db(c, frame_count, matched_track_id, matched_vlabel,
                              vx1, vy1, vx2, vy2, x1, y1, x2, y2,
                              plate_text, ocr_conf)
                    conn.commit()
                    db_inserts += 1

                # Draw
                cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0), 2)
                cv2.putText(frame, f"{plate_text} ({ocr_conf:.2f})", (x1, y1-5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)
                if matched_vehicle:
                    vx1, vy1, vx2, vy2 = matched_vehicle
                    cv2.rectangle(frame, (vx1,vy1), (vx2,vy2), (255,0,0), 2)
                    cv2.putText(frame, f"{matched_vlabel} ID {matched_track_id}", (vx1, vy1-5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2)

            # Show every 30th frame in Colab
            if frame_count % 30 == 0:
                cv2_imshow(frame)
            out.write(frame)
    finally:
        # Release everything even when a frame fails mid-run
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
        conn.close()
        cv2.destroyAllWindows()

    # Confidence stats
    if ocr_confidences:
        print(f"OCR confidence: min={min(ocr_confidences):.2f}, max={max(ocr_confidences):.2f}, avg={np.mean(ocr_confidences):.2f}")

    print(f"✅ Video saved: {out_path}")
    print(f"✅ Database saved: {db_path} with {db_inserts} rows")


In [None]:
# Input video path and output directory
# NOTE(review): both paths are under /content/drive/MyDrive, but no cell in this
# notebook mounts Google Drive -- confirm it is mounted before running.
# run_npr resolves to whichever definition was executed most recently (the UK
# cell when the notebook is run top to bottom).
video_path="/content/drive/MyDrive/YOLO_project/Traffic_Control_CCTV.mp4"
output_dir="/content/drive/MyDrive/YOLO_project/output"
run_npr(video_path,output_dir)

## Utility Functions for the Plates Database

In [None]:
def read_plates_db(db_path):
    """
    Read plates.db and summarise it per (plate_text, vehicle_id, vehicle_type).

    Each output row holds the grouping keys, the first and last frame in which
    the combination was seen, and how often it was seen, followed by the
    remaining table columns. Those remaining columns are not aggregated, so
    their values come from one row of the group chosen by SQLite.

    Args:
        db_path (str): Path to plates.db
    Returns:
        DataFrame with plate summary, ordered by first_seen, with unique column names
    """
    # Local import: no cell in the notebook imports pandas at top level
    import pandas as pd

    query = """
        SELECT plate_text,
               vehicle_id,
               vehicle_type,
               MIN(frame_id) AS first_seen,
               MAX(frame_id) AS last_seen,
               COUNT(*) AS times_seen,
               *
        FROM plates
        GROUP BY plate_text, vehicle_id, vehicle_type
        ORDER BY first_seen;
    """
    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails
        conn.close()

    # "SELECT ..., *" repeats the three grouping columns; keep the first
    # occurrence so that df["plate_text"] is a single column again.
    return df.loc[:, ~df.columns.duplicated()]

# Removing Unnecessary Rows
def summarize_and_save(raw_db_path, summary_db_path, conf_thresh=0.8):
    """
    Summarize raw ANPR results into a clean database.

    Steps:
    1. Load all rows from `plates` table in raw_db_path.
    2. Keep only valid rows:
       - vehicle_id != -1
       - plate_text != 'UNKNOWN'
       - ocr_confidence >= conf_thresh
    3. Keep ALL rows with ocr_confidence == 1.0.
    4. For each vehicle_id with lower confidence, keep only the highest one.
    5. Drop duplicate plate_text (keep best confidence per plate_text).
    6. Save the summary to a new SQLite DB under `plates_summary`.

    Args:
        raw_db_path: SQLite file containing the `plates` table.
        summary_db_path: SQLite file to write; `plates_summary` is replaced if present.
        conf_thresh: Minimum ocr_confidence a row needs to be considered.

    Returns:
        The summary DataFrame; an empty DataFrame when there is nothing to
        summarise (in which case no summary database is written).
    """
    # Local import: no cell in the notebook imports pandas at top level
    import pandas as pd

    # Load raw detections
    conn = sqlite3.connect(raw_db_path)
    try:
        df = pd.read_sql_query("SELECT * FROM plates", conn)
    finally:
        conn.close()

    if df.empty:
        print("⚠️ No rows found in raw database.")
        return pd.DataFrame()

    # Clean rows
    df_clean = df[(df["vehicle_id"] != -1) &
                  (df["plate_text"] != "UNKNOWN") &
                  (df["ocr_confidence"] >= conf_thresh)]

    if df_clean.empty:
        print("⚠️ No valid rows after filtering.")
        return pd.DataFrame()

    # Split: perfect vs imperfect
    perfect = df_clean[df_clean["ocr_confidence"] == 1.0]
    imperfect = df_clean[df_clean["ocr_confidence"] < 1.0]

    # Pick best per vehicle_id only from imperfect
    imperfect_best = imperfect.loc[imperfect.groupby("vehicle_id")["ocr_confidence"].idxmax()]

    # Combine both sets
    summary = pd.concat([perfect, imperfect_best], ignore_index=True)

    # Drop duplicates by plate_text (keep highest confidence). A stable sort makes
    # the surviving row deterministic when several rows tie on confidence.
    summary = summary.sort_values("ocr_confidence", ascending=False, kind="mergesort")
    summary = summary.drop_duplicates(subset=["plate_text"], keep="first")

    # Save summary DB
    conn_sum = sqlite3.connect(summary_db_path)
    try:
        summary.to_sql("plates_summary", conn_sum, if_exists="replace", index=False)
    finally:
        conn_sum.close()

    print(f"✅ Summary saved: {summary_db_path} with {len(summary)} unique plates")
    return summary

In [None]:
# Raw detections written by the pipeline run above. The UK run_npr (the last one
# defined) writes "plates_final.db"; "plates4.db" matched no file this notebook
# produces, and sqlite3.connect() would silently create an empty database there.
raw_db_path = os.path.join(output_dir, "plates_final.db")
if not os.path.exists(raw_db_path):
    raise FileNotFoundError(f"Raw detections database not found: {raw_db_path}")

# --- Create summary ---
summary_db_path = os.path.join(output_dir, "plates_summary_uk.db")
summarize_and_save(raw_db_path, summary_db_path)

# --- Per-plate overview of the raw detections ---
df = read_plates_db(raw_db_path)
print(df)

In [None]:
# Preview the first rows of the per-plate overview built in the previous cell
df.head()