In [None]:
!unzip /content/drive/MyDrive/Projects/archive.zip -d /content/drive/MyDrive/Projects/ANPRDataset


Archive:  /content/drive/MyDrive/Projects/archive.zip
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN1.jpg  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN1.xml  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN10.jpg  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN10.xml  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN2.jpg  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN2.xml  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN4.jpg  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN4.xml  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN5.jpg  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN5.xml  
  inflating: /content/drive/MyDrive/Projects/ANPRDataset/State-wise_OLX/AN/AN6.jpg  
  inflati

In [None]:

import os

base_dir = "/content/drive/MyDrive/Projects/ANPRDataset"

folders = {
    "google_images": os.path.join(base_dir, "google_images"),
    "statewise_olx": os.path.join(base_dir, "State-wise_OLX"),
    "video_images": os.path.join(base_dir, "video_images")
}

def count_files(path, recursive=False):
    count = 0
    if recursive:
        for root, _, files in os.walk(path):
            count += len(files)
    else:
        count = len(os.listdir(path))
    return count

# -----------------------------
# Count
# -----------------------------
print("📂 Dataset folder sizes:\n")

# Google images (flat folder)
print(f"google_images: {count_files(folders['google_images'])} files")

# State-wise OLX (with subfolders)
for state in os.listdir(folders['statewise_olx']):
    state_path = os.path.join(folders['statewise_olx'], state)
    if os.path.isdir(state_path):
        print(f"  {state}: {count_files(state_path)} files")

print(f"TOTAL statewise_olx: {count_files(folders['statewise_olx'], recursive=True)} files")

# Video images (flat folder)
print(f"video_images: {count_files(folders['video_images'])} files")


📂 Dataset folder sizes:

google_images: 883 files
  AN: 14 files
  AP: 74 files
  AR: 24 files
  AS: 48 files
  BR: 32 files
  CG: 38 files
  CH: 20 files
  DL: 70 files
  DN: 16 files
  GA: 26 files
  GJ: 54 files
  HP: 44 files
  HR: 44 files
  JH: 36 files
  JK: 66 files
  KA: 40 files
  KL: 26 files
  LA: 2 files
  MH: 49 files
  ML: 70 files
  MN: 10 files
  MP: 22 files
  MZ: 2 files
  NL: 16 files
  OD: 44 files
  PB: 54 files
  PY: 34 files
  RJ: 14 files
  SK: 26 files
  TN: 20 files
  TR: 20 files
  TS: 32 files
  UK: 20 files
  UP: 48 files
  WB: 50 files
TOTAL statewise_olx: 1205 files
video_images: 1308 files


In [None]:
import os
import glob
import shutil
import random
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm

# ============================
# Paths
# ============================
ROOT = "/content/drive/MyDrive/Projects/ANPRDataset"
OUT  = "/content/drive/MyDrive/Projects/ANPRDataset2025"

# Fresh start
if os.path.exists(OUT):
    shutil.rmtree(OUT)
for split in ["train", "val", "test"]:
    os.makedirs(f"{OUT}/images/{split}", exist_ok=True)
    os.makedirs(f"{OUT}/labels/{split}", exist_ok=True)

# ============================
# VOC → YOLO conversion
# ============================
def voc_to_yolo(xml_file, img_w, img_h):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    yolo_lines = []

    for obj in root.findall("object"):
        bbox = obj.find("bndbox")
        if bbox is None:
            continue

        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)

        # Normalize
        cx = ((xmin + xmax) / 2) / img_w
        cy = ((ymin + ymax) / 2) / img_h
        w  = (xmax - xmin) / img_w
        h  = (ymax - ymin) / img_h

        # Class is always "0" (license_plate)
        yolo_lines.append(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")

    return yolo_lines

# ============================
# Collect all valid pairs
# ============================
all_pairs = []
img_exts = [".jpg", ".jpeg", ".png", ".bmp"]

for folder in ["google_images", "video_images", "State-wise_OLX"]:
    folder_path = os.path.join(ROOT, folder)

    # handle subfolders in State-wise_OLX
    if folder == "State-wise_OLX":
        img_files = []
        for state in os.listdir(folder_path):
            state_path = os.path.join(folder_path, state)
            img_files.extend(glob.glob(f"{state_path}/*"))
    else:
        img_files = glob.glob(f"{folder_path}/*")

    for img_file in img_files:
        if os.path.splitext(img_file)[1].lower() not in img_exts:
            continue

        xml_file = os.path.splitext(img_file)[0] + ".xml"
        if os.path.exists(xml_file):
            all_pairs.append((img_file, xml_file))

print(f"✅ Found {len(all_pairs)} valid image-label pairs")

# ============================
# Split dataset
# ============================
train_pairs, temp_pairs = train_test_split(all_pairs, test_size=0.30, random_state=42)
val_pairs, test_pairs   = train_test_split(temp_pairs, test_size=0.50, random_state=42)

splits = {
    "train": train_pairs,
    "val": val_pairs,
    "test": test_pairs
}

# ============================
# Process & Save
# ============================
for split, pairs in splits.items():
    for img_file, xml_file in tqdm(pairs, desc=f"Processing {split}"):
        try:
            img = Image.open(img_file)
            w, h = img.size

            yolo_labels = voc_to_yolo(xml_file, w, h)
            if not yolo_labels:  # skip if no bbox
                continue

            # Copy image
            out_img = os.path.join(OUT, "images", split, os.path.basename(img_file))
            shutil.copy(img_file, out_img)

            # Save label
            out_label = os.path.join(OUT, "labels", split, os.path.splitext(os.path.basename(img_file))[0] + ".txt")
            with open(out_label, "w") as f:
                f.write("\n".join(yolo_labels))

        except Exception as e:
            print(f"⚠️ Error processing {img_file}: {e}")

# ============================
# Sanity Check
# ============================
def count_files(img_dir, lbl_dir):
    imgs = len(glob.glob(f"{img_dir}/*"))
    lbls = len(glob.glob(f"{lbl_dir}/*"))
    return imgs, lbls

print("\n📊 Final Sanity Check")
for split in ["train", "val", "test"]:
    imgs, lbls = count_files(f"{OUT}/images/{split}", f"{OUT}/labels/{split}")
    print(f"{split.upper()} → Images: {imgs} | Labels: {lbls}")


✅ Found 1698 valid image-label pairs


Processing train: 100%|██████████| 1188/1188 [00:39<00:00, 29.77it/s]
Processing val: 100%|██████████| 255/255 [00:08<00:00, 31.25it/s]
Processing test: 100%|██████████| 255/255 [00:07<00:00, 31.97it/s]



📊 Final Sanity Check
TRAIN → Images: 1188 | Labels: 1187
VAL → Images: 255 | Labels: 255
TEST → Images: 255 | Labels: 255


In [9]:
!pip install ultralytics




In [None]:
from ultralytics import YOLO

# Load pretrained YOLOv8-small model
model = YOLO("yolov8s.pt")

# Train on ANPR dataset (transfer learning)
model.train(
    data="/content/drive/MyDrive/Projects/ANPRDataset2025/dataset.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    patience=10,
    name="yolov8s-anpr",   # run folder name
    project="/content/drive/MyDrive/Projects/ANPRDataset2025",  # save here
    save_period=5          # saves weights every 5 epochs
)



Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100%|██████████| 21.5M/21.5M [00:00<00:00, 73.2MB/s]


Ultralytics 8.3.180 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (AMD EPYC 7B12)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Projects/ANPRDataset2025/dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8s-anpr, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=10, perspect

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf': 100%|██████████| 755k/755k [00:00<00:00, 13.5MB/s]

Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics




 12                  -1  1    591360  ultralytics.nn.modules.block.C2f             [768, 256, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 16                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 19                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]  

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Projects/ANPRDataset2025/labels/train.cache... 1188 images, 0 backgrounds, 1 corrupt: 100%|██████████| 1188/1188 [00:00<?, ?it/s]

[34m[1mtrain: [0m/content/drive/MyDrive/Projects/ANPRDataset2025/images/train/HP20.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates [     1.0882]
[34m[1mtrain: [0m/content/drive/MyDrive/Projects/ANPRDataset2025/images/train/car-wbs-MH03AR5549_00000.jpg: corrupt JPEG restored and saved





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.5±0.3 ms, read: 0.7±1.1 MB/s, size: 169.9 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Projects/ANPRDataset2025/labels/val.cache... 255 images, 0 backgrounds, 0 corrupt: 100%|██████████| 255/255 [00:00<?, ?it/s]


Plotting labels to /content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G     0.9683      1.566      1.012          6        640: 100%|██████████| 75/75 [52:11<00:00, 41.76s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [01:47<00:00, 13.46s/it]

                   all        255        255      0.851      0.827      0.866      0.649






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50         0G     0.9604      0.766      1.029          2        640: 100%|██████████| 75/75 [33:44<00:00, 26.99s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [01:52<00:00, 14.12s/it]

                   all        255        255      0.445      0.672      0.435      0.314






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50         0G     0.9697     0.7243      1.025          4        640: 100%|██████████| 75/75 [33:31<00:00, 26.82s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  12%|█▎        | 1/8 [00:10<01:13, 10.47s/it]

In [3]:
from ultralytics import YOLO

# Load the best model
model = YOLO("/content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr/weights/best.pt")

# Run validation (on the test set defined in dataset.yaml)
results = model.val()

# Print mAP, precision, recall
print(results)



Ultralytics 8.3.180 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 72 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs


Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf': 100%|██████████| 755k/755k [00:00<00:00, 17.3MB/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.7±0.3 ms, read: 0.5±0.2 MB/s, size: 182.5 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Projects/ANPRDataset2025/labels/val.cache... 255 images, 0 backgrounds, 0 corrupt: 100%|██████████| 255/255 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [07:40<00:00, 28.76s/it]


                   all        255        255      0.917      0.922      0.948       0.67
Speed: 5.5ms preprocess, 706.3ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/detect/val[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x791d6bfe4c90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,  

In [5]:
from ultralytics import YOLO
import os

# Load best model
model = YOLO("/content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr/weights/best.pt")

# Input image
img_path = "/content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/car1.jpeg"
save_dir = os.path.dirname(img_path)   # same folder as input

# Run prediction and save to same folder
results = model.predict(
    source=img_path,
    conf=0.80,
    save=True,
    project=save_dir,    # save here
    name="",             # no subfolder
    exist_ok=True        # overwrite if needed
)



image 1/1 /content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/car1.jpeg: 416x640 1 license_plate, 456.6ms
Speed: 3.1ms preprocess, 456.6ms inference, 1.8ms postprocess per image at shape (1, 3, 416, 640)
Results saved to [1m/content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/predict[0m


In [8]:
from ultralytics import YOLO
import os

# Load best trained model
model = YOLO("/content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr/weights/best.pt")

# input video
video_path = "/content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/gettyimages-807742384-640_adpp.mp4"
save_dir = os.path.dirname(video_path)   # same folder as input

# Run prediction
results = model.predict(
    source=video_path,
    conf=0.7,
    save=True,
    project=save_dir,   # save in same folder
    name="",            # no subfolder
    exist_ok=True       # overwrite if exists
)

print(f"✅ Processed video saved in: {save_dir}")



inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/596) /content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/gettyimages-807742384-640_adpp.mp4: 384x640 1 license_plate, 404.0ms
video 1/1 (frame 2/596) /content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/gettyimages-807742384-640_adpp.mp4: 384x640 1 license_plate, 414.7ms
video 1/1 (frame 3/596) /content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/gettyimages-807742384-640_adpp.mp4: 384x640 1 license_plate, 408.7ms
video 

In [10]:
!pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [19]:
import os

# Folder path
folder_path = "/content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData"

# Files you want to keep
keep_files = {"car1.jpeg", "gettyimages-807742384-640_adpp.mp4"}

# Loop through folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)

    if filename not in keep_files:
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.remove(file_path)   # Delete file
            elif os.path.isdir(file_path):
                import shutil
                shutil.rmtree(file_path)  # Delete folder
            print(f"🗑️ Deleted: {filename}")
        except Exception as e:
            print(f"❌ Error deleting {filename}: {e}")

print("✅ Cleanup complete!")


🗑️ Deleted: detected_plates.csv
🗑️ Deleted: video_with_plates.mp4
🗑️ Deleted: plate_AwWIS AVR_1.jpg
🗑️ Deleted: plate_— AWIS AVR_2.jpg
🗑️ Deleted: plate_AwiS AVR_3.jpg
🗑️ Deleted: plate_| AWIS AVR_8.jpg
🗑️ Deleted: plate_| AWIS AVR |_9.jpg
🗑️ Deleted: plate_AWS AVR_11.jpg
🗑️ Deleted: plate_| AwIS AVR_14.jpg
🗑️ Deleted: plate_AWIS AVR_15.jpg
🗑️ Deleted: plate_- AWIS AVR_16.jpg
🗑️ Deleted: plate_AwIS AVR_19.jpg
🗑️ Deleted: plate_AwWIS AY!_20.jpg
🗑️ Deleted: plate_AwIS AY_21.jpg
🗑️ Deleted: plate_AWIS A_22.jpg
🗑️ Deleted: plate_AwIS |_23.jpg
🗑️ Deleted: plate_eS_24.jpg
🗑️ Deleted: plate_AWS_25.jpg
🗑️ Deleted: plate_| AM_26.jpg
🗑️ Deleted: plate_| Ae_27.jpg
🗑️ Deleted: plate_wys4 ut_62.jpg
🗑️ Deleted: plate_wvs4 WT_70.jpg
🗑️ Deleted: plate_wvS4 LUT._74.jpg
🗑️ Deleted: plate_wvs4 Lut_76.jpg
🗑️ Deleted: plate_WVS4 LUT_78.jpg
🗑️ Deleted: plate_wvs4 LUT_79.jpg
🗑️ Deleted: plate_wvS4 LUT_80.jpg
🗑️ Deleted: plate_WVS4 LUT |_82.jpg
🗑️ Deleted: plate_Pera eecet_101.jpg
🗑️ Deleted: plate_ES eee_102

In [None]:
from ultralytics import YOLO
import cv2
import os
import pytesseract
import csv
from datetime import datetime, timedelta

# Load YOLO model
model = YOLO("/content/drive/MyDrive/Projects/ANPRDataset2025/yolov8s-anpr/weights/best.pt")

# Input video
video_path = "/content/drive/MyDrive/Projects/ANPRDataset2025/RealTimeData/video2.mp4"
save_dir = os.path.dirname(video_path)
output_video = os.path.join(save_dir, "video_with_plates2.mp4")
csv_file = os.path.join(save_dir, "detected_plates2.csv")

# Define start time of video (example: recording started now)
video_start_time = datetime.now()

# Open CSV for logging
csv_header = ["car_id", "frame_no", "entry_time", "date_time", "confidence", "x1", "y1", "x2", "y2", "plate_text"]
with open(csv_file, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(csv_header)

# Tracking dictionary
tracked_cars = {}

# Open video
cap = cv2.VideoCapture(video_path)
fps = cap.get(5)  # frames per second
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video, fourcc, int(fps), (int(cap.get(3)), int(cap.get(4))))

frame_no = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame_no += 1

    # Run YOLO detection with tracking
    results = model.track(frame, conf=0.70, persist=True, verbose=False)

    for r in results:
        if r.boxes.id is None:
            continue

        boxes = r.boxes.xyxy.cpu().numpy().astype(int)
        confs = r.boxes.conf.cpu().numpy()
        ids = r.boxes.id.cpu().numpy().astype(int)

        for box, conf, car_id in zip(boxes, confs, ids):
            x1, y1, x2, y2 = box
            plate_crop = frame[y1:y2, x1:x2]

            plate_text = ""
            if plate_crop.size > 0:
                plate_text = pytesseract.image_to_string(plate_crop, config="--psm 7")
                plate_text = "".join(ch for ch in plate_text.strip().upper() if ch.isalnum() or ch == " ")

            # Calculate entry time (in seconds -> HH:MM:SS)
            entry_sec = frame_no / fps
            hrs = int(entry_sec // 3600)
            mins = int((entry_sec % 3600) // 60)
            secs = int(entry_sec % 60)
            entry_time = f"{hrs:02d}:{mins:02d}:{secs:02d}"

            # Calculate real-world datetime
            entry_datetime = video_start_time + timedelta(seconds=entry_sec)
            entry_datetime_str = entry_datetime.strftime("%Y-%m-%d %H:%M:%S")

            if car_id in tracked_cars:
                if conf > tracked_cars[car_id]["conf"]:
                    tracked_cars[car_id] = {
                        "conf": conf,
                        "frame": frame_no,
                        "time": entry_time,
                        "datetime": entry_datetime_str,
                        "bbox": (x1, y1, x2, y2),
                        "plate": plate_text
                    }
            else:
                tracked_cars[car_id] = {
                    "conf": conf,
                    "frame": frame_no,
                    "time": entry_time,
                    "datetime": entry_datetime_str,
                    "bbox": (x1, y1, x2, y2),
                    "plate": plate_text
                }

            # Draw bbox + OCR text + datetime
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID:{car_id} {plate_text}", (x1, y1 - 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.putText(frame, entry_datetime_str, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

    out.write(frame)

cap.release()
out.release()

# Save best detections to CSV
with open(csv_file, mode="a", newline="") as f:
    writer = csv.writer(f)
    for car_id, data in tracked_cars.items():
        x1, y1, x2, y2 = data["bbox"]
        writer.writerow([car_id, data["frame"], data["time"], data["datetime"],
                         round(float(data["conf"]), 3), x1, y1, x2, y2, data["plate"]])

print(f"✅ Annotated video saved at: {output_video}")
print(f"✅ Plate details with entry date & time logged into: {csv_file}")
