In [None]:
import cv2
import torch
import torch.nn as nn
import numpy as np
from pathlib import Path
from tqdm import tqdm

# =====================
# CONFIG
# =====================
VIDEO_PATH = Path("../data/raw_videos/WhatsApp Video 2026-01-08 at 15.55.48.mp4")

LOWLIGHT_MODEL_PATH = Path("../outputs/models/low_light/best_model_lowlight.pth")
DEBLUR_MODEL_PATH   = Path("../outputs/models/deblur/best_model_deblur.pth")

OUTPUT_PATH = Path("FINAL_lowlight_then_deblur.mp4")

FRAME_SKIP = 10       # üî• SAFE SPEEDUP
DEFAULT_FPS = 50

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("[INFO] Device:", DEVICE)

# =====================
# MODELS (MATCH TRAINING)
# =====================

class ZeroDCE(nn.Module):
    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv5 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv6 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv7 = nn.Conv2d(32, 24, 3, padding=1)

    def forward(self, x):
        x1 = self.relu(self.conv1(x))
        x2 = self.relu(self.conv2(x1))
        x3 = self.relu(self.conv3(x2))
        x4 = self.relu(self.conv4(x3))
        x5 = self.relu(self.conv5(x4))
        x6 = self.relu(self.conv6(x5))
        return torch.tanh(self.conv7(x6))

def apply_curves(img, curves, n=8):
    out = img
    for i in range(n):
        r = curves[:, i*3:(i+1)*3]
        out = out + r * (out**2 - out)
    return torch.clamp(out, 0, 1)

class ResBlock(nn.Module):
    def __init__(self, c):
        super().__init__()
        self.conv1 = nn.Conv2d(c, c, 3, padding=1)
        self.conv2 = nn.Conv2d(c, c, 3, padding=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return x + self.conv2(self.relu(self.conv1(x)))

class DeblurGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Conv2d(3, 64, 7, padding=3)
        self.body = nn.Sequential(*[ResBlock(64) for _ in range(9)])
        self.tail = nn.Conv2d(64, 3, 7, padding=3)

    def forward(self, x):
        x = torch.relu(self.head(x))
        x = self.body(x)
        return torch.sigmoid(self.tail(x))

# =====================
# LOAD MODELS (SAFE)
# =====================
lowlight = ZeroDCE().to(DEVICE)
low_ckpt = torch.load(LOWLIGHT_MODEL_PATH, map_location=DEVICE, weights_only=True)
lowlight.load_state_dict(low_ckpt["model_state"])
lowlight.eval()


deblur = DeblurGenerator().to(DEVICE)
deblur_ckpt = torch.load(DEBLUR_MODEL_PATH, map_location=DEVICE, weights_only=True)
deblur.load_state_dict(deblur_ckpt["model_state"])
deblur.eval()


print("[INFO] Models loaded successfully")

# =====================
# VIDEO SETUP
# =====================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps > 1 else DEFAULT_FPS
out_fps = fps / FRAME_SKIP

ret, frame = cap.read()
assert ret, "‚ùå No frames"

h, w = frame.shape[:2]

writer = cv2.VideoWriter(
    str(OUTPUT_PATH),
    cv2.VideoWriter_fourcc(*"mp4v"),
    out_fps,
    (w * 2, h)
)

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# =====================
# FINAL PIPELINE
# =====================
frame_id = 0
processed = 0

print("[INFO] Starting FINAL pipeline inference...")

with torch.no_grad():
    for _ in tqdm(range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))):

        ret, frame = cap.read()
        if not ret:
            break

        if frame_id % FRAME_SKIP != 0:
            frame_id += 1
            continue

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inp = torch.from_numpy(rgb / 255.0)\
                    .permute(2,0,1)\
                    .unsqueeze(0)\
                    .float()\
                    .to(DEVICE)

        # ---- LOW LIGHT ----
        curves = lowlight(inp)
        enhanced = apply_curves(inp, curves)

        # ---- DEBLUR ----
        restored = deblur(enhanced)

        out = restored[0].permute(1,2,0).cpu().numpy()
        out = np.clip(out * 255, 0, 255).astype(np.uint8)
        out_bgr = cv2.cvtColor(out, cv2.COLOR_RGB2BGR)

        combined = np.hstack([frame, out_bgr])
        writer.write(combined)

        processed += 1
        frame_id += 1

cap.release()
writer.release()

print("[DONE]")
print(f"[INFO] Frames processed: {processed}")
print(f"[INFO] Output saved to: {OUTPUT_PATH}")


In [None]:
import cv2
import torch
import torch.nn as nn
import numpy as np
from pathlib import Path
from tqdm import tqdm

# =====================
# CONFIG
# =====================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")
DEBLUR_MODEL_PATH = Path("../outputs/models/deblur/best_model_deblur.pth")
OUTPUT_PATH = Path("deblur_only_output.mp4")

FRAME_SKIP = 2.5        # speed-up (process every 5th frame)
DEFAULT_FPS = 50

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("[INFO] Device:", DEVICE)

# =====================
# MODEL (MATCHES TRAINING)
# =====================
class ResBlock(nn.Module):
    def __init__(self, c):
        super().__init__()
        self.conv1 = nn.Conv2d(c, c, 3, padding=1)
        self.conv2 = nn.Conv2d(c, c, 3, padding=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return x + self.conv2(self.relu(self.conv1(x)))

class DeblurGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Conv2d(3, 64, 7, padding=3)
        self.body = nn.Sequential(*[ResBlock(64) for _ in range(9)])
        self.tail = nn.Conv2d(64, 3, 7, padding=3)

    def forward(self, x):
        x = torch.relu(self.head(x))
        x = self.body(x)
        return torch.sigmoid(self.tail(x))

# =====================
# LOAD MODEL (CORRECT)
# =====================
model = DeblurGenerator().to(DEVICE)
ckpt = torch.load(DEBLUR_MODEL_PATH, map_location=DEVICE, weights_only=True)
model.load_state_dict(ckpt["model_state"])
model.eval()

print("[INFO] Deblur model loaded successfully")

# =====================
# VIDEO SETUP
# =====================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps > 1 else DEFAULT_FPS

ret, frame = cap.read()
assert ret, "‚ùå No frames in video"

h, w = frame.shape[:2]

writer = cv2.VideoWriter(
    str(OUTPUT_PATH),
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps // FRAME_SKIP,
    (w * 2, h)
)

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# =====================
# INFERENCE LOOP
# =====================
frame_id = 0
processed = 0

print("[INFO] Starting DEBLUR-ONLY inference...")

with torch.no_grad():
    for _ in tqdm(range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))):

        ret, frame = cap.read()
        if not ret:
            break

        if frame_id % FRAME_SKIP != 0:
            frame_id += 1
            continue

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        inp = torch.from_numpy(rgb / 255.0)\
                    .permute(2,0,1)\
                    .unsqueeze(0)\
                    .float()\
                    .to(DEVICE)

        out = model(inp)[0].permute(1,2,0).cpu().numpy()
        out = (out * 255).astype(np.uint8)
        out_bgr = cv2.cvtColor(out, cv2.COLOR_RGB2BGR)

        combined = np.hstack([frame, out_bgr])
        writer.write(combined)

        processed += 1
        frame_id += 1

cap.release()
writer.release()

print("[DONE]")
print(f"[INFO] Frames processed: {processed}")
print(f"[INFO] Output saved to: {OUTPUT_PATH}")


In [None]:
import cv2
import numpy as np
import onnxruntime as ort
from pathlib import Path
from collections import deque

# ==========================
# CONFIG
# ==========================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")
OUTPUT_PATH = Path("final_stable_pipeline.mp4")

DEBLUR_ONNX = "../onnx_models/deblur.onnx"
OCR_ONNX    = "../onnx_models/ocr.onnx"

FRAME_SKIP   = 5
OCR_EVERY_N  = 20
DEFAULT_FPS  = 50

EDGE_VAR_THRESH = 80.0
OCR_CONF_THRESH = 0.65
TEMPORAL_WINDOW = 5

ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
PROVIDERS = ["CUDAExecutionProvider", "CPUExecutionProvider"]

# ==========================
# LOAD MODELS
# ==========================
deb_sess = ort.InferenceSession(DEBLUR_ONNX, providers=PROVIDERS)
ocr_sess = ort.InferenceSession(OCR_ONNX, providers=PROVIDERS)

deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# ==========================
# VIDEO SETUP
# ==========================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps > 1 else DEFAULT_FPS
fps /= FRAME_SKIP

ret, first = cap.read()
assert ret, "‚ùå Empty video"

H, W = first.shape[:2]

writer = cv2.VideoWriter(
    str(OUTPUT_PATH),
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps,
    (W * 2, H)
)

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# ==========================
# HELPERS
# ==========================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]

    prev = -1
    text, confs = "", []

    for t, p in enumerate(preds):
        if p != prev and p != 0:
            text += ALPHABET[p - 1]
            confs.append(probs[0, t, p])
        prev = p

    if not confs:
        return "", 0.0

    return text, float(np.mean(confs))

def get_text_roi(frame):
    h, w = frame.shape[:2]
    return (
        int(w * 0.25),
        int(h * 0.45),
        int(w * 0.75),
        int(h * 0.65),
    )

def has_text_like_content(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() > EDGE_VAR_THRESH

# ==========================
# TEMPORAL OCR BUFFER
# ==========================
buffer = deque(maxlen=TEMPORAL_WINDOW)
final_text = ""

# ==========================
# MAIN LOOP
# ==========================
frame_id = 0
processed = 0

print("[INFO] Starting STABLE pipeline...")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    if frame_id % FRAME_SKIP != 0:
        frame_id += 1
        continue

    original = frame.copy()

    # -------------------------
    # DEBLUR (ALWAYS ON)
    # -------------------------
    inp = frame.astype(np.float32) / 255.0
    inp = inp.transpose(2, 0, 1)[None]

    deblurred = deb_sess.run(None, {deb_in: inp})[0][0]
    deblurred = (deblurred.transpose(1, 2, 0) * 255).astype(np.uint8)

    # -------------------------
    # OCR (RAW, NOT DEBLURRED)
    # -------------------------
    if processed % OCR_EVERY_N == 0:
        x1, y1, x2, y2 = get_text_roi(original)
        roi = original[y1:y2, x1:x2]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        if has_text_like_content(gray):
            gray = cv2.resize(gray, (128, 32))
            ocr_inp = (gray.astype(np.float32) / 255.0)[None, None]

            logits = ocr_sess.run(None, {ocr_in: ocr_inp})[0]
            text, conf = decode_ctc(logits)

            if conf >= OCR_CONF_THRESH:
                buffer.append(text)
                if len(buffer) == TEMPORAL_WINDOW and len(set(buffer)) == 1:
                    final_text = buffer[0]
                    print(f"[OCR ‚úì] {final_text}")

    # -------------------------
    # VISUALIZATION
    # -------------------------
    vis = deblurred.copy()
    if final_text:
        cv2.putText(
            vis, final_text,
            (30, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3, (0, 255, 0), 3
        )

    combined = np.hstack([original, vis])
    writer.write(combined)

    processed += 1
    frame_id += 1

cap.release()
writer.release()

print("‚úÖ DONE")


In [None]:
import cv2
import numpy as np
import onnxruntime as ort
from pathlib import Path
from collections import deque
from tqdm import tqdm

# ==========================
# CONFIG
# ==========================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")
OUTPUT_PATH = Path("patch_deblur_output.mp4")

DEBLUR_ONNX = "../onnx_models/deblur.onnx"
OCR_ONNX    = "../onnx_models/ocr.onnx"

FRAME_SKIP   = 5
OCR_EVERY_N  = 20
DEFAULT_FPS  = 50

# Patch parameters
TILE_SIZE = 256
OVERLAP   = 32
STEP      = 224

# OCR parameters
EDGE_VAR_THRESH = 80.0
OCR_CONF_THRESH = 0.65
TEMPORAL_WINDOW = 5

ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
PROVIDERS = ["CUDAExecutionProvider", "CPUExecutionProvider"]

# ==========================
# LOAD MODELS
# ==========================
deb_sess = ort.InferenceSession(DEBLUR_ONNX, providers=PROVIDERS)
ocr_sess = ort.InferenceSession(OCR_ONNX, providers=PROVIDERS)

deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# ==========================
# VIDEO SETUP
# ==========================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
total_to_process = total_frames // FRAME_SKIP

fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps > 1 else DEFAULT_FPS
fps /= FRAME_SKIP

ret, first = cap.read()
assert ret, "‚ùå Empty video"

H, W = first.shape[:2]

writer = cv2.VideoWriter(
    str(OUTPUT_PATH),
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps,
    (W * 2, H)
)

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# ==========================
# HELPERS
# ==========================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]

    prev = -1
    text, confs = "", []

    for t, p in enumerate(preds):
        if p != prev and p != 0:
            text += ALPHABET[p - 1]
            confs.append(probs[0, t, p])
        prev = p

    if not confs:
        return "", 0.0

    return text, float(np.mean(confs))

def has_text_like_content(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() > EDGE_VAR_THRESH

def get_text_roi(frame):
    h, w = frame.shape[:2]
    return (
        int(w * 0.25),
        int(h * 0.45),
        int(w * 0.75),
        int(h * 0.65),
    )

# ==========================
# PATCH-BASED DEBLUR
# ==========================
def patch_deblur(frame):
    h, w, _ = frame.shape
    acc = np.zeros((h, w, 3), dtype=np.float32)
    weight = np.zeros((h, w, 1), dtype=np.float32)

    for y in range(0, h - TILE_SIZE + 1, STEP):
        for x in range(0, w - TILE_SIZE + 1, STEP):
            tile = frame[y:y+TILE_SIZE, x:x+TILE_SIZE]

            inp = (tile.astype(np.float32) / 255.0)
            inp = inp.transpose(2, 0, 1)[None]

            out = deb_sess.run(None, {deb_in: inp})[0][0]
            out = out.transpose(1, 2, 0)

            acc[y:y+TILE_SIZE, x:x+TILE_SIZE] += out
            weight[y:y+TILE_SIZE, x:x+TILE_SIZE] += 1.0

    deblurred = acc / np.maximum(weight, 1e-6)
    return (np.clip(deblurred, 0, 1) * 255).astype(np.uint8)

# ==========================
# OCR TEMPORAL BUFFER
# ==========================
buffer = deque(maxlen=TEMPORAL_WINDOW)
final_text = ""

# ==========================
# MAIN LOOP (WITH tqdm)
# ==========================
frame_id = 0
processed = 0

pbar = tqdm(total=total_to_process, desc="Processing frames", unit="frame")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    if frame_id % FRAME_SKIP != 0:
        frame_id += 1
        continue

    original = frame.copy()

    # -------------------------
    # PATCH-BASED DEBLUR
    # -------------------------
    deblurred = patch_deblur(frame)

    # -------------------------
    # OCR (RAW FRAME ONLY)
    # -------------------------
    if processed % OCR_EVERY_N == 0:
        x1, y1, x2, y2 = get_text_roi(original)
        roi = original[y1:y2, x1:x2]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        if has_text_like_content(gray):
            gray = cv2.resize(gray, (128, 32))
            ocr_inp = (gray.astype(np.float32) / 255.0)[None, None]

            logits = ocr_sess.run(None, {ocr_in: ocr_inp})[0]
            text, conf = decode_ctc(logits)

            if conf >= OCR_CONF_THRESH:
                buffer.append(text)
                if len(buffer) == TEMPORAL_WINDOW and len(set(buffer)) == 1:
                    final_text = buffer[0]
                    tqdm.write(f"[OCR ‚úì] {final_text}")

    # -------------------------
    # VISUALIZATION
    # -------------------------
    vis = deblurred.copy()
    if final_text:
        cv2.putText(
            vis,
            final_text,
            (30, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 255, 0),
            3
        )

    combined = np.hstack([original, vis])
    writer.write(combined)

    processed += 1
    frame_id += 1
    pbar.update(1)

pbar.close()
cap.release()
writer.release()

print("‚úÖ DONE")
print(f"üìΩ Frames processed: {processed}")
print(f"üé¨ Output saved to: {OUTPUT_PATH}")


In [None]:
import cv2
import numpy as np
import onnxruntime as ort
from pathlib import Path
from collections import deque
from tqdm import tqdm

# ==========================
# CONFIG
# ==========================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")
OUTPUT_PATH = Path("patch_deblur_output.mp4")

DEBLUR_ONNX = "../onnx_models/deblur.onnx"
OCR_ONNX    = "../onnx_models/ocr.onnx"

FRAME_SKIP   = 5
OCR_EVERY_N  = 20
DEFAULT_FPS  = 50

# Patch parameters
TILE_SIZE = 256
OVERLAP   = 32
STEP      = 224

# OCR parameters
EDGE_VAR_THRESH = 80.0
OCR_CONF_THRESH = 0.65
TEMPORAL_WINDOW = 5

ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
PROVIDERS = ["CUDAExecutionProvider", "CPUExecutionProvider"]

# ==========================
# LOAD MODELS
# ==========================
deb_sess = ort.InferenceSession(DEBLUR_ONNX, providers=PROVIDERS)
ocr_sess = ort.InferenceSession(OCR_ONNX, providers=PROVIDERS)

deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# ==========================
# VIDEO SETUP
# ==========================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
total_to_process = total_frames // FRAME_SKIP

fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps > 1 else DEFAULT_FPS
fps /= FRAME_SKIP

ret, first = cap.read()
assert ret, "‚ùå Empty video"

H, W = first.shape[:2]

writer = cv2.VideoWriter(
    str(OUTPUT_PATH),
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps,
    (W * 2, H)
)

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# ==========================
# HELPERS
# ==========================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]

    prev = -1
    text, confs = "", []

    for t, p in enumerate(preds):
        if p != prev and p != 0:
            text += ALPHABET[p - 1]
            confs.append(probs[0, t, p])
        prev = p

    if not confs:
        return "", 0.0

    return text, float(np.mean(confs))

def has_text_like_content(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() > EDGE_VAR_THRESH

def get_text_roi(frame):
    h, w = frame.shape[:2]
    return (
        int(w * 0.25),
        int(h * 0.45),
        int(w * 0.75),
        int(h * 0.65),
    )

# ==========================
# PATCH-BASED DEBLUR
# ==========================
def patch_deblur(frame):
    h, w, _ = frame.shape
    acc = np.zeros((h, w, 3), dtype=np.float32)
    weight = np.zeros((h, w, 1), dtype=np.float32)

    for y in range(0, h - TILE_SIZE + 1, STEP):
        for x in range(0, w - TILE_SIZE + 1, STEP):
            tile = frame[y:y+TILE_SIZE, x:x+TILE_SIZE]

            inp = (tile.astype(np.float32) / 255.0)
            inp = inp.transpose(2, 0, 1)[None]

            out = deb_sess.run(None, {deb_in: inp})[0][0]
            out = out.transpose(1, 2, 0)

            acc[y:y+TILE_SIZE, x:x+TILE_SIZE] += out
            weight[y:y+TILE_SIZE, x:x+TILE_SIZE] += 1.0

    deblurred = acc / np.maximum(weight, 1e-6)
    return (np.clip(deblurred, 0, 1) * 255).astype(np.uint8)

# ==========================
# OCR TEMPORAL BUFFER
# ==========================
buffer = deque(maxlen=TEMPORAL_WINDOW)
final_text = ""

# ==========================
# MAIN LOOP (WITH tqdm)
# ==========================
frame_id = 0
processed = 0

pbar = tqdm(total=total_to_process, desc="Processing frames", unit="frame")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    if frame_id % FRAME_SKIP != 0:
        frame_id += 1
        continue

    original = frame.copy()

    # -------------------------
    # PATCH-BASED DEBLUR
    # -------------------------
    deblurred = patch_deblur(frame)

    # -------------------------
    # OCR (RAW FRAME ONLY)
    # -------------------------
    if processed % OCR_EVERY_N == 0:
        x1, y1, x2, y2 = get_text_roi(original)
        roi = original[y1:y2, x1:x2]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        if has_text_like_content(gray):
            gray = cv2.resize(gray, (128, 32))
            ocr_inp = (gray.astype(np.float32) / 255.0)[None, None]

            logits = ocr_sess.run(None, {ocr_in: ocr_inp})[0]
            text, conf = decode_ctc(logits)

            if conf >= OCR_CONF_THRESH:
                buffer.append(text)
                if len(buffer) == TEMPORAL_WINDOW and len(set(buffer)) == 1:
                    final_text = buffer[0]
                    tqdm.write(f"[OCR ‚úì] {final_text}")

    # -------------------------
    # VISUALIZATION
    # -------------------------
    vis = deblurred.copy()
    if final_text:
        cv2.putText(
            vis,
            final_text,
            (30, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 255, 0),
            3
        )

    combined = np.hstack([original, vis])
    writer.write(combined)

    processed += 1
    frame_id += 1
    pbar.update(1)

pbar.close()
cap.release()
writer.release()

print("‚úÖ DONE")
print(f"üìΩ Frames processed: {processed}")
print(f"üé¨ Output saved to: {OUTPUT_PATH}")


In [None]:
import cv2
import numpy as np
from tqdm import tqdm
from collections import defaultdict, deque
from pathlib import Path
import onnxruntime as ort
import pytesseract
import os

# =====================================================
# PATHS (ROBUST)
# =====================================================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")

LOWLIGHT_ONNX = Path("../onnx_models/lowlight.onnx")
DEBLUR_ONNX   = Path("../onnx_models/deblur.onnx")
OCR_ONNX      = Path("../onnx_models/ocr.onnx")

# ---- sanity checks ----
print("CWD:", os.getcwd())
print("Video resolved to:", VIDEO_PATH.resolve())
assert VIDEO_PATH.exists(), f"‚ùå Video not found: {VIDEO_PATH.resolve()}"
assert LOWLIGHT_ONNX.exists(), "‚ùå Missing lowlight ONNX"
assert DEBLUR_ONNX.exists(), "‚ùå Missing deblur ONNX"
assert OCR_ONNX.exists(), "‚ùå Missing OCR ONNX"

# =====================================================
# GAP DETECTION (ROBUST)
# =====================================================
MIN_GAP_DROP = 0.08          # normalized (night-safe)
COOLDOWN_FRAMES = 10

# =====================================================
# STRICT ENHANCEMENT THRESHOLDS
# =====================================================
LOWLIGHT_MEAN_THRESH = 40
LOWLIGHT_STD_THRESH  = 25
BLUR_LAPLACIAN_THRESH = 35.0

# =====================================================
# OCR CONFIG
# =====================================================
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

FRAME_SKIP_OCR = 3
TEMPORAL_WINDOW = 5

EDGE_DENSITY_THRESH = 0.02
TESS_CONF_THRESH = 70

TESS_CONFIG = (
    "--oem 1 "
    "--psm 7 "
    "-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
)

# ROI (tune ONCE if needed)
ROI_X1_FRAC = 0.25
ROI_X2_FRAC = 0.75
ROI_Y1_FRAC = 0.45
ROI_Y2_FRAC = 0.65

# =====================================================
# PATCH DEBLUR PARAMS
# =====================================================
TILE_SIZE = 256
OVERLAP = 32
STEP = TILE_SIZE - OVERLAP

# =====================================================
# LOAD MODELS
# =====================================================
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

low_sess = ort.InferenceSession(str(LOWLIGHT_ONNX), providers=providers)
deb_sess = ort.InferenceSession(str(DEBLUR_ONNX), providers=providers)
ocr_sess = ort.InferenceSession(str(OCR_ONNX), providers=providers)

low_in = low_sess.get_inputs()[0].name
deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# =====================================================
# HELPERS
# =====================================================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]
    prev = -1
    text = ""
    for p in preds:
        if p != prev and 1 <= p <= len(ALPHABET):
            text += ALPHABET[p - 1]
        prev = p
    return text

def is_text_like(gray):
    edges = cv2.Canny(gray, 50, 150)
    return (edges.mean() / 255.0) > EDGE_DENSITY_THRESH

def preprocess_for_tesseract(roi):
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
    bin_img = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 5
    )
    return cv2.morphologyEx(bin_img, cv2.MORPH_OPEN, np.ones((2,2), np.uint8))

def tesseract_ocr(bin_img):
    data = pytesseract.image_to_data(
        bin_img,
        config=TESS_CONFIG,
        output_type=pytesseract.Output.DICT
    )
    texts, confs = [], []
    for txt, conf in zip(data["text"], data["conf"]):
        if txt.strip() and int(conf) > 0:
            texts.append(txt)
            confs.append(int(conf))
    if not texts:
        return "", 0.0
    return "".join(texts), float(np.mean(confs))

def is_low_light(gray):
    return gray.mean() < LOWLIGHT_MEAN_THRESH and gray.std() < LOWLIGHT_STD_THRESH

def is_blurry(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() < BLUR_LAPLACIAN_THRESH

def apply_zero_dce(img, curves):
    out = img
    for i in range(8):
        r = curves[:, i*3:(i+1)*3]
        out = out + r * (out*out - out)
    return np.clip(out, 0, 1)

def patch_deblur(frame):
    h, w, _ = frame.shape
    tiles, coords = [], []
    for y in range(0, h - TILE_SIZE + 1, STEP):
        for x in range(0, w - TILE_SIZE + 1, STEP):
            tiles.append(frame[y:y+TILE_SIZE, x:x+TILE_SIZE])
            coords.append((y, x))
    if not tiles:
        return frame

    batch = np.stack([
        t.astype(np.float32).transpose(2,0,1) / 255.0
        for t in tiles
    ])
    out = deb_sess.run(None, {deb_in: batch})[0].transpose(0,2,3,1)

    acc = np.zeros((h,w,3), np.float32)
    weight = np.zeros((h,w,1), np.float32)
    for (y,x), tile in zip(coords, out):
        acc[y:y+TILE_SIZE, x:x+TILE_SIZE] += tile
        weight[y:y+TILE_SIZE, x:x+TILE_SIZE] += 1

    return (np.clip(acc / np.maximum(weight,1e-6),0,1)*255).astype(np.uint8)

# =====================================================
# MAIN LOOP (SAFE)
# =====================================================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå OpenCV cannot open the video"

prev_profile = None
cooldown = 0
wagon_index = 1

ocr_buffer = deque(maxlen=TEMPORAL_WINDOW)
wagon_texts = defaultdict(list)

print("üöÜ START ‚Üí Wagon #1")

pbar = tqdm(desc="Processing video", unit="frame")
frame_id = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_id += 1
    pbar.update(1)

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    profile = gray.mean(axis=0)

    # ---------- GAP DETECTION ----------
    if prev_profile is not None:
        diff = np.abs(prev_profile - profile)
        drop = diff.mean() / (profile.mean() + 1e-6)
        if drop > MIN_GAP_DROP and cooldown == 0:
            wagon_index += 1
            cooldown = COOLDOWN_FRAMES
            print(f"\nüîπ GAP ‚Üí Wagon #{wagon_index}")

    prev_profile = profile
    cooldown = max(0, cooldown - 1)

    # ---------- STRICT VISUAL ENHANCEMENT ----------
    vis = frame.copy()
    if is_low_light(gray):
        rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
        inp = (rgb/255.0).transpose(2,0,1)[None]
        curves = low_sess.run(None, {low_in: inp})[0]
        vis = (apply_zero_dce(inp, curves)[0].transpose(1,2,0)*255).astype(np.uint8)

    if is_blurry(gray):
        vis = patch_deblur(vis)

    # ---------- OCR (RAW ONLY) ----------
    if frame_id % FRAME_SKIP_OCR == 0:
        H,W = frame.shape[:2]
        roi = frame[
            int(H*ROI_Y1_FRAC):int(H*ROI_Y2_FRAC),
            int(W*ROI_X1_FRAC):int(W*ROI_X2_FRAC)
        ]
        gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        if is_text_like(gray_roi):
            bin_img = preprocess_for_tesseract(roi)
            text, conf = tesseract_ocr(bin_img)

            if conf >= TESS_CONF_THRESH:
                ocr_buffer.append(text)
            else:
                resized = cv2.resize(gray_roi,(128,32))
                inp = (resized/255.0).astype(np.float32)[None,None]
                logits = ocr_sess.run(None,{ocr_in:inp})[0]
                ocr_buffer.append(decode_ctc(logits))

            if len(ocr_buffer)==TEMPORAL_WINDOW and len(set(ocr_buffer))==1:
                wagon_texts[wagon_index].append(ocr_buffer[0])
                print(f"[OCR] Wagon #{wagon_index}: {ocr_buffer[0]}")

cap.release()
pbar.close()

# =====================================================
# RESULTS
# =====================================================
print("\n==============================")
print(f"‚úÖ TOTAL WAGONS: {wagon_index}")
print("==============================")

for i in range(1, wagon_index+1):
    texts = wagon_texts[i]
    if texts:
        print(f"Wagon #{i}: {max(set(texts), key=texts.count)}")
    else:
        print(f"Wagon #{i}: <NO TEXT>")


In [None]:
import cv2
import numpy as np
from tqdm import tqdm
from collections import defaultdict, deque
import onnxruntime as ort
import pytesseract

# =====================================================
# PATHS
# =====================================================
VIDEO_PATH = "raw_videos/high_speed_axis_55kmph_night.mp4"

LOWLIGHT_ONNX = "../onnx_models/lowlight.onnx"
DEBLUR_ONNX   = "../onnx_models/deblur.onnx"
OCR_ONNX      = "../onnx_models/ocr.onnx"

# =====================================================
# GAP DETECTION (ROBUST)
# =====================================================
MIN_GAP_DROP = 0.08          # normalized drop (night-safe)
COOLDOWN_FRAMES = 10

# =====================================================
# STRICT ENHANCEMENT THRESHOLDS
# =====================================================
LOWLIGHT_MEAN_THRESH = 40
LOWLIGHT_STD_THRESH  = 25
BLUR_LAPLACIAN_THRESH = 35.0

# =====================================================
# OCR CONFIG
# =====================================================
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

FRAME_SKIP_OCR = 3
TEMPORAL_WINDOW = 5

EDGE_DENSITY_THRESH = 0.02
TESS_CONF_THRESH = 70

TESS_CONFIG = (
    "--oem 1 "
    "--psm 7 "
    "-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
)

# ROI (tune ONCE if needed)
ROI_X1_FRAC = 0.25
ROI_X2_FRAC = 0.75
ROI_Y1_FRAC = 0.45
ROI_Y2_FRAC = 0.65

# =====================================================
# PATCH DEBLUR PARAMS
# =====================================================
TILE_SIZE = 256
OVERLAP = 32
STEP = TILE_SIZE - OVERLAP

# =====================================================
# LOAD MODELS
# =====================================================
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

low_sess = ort.InferenceSession(LOWLIGHT_ONNX, providers=providers)
deb_sess = ort.InferenceSession(DEBLUR_ONNX, providers=providers)
ocr_sess = ort.InferenceSession(OCR_ONNX, providers=providers)

low_in = low_sess.get_inputs()[0].name
deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# =====================================================
# HELPERS
# =====================================================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]
    prev = -1
    text = ""
    for p in preds:
        if p != prev and 1 <= p <= len(ALPHABET):
            text += ALPHABET[p - 1]
        prev = p
    return text

def is_text_like(gray):
    edges = cv2.Canny(gray, 50, 150)
    return (edges.mean() / 255.0) > EDGE_DENSITY_THRESH

def preprocess_for_tesseract(roi):
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
    bin_img = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 5
    )
    return cv2.morphologyEx(bin_img, cv2.MORPH_OPEN, np.ones((2,2), np.uint8))

def tesseract_ocr(bin_img):
    data = pytesseract.image_to_data(
        bin_img,
        config=TESS_CONFIG,
        output_type=pytesseract.Output.DICT
    )
    texts, confs = [], []
    for txt, conf in zip(data["text"], data["conf"]):
        if txt.strip() and int(conf) > 0:
            texts.append(txt)
            confs.append(int(conf))
    if not texts:
        return "", 0.0
    return "".join(texts), float(np.mean(confs))

def is_low_light(gray):
    return gray.mean() < LOWLIGHT_MEAN_THRESH and gray.std() < LOWLIGHT_STD_THRESH

def is_blurry(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() < BLUR_LAPLACIAN_THRESH

def apply_zero_dce(img, curves):
    out = img
    for i in range(8):
        r = curves[:, i*3:(i+1)*3]
        out = out + r * (out*out - out)
    return np.clip(out, 0, 1)

def patch_deblur(frame):
    h, w, _ = frame.shape
    tiles, coords = [], []
    for y in range(0, h - TILE_SIZE + 1, STEP):
        for x in range(0, w - TILE_SIZE + 1, STEP):
            tiles.append(frame[y:y+TILE_SIZE, x:x+TILE_SIZE])
            coords.append((y, x))
    if not tiles:
        return frame
    batch = np.stack([
        t.astype(np.float32).transpose(2,0,1) / 255.0
        for t in tiles
    ])
    out = deb_sess.run(None, {deb_in: batch})[0].transpose(0,2,3,1)
    acc = np.zeros((h,w,3), np.float32)
    weight = np.zeros((h,w,1), np.float32)
    for (y,x), tile in zip(coords, out):
        acc[y:y+TILE_SIZE, x:x+TILE_SIZE] += tile
        weight[y:y+TILE_SIZE, x:x+TILE_SIZE] += 1
    return (np.clip(acc / np.maximum(weight,1e-6),0,1)*255).astype(np.uint8)

# =====================================================
# MAIN LOOP (FIXED)
# =====================================================
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), f"‚ùå Cannot open video: {VIDEO_PATH}"

prev_profile = None
cooldown = 0
wagon_index = 0

ocr_buffer = deque(maxlen=TEMPORAL_WINDOW)
wagon_texts = defaultdict(list)

print("üöÜ START ‚Üí Wagon #1")

pbar = tqdm(desc="Processing video", unit="frame")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    pbar.update(1)

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    profile = gray.mean(axis=0)

    # ---------- GAP DETECTION ----------
    if prev_profile is not None:
        diff = np.abs(prev_profile - profile)
        drop = diff.mean() / (profile.mean() + 1e-6)
        if drop > MIN_GAP_DROP and cooldown == 0:
            wagon_index += 1
            cooldown = COOLDOWN_FRAMES
            print(f"\nüîπ GAP ‚Üí Wagon #{wagon_index}")

    prev_profile = profile
    cooldown = max(0, cooldown - 1)

    # ---------- STRICT VISUAL ENHANCEMENT ----------
    vis = frame.copy()
    if is_low_light(gray):
        rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
        inp = (rgb/255.0).transpose(2,0,1)[None]
        curves = low_sess.run(None, {low_in: inp})[0]
        vis = (apply_zero_dce(inp, curves)[0].transpose(1,2,0)*255).astype(np.uint8)

    if is_blurry(gray):
        vis = patch_deblur(vis)

    # ---------- OCR (RAW ONLY) ----------
    if frame_id = pbar.n
    if wagon_index >= 1 and frame_id % FRAME_SKIP_OCR == 0:
        H,W = frame.shape[:2]
        roi = frame[
            int(H*ROI_Y1_FRAC):int(H*ROI_Y2_FRAC),
            int(W*ROI_X1_FRAC):int(W*ROI_X2_FRAC)
        ]
        gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if is_text_like(gray_roi):
            bin_img = preprocess_for_tesseract(roi)
            text, conf = tesseract_ocr(bin_img)
            if conf >= TESS_CONF_THRESH:
                ocr_buffer.append(text)
            else:
                resized = cv2.resize(gray_roi,(128,32))
                inp = (resized/255.0).astype(np.float32)[None,None]
                logits = ocr_sess.run(None,{ocr_in:inp})[0]
                ocr_buffer.append(decode_ctc(logits))
            if len(ocr_buffer)==TEMPORAL_WINDOW and len(set(ocr_buffer))==1:
                wagon_texts[wagon_index].append(ocr_buffer[0])
                print(f"[OCR] Wagon #{wagon_index}: {ocr_buffer[0]}")

cap.release()

# =====================================================
# RESULTS
# =====================================================
print("\n==============================")
print(f"‚úÖ TOTAL WAGONS: {wagon_index}")
print("==============================")
for i in range(1, wagon_index+1):
    texts = wagon_texts[i]
    if texts:
        print(f"Wagon #{i}: {max(set(texts), key=texts.count)}")
    else:
        print(f"Wagon #{i}: <NO TEXT>")


In [None]:
from pathlib import Path

VIDEO_PATH = Path("raw_videos/high_speed_axis_55kmph_night.mp4")
print("Exists:", VIDEO_PATH.exists())
print("Absolute path:", VIDEO_PATH.resolve())


In [None]:
import cv2
import numpy as np
from tqdm import tqdm
from collections import defaultdict, deque
from pathlib import Path
import onnxruntime as ort
import os

# =====================================================
# PATHS
# =====================================================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")

LOWLIGHT_ONNX = Path("../onnx_models/lowlight.onnx")
DEBLUR_ONNX   = Path("../onnx_models/deblur.onnx")
OCR_ONNX      = Path("../onnx_models/ocr.onnx")

print("CWD:", os.getcwd())
print("Video:", VIDEO_PATH.resolve())
assert VIDEO_PATH.exists(), "‚ùå Video not found"

# =====================================================
# GAP DETECTION
# =====================================================
MIN_GAP_DROP = 0.08
COOLDOWN_FRAMES = 10

# =====================================================
# STRICT ENHANCEMENT
# =====================================================
LOWLIGHT_MEAN_THRESH = 40
LOWLIGHT_STD_THRESH  = 25
BLUR_LAPLACIAN_THRESH = 35.0

# =====================================================
# OCR (CRNN ONLY)
# =====================================================
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

FRAME_SKIP_OCR = 3
TEMPORAL_WINDOW = 5

EDGE_DENSITY_THRESH = 0.02
CRNN_CONF_THRESH = 0.75   # VERY IMPORTANT

ROI_X1_FRAC = 0.25
ROI_X2_FRAC = 0.75
ROI_Y1_FRAC = 0.45
ROI_Y2_FRAC = 0.65

# =====================================================
# PATCH DEBLUR PARAMS
# =====================================================
TILE_SIZE = 256
OVERLAP = 32
STEP = TILE_SIZE - OVERLAP

# =====================================================
# LOAD MODELS
# =====================================================
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

low_sess = ort.InferenceSession(str(LOWLIGHT_ONNX), providers=providers)
deb_sess = ort.InferenceSession(str(DEBLUR_ONNX), providers=providers)
ocr_sess = ort.InferenceSession(str(OCR_ONNX), providers=providers)

low_in = low_sess.get_inputs()[0].name
deb_in = deb_sess.get_inputs()[0].name
ocr_in = ocr_sess.get_inputs()[0].name

# =====================================================
# HELPERS
# =====================================================
def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def decode_ctc_with_conf(logits):
    probs = softmax(logits, axis=-1)
    preds = probs.argmax(axis=-1)[0]

    prev = -1
    text = ""
    confs = []

    for t, p in enumerate(preds):
        if p != prev and p > 0 and p <= len(ALPHABET):
            text += ALPHABET[p - 1]
            confs.append(probs[0, t, p])
        prev = p

    mean_conf = float(np.mean(confs)) if confs else 0.0
    return text, mean_conf

def is_text_like(gray):
    edges = cv2.Canny(gray, 50, 150)
    return (edges.mean() / 255.0) > EDGE_DENSITY_THRESH

def is_low_light(gray):
    return gray.mean() < LOWLIGHT_MEAN_THRESH and gray.std() < LOWLIGHT_STD_THRESH

def is_blurry(gray):
    return cv2.Laplacian(gray, cv2.CV_64F).var() < BLUR_LAPLACIAN_THRESH

def apply_zero_dce(img, curves):
    out = img
    for i in range(8):
        r = curves[:, i*3:(i+1)*3]
        out = out + r * (out*out - out)
    return np.clip(out, 0, 1)

def patch_deblur(frame):
    h, w, _ = frame.shape
    tiles, coords = [], []

    for y in range(0, h - TILE_SIZE + 1, STEP):
        for x in range(0, w - TILE_SIZE + 1, STEP):
            tiles.append(frame[y:y+TILE_SIZE, x:x+TILE_SIZE])
            coords.append((y, x))

    if not tiles:
        return frame

    batch = np.stack([
        t.astype(np.float32).transpose(2,0,1) / 255.0
        for t in tiles
    ])

    out = deb_sess.run(None, {deb_in: batch})[0].transpose(0,2,3,1)

    acc = np.zeros((h,w,3), np.float32)
    weight = np.zeros((h,w,1), np.float32)

    for (y,x), tile in zip(coords, out):
        acc[y:y+TILE_SIZE, x:x+TILE_SIZE] += tile
        weight[y:y+TILE_SIZE, x:x+TILE_SIZE] += 1

    return (np.clip(acc / np.maximum(weight,1e-6),0,1)*255).astype(np.uint8)

# =====================================================
# MAIN LOOP
# =====================================================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

prev_profile = None
cooldown = 0
wagon_index = 1

ocr_buffer = deque(maxlen=TEMPORAL_WINDOW)
wagon_texts = defaultdict(list)

print("üöÜ START ‚Üí Wagon #1")

pbar = tqdm(desc="Processing video", unit="frame")
frame_id = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_id += 1
    pbar.update(1)

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    profile = gray.mean(axis=0)

    # ---------- GAP DETECTION ----------
    if prev_profile is not None:
        diff = np.abs(prev_profile - profile)
        drop = diff.mean() / (profile.mean() + 1e-6)
        if drop > MIN_GAP_DROP and cooldown == 0:
            wagon_index += 1
            cooldown = COOLDOWN_FRAMES
            print(f"\nüîπ GAP ‚Üí Wagon #{wagon_index}")

    prev_profile = profile
    cooldown = max(0, cooldown - 1)

    # ---------- STRICT VISUAL ENHANCEMENT ----------
    vis = frame.copy()
    if is_low_light(gray):
        rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
        inp = (rgb/255.0).transpose(2,0,1)[None]
        curves = low_sess.run(None, {low_in: inp})[0]
        vis = (apply_zero_dce(inp, curves)[0].transpose(1,2,0)*255).astype(np.uint8)

    if is_blurry(gray):
        vis = patch_deblur(vis)

    # ---------- OCR (CRNN ONLY, RAW FRAME) ----------
    if frame_id % FRAME_SKIP_OCR == 0:
        H,W = frame.shape[:2]
        roi = frame[
            int(H*ROI_Y1_FRAC):int(H*ROI_Y2_FRAC),
            int(W*ROI_X1_FRAC):int(W*ROI_X2_FRAC)
        ]

        gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        if is_text_like(gray_roi):
            resized = cv2.resize(gray_roi, (128,32))
            inp = (resized/255.0).astype(np.float32)[None,None]

            logits = ocr_sess.run(None, {ocr_in: inp})[0]
            text, conf = decode_ctc_with_conf(logits)

            if conf >= CRNN_CONF_THRESH:
                ocr_buffer.append(text)

            if len(ocr_buffer) == TEMPORAL_WINDOW and len(set(ocr_buffer)) == 1:
                wagon_texts[wagon_index].append(ocr_buffer[0])
                print(f"[OCR] Wagon #{wagon_index}: {ocr_buffer[0]}")

cap.release()
pbar.close()

# =====================================================
# RESULTS
# =====================================================
print("\n==============================")
print(f"‚úÖ TOTAL WAGONS: {wagon_index}")
print("==============================")

for i in range(1, wagon_index+1):
    texts = wagon_texts[i]
    if texts:
        print(f"Wagon #{i}: {max(set(texts), key=texts.count)}")
    else:
        print(f"Wagon #{i}: <NO TEXT>")


In [None]:
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

# =====================
# CONFIG
# =====================
VIDEO_PATH = Path("../data/raw_videos/high_speed_axis_55kmph_night.mp4")
RESIZE_WIDTH = 960

LINE_X_RATIO = 0.5
STRIP_WIDTH = 12

ROI_Y1_RATIO = 0.55
ROI_Y2_RATIO = 0.75

# ---- FIXED THRESHOLDS ----
DIFF_NORM_THRESHOLD = 0.06     # normalized, night-safe
COOLDOWN_FRAMES = 40

NO_MOTION_FRAMES = 300         # ~4 seconds at ~75fps

# =====================
# INIT
# =====================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

print("üöÜ Wagon Counting Started")

wagon_count = 0
cooldown = 0
no_motion = 0

prev_strip = None
motion_active = False   # NEW: edge detection

pbar = tqdm(desc="Processing video", unit="frame")

# =====================
# MAIN LOOP
# =====================
while True:
    ret, frame = cap.read()
    if not ret:
        break

    pbar.update(1)

    h, w = frame.shape[:2]
    scale = RESIZE_WIDTH / w
    frame = cv2.resize(frame, (RESIZE_WIDTH, int(h * scale)))
    H, W = frame.shape[:2]

    x = int(W * LINE_X_RATIO)
    y1 = int(H * ROI_Y1_RATIO)
    y2 = int(H * ROI_Y2_RATIO)

    strip = frame[y1:y2, x-STRIP_WIDTH:x+STRIP_WIDTH]
    gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)

    if prev_strip is None:
        prev_strip = gray
        continue

    diff = cv2.absdiff(gray, prev_strip)
    mean_diff = diff.mean() / (gray.mean() + 1e-6)
    prev_strip = gray

    # =====================
    # COUNTING LOGIC (FIXED)
    # =====================
    if mean_diff > DIFF_NORM_THRESHOLD:
        no_motion = 0

        if not motion_active and cooldown == 0:
            wagon_count += 1
            cooldown = COOLDOWN_FRAMES
            motion_active = True
            print(f"üÜï WAGON #{wagon_count} DETECTED")

    else:
        motion_active = False
        no_motion += 1

    if cooldown > 0:
        cooldown -= 1

    # Stop when train gone (FIXED)
    if no_motion > NO_MOTION_FRAMES and wagon_count > 0:
        print("\nüö´ Train exited scene. Counting stopped.")
        break

    # =====================
    # VISUALIZATION
    # =====================
    cv2.line(frame, (x, 0), (x, H), (0, 0, 255), 2)
    cv2.rectangle(frame, (x-STRIP_WIDTH, y1), (x+STRIP_WIDTH, y2), (255, 0, 0), 2)

    cv2.putText(
        frame,
        f"Wagons: {wagon_count}",
        (20, 40),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (255, 255, 255),
        2
    )

    cv2.imshow("Robust Wagon Counter", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# =====================
# DONE
# =====================
cap.release()
cv2.destroyAllWindows()
pbar.close()

print("\n===================================")
print(f"‚úÖ TOTAL WAGONS COUNTED: {wagon_count}")
print("===================================")


In [5]:
import cv2
import numpy as np
import onnxruntime as ort
from pathlib import Path
from tqdm import tqdm
import csv

# =====================
# PATHS
# =====================
PROJECT_ROOT = Path(r"C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM")
VIDEO_PATH = PROJECT_ROOT / "data" / "raw_videos" / "high_speed_axis_55kmph_night.mp4"
OCR_ONNX = PROJECT_ROOT / "onnx_models" / "ocr.onnx"
CSV_OUT = PROJECT_ROOT / "wagon_results.csv"

print("üìÇ Project root:", PROJECT_ROOT)
print("üé• Video:", VIDEO_PATH)

assert VIDEO_PATH.exists(), "‚ùå Video not found"

OCR_ENABLED = OCR_ONNX.exists()
print("üî† OCR enabled" if OCR_ENABLED else "‚ö† OCR disabled")

# =====================
# CONFIG
# =====================
RESIZE_WIDTH = 960
LINE_X_RATIO = 0.50
STRIP_WIDTH = 10

ROI_Y1_RATIO = 0.55
ROI_Y2_RATIO = 0.75

DIFF_THRESHOLD = 18
COOLDOWN_FRAMES = 30
NO_MOTION_FRAMES = 90

SHOW_VIDEO = True
OCR_CONF_THRESH = 0.5

# =====================
# OCR
# =====================
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def decode_safe(logits):
    probs = logits[0]
    seq = probs.argmax(axis=1)
    text, confs = "", []
    prev = -1
    for t, c in enumerate(seq):
        if c != prev and 0 < c <= len(ALPHABET):
            text += ALPHABET[c - 1]
            confs.append(probs[t, c])
        prev = c
    return text, float(np.mean(confs)) if confs else 0.0

sess = None
ocr_input = None
if OCR_ENABLED:
    sess = ort.InferenceSession(str(OCR_ONNX), providers=["CPUExecutionProvider"])
    ocr_input = sess.get_inputs()[0].name

# =====================
# VIDEO
# =====================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print("üöÜ Wagon counting started")

wagon_count = 0
cooldown = 0
no_motion = 0
train_seen = False

wagon_frames = []
wagon_times = []
wagon_texts = {}

prev_strip = None

# =====================
# MAIN LOOP
# =====================
for frame_idx in tqdm(range(total_frames), desc="Processing video"):
    ret, frame = cap.read()
    if not ret:
        break

    h, w = frame.shape[:2]
    scale = RESIZE_WIDTH / w
    frame = cv2.resize(frame, (RESIZE_WIDTH, int(h * scale)))
    H, W = frame.shape[:2]

    x = int(W * LINE_X_RATIO)
    y1 = int(H * ROI_Y1_RATIO)
    y2 = int(H * ROI_Y2_RATIO)

    strip = frame[y1:y2, x-STRIP_WIDTH:x+STRIP_WIDTH]
    gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)

    if prev_strip is None:
        prev_strip = gray
        continue

    diff = cv2.absdiff(gray, prev_strip).mean()
    prev_strip = gray

    # ===== TRAIN LOGIC =====
    if diff > DIFF_THRESHOLD:
        train_seen = True
        no_motion = 0

        if cooldown == 0:
            wagon_count += 1
            wagon_frames.append(frame_idx)
            wagon_times.append(frame_idx / fps)
            print(f"üÜï WAGON #{wagon_count} at frame {frame_idx}")

            # OCR (non-blocking)
            if OCR_ENABLED:
                roi = frame[int(H*0.35):int(H*0.55), int(W*0.60):int(W*0.95)]
                roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                roi_gray = cv2.resize(roi_gray, (128, 32))
                inp = (roi_gray / 255.0).astype(np.float32)[None, None]
                if inp.shape == (1,1,32,128):
                    logits = sess.run(None, {ocr_input: inp})[0]
                    txt, conf = decode_safe(logits)
                    wagon_texts[wagon_count] = txt if conf >= OCR_CONF_THRESH else "<LOW_CONF>"
                else:
                    wagon_texts[wagon_count] = "<OCR_SHAPE_ERR>"
            else:
                wagon_texts[wagon_count] = "<OCR_DISABLED>"

            cooldown = COOLDOWN_FRAMES

    else:
        if train_seen:
            no_motion += 1

    if cooldown > 0:
        cooldown -= 1

    if train_seen and no_motion > NO_MOTION_FRAMES:
        print("üö´ Train exited scene")
        break

    # ===== VIS =====
    if SHOW_VIDEO:
        cv2.line(frame, (x,0), (x,H), (0,0,255), 2)
        cv2.rectangle(frame, (x-STRIP_WIDTH,y1), (x+STRIP_WIDTH,y2), (255,0,0), 2)
        cv2.putText(frame, f"Wagons: {wagon_count}", (20,40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
        cv2.imshow("Wagon Counter", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

cap.release()
cv2.destroyAllWindows()

# =====================
# CSV
# =====================
with open(CSV_OUT, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["wagon_index", "frame", "timestamp_sec", "ocr_text"])
    for i in range(wagon_count):
        writer.writerow([
            i+1,
            wagon_frames[i],
            round(wagon_times[i], 3),
            wagon_texts.get(i+1, "")
        ])

print("\n===================================")
print(f"‚úÖ TOTAL WAGONS: {wagon_count}")
print(f"üìÑ CSV saved to: {CSV_OUT}")
print("===================================")


üìÇ Project root: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM
üé• Video: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\data\raw_videos\high_speed_axis_55kmph_night.mp4
üî† OCR enabled
üöÜ Wagon counting started


Processing video:  36%|‚ñà‚ñà‚ñà‚ñå      | 1100/3082 [00:10<00:21, 93.61it/s] 

üÜï WAGON #1 at frame 1089


Processing video:  37%|‚ñà‚ñà‚ñà‚ñã      | 1132/3082 [00:11<00:20, 97.18it/s]

üÜï WAGON #2 at frame 1119


Processing video:  38%|‚ñà‚ñà‚ñà‚ñä      | 1163/3082 [00:11<00:19, 98.47it/s]

üÜï WAGON #3 at frame 1149


Processing video:  39%|‚ñà‚ñà‚ñà‚ñâ      | 1196/3082 [00:11<00:18, 102.43it/s]

üÜï WAGON #4 at frame 1179


Processing video:  40%|‚ñà‚ñà‚ñà‚ñâ      | 1230/3082 [00:12<00:18, 98.94it/s] 

üÜï WAGON #5 at frame 1210


Processing video:  41%|‚ñà‚ñà‚ñà‚ñà      | 1251/3082 [00:12<00:19, 94.37it/s]

üÜï WAGON #6 at frame 1240


Processing video:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 1282/3082 [00:12<00:19, 93.72it/s]

üÜï WAGON #7 at frame 1270


Processing video:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 1313/3082 [00:13<00:19, 89.01it/s]

üÜï WAGON #8 at frame 1300


Processing video:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 1343/3082 [00:13<00:22, 76.72it/s]

üÜï WAGON #9 at frame 1330


Processing video:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 1367/3082 [00:13<00:25, 68.55it/s]

üÜï WAGON #10 at frame 1360


Processing video:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 1397/3082 [00:14<00:24, 67.94it/s]

üÜï WAGON #11 at frame 1390


Processing video:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1434/3082 [00:14<00:24, 67.11it/s]

üÜï WAGON #12 at frame 1420


Processing video:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1458/3082 [00:15<00:23, 69.78it/s]

üÜï WAGON #13 at frame 1450


Processing video:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 1489/3082 [00:15<00:24, 63.84it/s]

üÜï WAGON #14 at frame 1480


Processing video:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 1527/3082 [00:16<00:22, 70.49it/s]

üÜï WAGON #15 at frame 1522


Processing video:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1565/3082 [00:16<00:22, 67.68it/s]

üÜï WAGON #16 at frame 1552


Processing video:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1594/3082 [00:17<00:21, 67.87it/s]

üÜï WAGON #17 at frame 1582


Processing video:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1619/3082 [00:17<00:21, 68.71it/s]

üÜï WAGON #18 at frame 1612


Processing video:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1648/3082 [00:18<00:22, 65.08it/s]

üÜï WAGON #19 at frame 1642


Processing video:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 1680/3082 [00:18<00:20, 66.91it/s]

üÜï WAGON #20 at frame 1672


Processing video:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1708/3082 [00:19<00:21, 64.31it/s]

üÜï WAGON #21 at frame 1702


Processing video:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 1740/3082 [00:19<00:19, 68.63it/s]

üÜï WAGON #22 at frame 1732


Processing video:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 1773/3082 [00:19<00:19, 68.05it/s]

üÜï WAGON #23 at frame 1762


Processing video:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 1799/3082 [00:20<00:17, 71.92it/s]

üÜï WAGON #24 at frame 1792


Processing video:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1832/3082 [00:20<00:16, 73.79it/s]

üÜï WAGON #25 at frame 1822


Processing video:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 1864/3082 [00:21<00:17, 70.00it/s]

üÜï WAGON #26 at frame 1852


Processing video:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 1895/3082 [00:21<00:17, 68.58it/s]

üÜï WAGON #27 at frame 1882


Processing video:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 1919/3082 [00:21<00:16, 69.96it/s]

üÜï WAGON #28 at frame 1912


Processing video:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 1951/3082 [00:22<00:16, 69.09it/s]

üÜï WAGON #29 at frame 1942


Processing video:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1983/3082 [00:22<00:15, 70.33it/s]

üÜï WAGON #30 at frame 1972


Processing video:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 2014/3082 [00:23<00:15, 68.86it/s]

üÜï WAGON #31 at frame 2002


Processing video:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 2039/3082 [00:23<00:14, 73.69it/s]

üÜï WAGON #32 at frame 2032


Processing video:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 2071/3082 [00:24<00:13, 72.38it/s]

üÜï WAGON #33 at frame 2062


Processing video:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 2103/3082 [00:24<00:13, 73.57it/s]

üÜï WAGON #34 at frame 2095


Processing video:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 2136/3082 [00:25<00:12, 73.06it/s]

üÜï WAGON #35 at frame 2125


Processing video:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 2161/3082 [00:25<00:13, 70.14it/s]

üÜï WAGON #36 at frame 2155


Processing video:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 2192/3082 [00:25<00:13, 66.38it/s]

üÜï WAGON #37 at frame 2185


Processing video:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 2222/3082 [00:26<00:12, 66.98it/s]

üÜï WAGON #38 at frame 2215


Processing video:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 2259/3082 [00:26<00:12, 68.32it/s]

üÜï WAGON #39 at frame 2245


Processing video:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 2284/3082 [00:27<00:11, 68.70it/s]

üÜï WAGON #40 at frame 2275


Processing video:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 2314/3082 [00:27<00:10, 70.52it/s]

üÜï WAGON #41 at frame 2305


Processing video:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 2345/3082 [00:28<00:10, 69.66it/s]

üÜï WAGON #42 at frame 2335


Processing video:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 2377/3082 [00:28<00:10, 69.63it/s]

üÜï WAGON #43 at frame 2365


Processing video:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 2408/3082 [00:28<00:09, 68.40it/s]

üÜï WAGON #44 at frame 2395


Processing video:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 2431/3082 [00:29<00:09, 70.45it/s]

üÜï WAGON #45 at frame 2425


Processing video:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2468/3082 [00:29<00:09, 65.64it/s]

üÜï WAGON #46 at frame 2455


Processing video:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2493/3082 [00:30<00:08, 66.56it/s]

üÜï WAGON #47 at frame 2487


Processing video:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2527/3082 [00:30<00:07, 71.31it/s]

üÜï WAGON #48 at frame 2517


Processing video:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2561/3082 [00:31<00:07, 73.72it/s]

üÜï WAGON #49 at frame 2550


Processing video:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2594/3082 [00:31<00:06, 73.95it/s]

üÜï WAGON #50 at frame 2580


Processing video:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2620/3082 [00:31<00:06, 73.60it/s]

üÜï WAGON #51 at frame 2610


Processing video:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2652/3082 [00:32<00:06, 71.08it/s]

üÜï WAGON #52 at frame 2640


Processing video:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 2683/3082 [00:32<00:05, 67.16it/s]

üÜï WAGON #53 at frame 2670


Processing video:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 2714/3082 [00:33<00:05, 71.24it/s]

üÜï WAGON #54 at frame 2700


Processing video:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 2740/3082 [00:33<00:04, 75.45it/s]

üÜï WAGON #55 at frame 2732


Processing video:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 2773/3082 [00:34<00:04, 71.40it/s]

üÜï WAGON #56 at frame 2762


Processing video:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 2798/3082 [00:34<00:03, 71.27it/s]

üÜï WAGON #57 at frame 2792


Processing video:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 2829/3082 [00:34<00:03, 68.50it/s]

üÜï WAGON #58 at frame 2822


Processing video:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 2918/3082 [00:36<00:02, 80.84it/s]

üö´ Train exited scene

‚úÖ TOTAL WAGONS: 58
üìÑ CSV saved to: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\wagon_results.csv





In [9]:
# ============================================================
# FULL WAGON COUNTING + FIXED OCR (SINGLE JUPYTER CELL)
# ============================================================

import cv2
import numpy as np
import csv
from pathlib import Path
from tqdm import tqdm
import onnxruntime as ort

# ============================================================
# PATHS (EDIT ONLY IF NEEDED)
# ============================================================
VIDEO_PATH = Path(r"C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\data\raw_videos\high_speed_axis_55kmph_night.mp4")
OCR_ONNX  = Path(r"C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\onnx_models\ocr.onnx")

print("üìÇ Project root:", VIDEO_PATH.parents[2])
print("üé• Video:", VIDEO_PATH)
print("üî† OCR model:", OCR_ONNX)

if not VIDEO_PATH.exists():
    raise FileNotFoundError(f"‚ùå Video not found: {VIDEO_PATH}")
if not OCR_ONNX.exists():
    raise FileNotFoundError(f"‚ùå OCR model not found: {OCR_ONNX}")

# ============================================================
# OCR SETUP
# ============================================================
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def preprocess_for_ocr(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # CLAHE for night contrast
    clahe = cv2.createCLAHE(3.0, (8,8))
    gray = clahe.apply(gray)

    # Adaptive threshold (CRITICAL)
    th = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        31, 15
    )

    # Morphological cleanup
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    th = cv2.morphologyEx(th, cv2.MORPH_CLOSE, kernel)

    return th


def safe_decode(logits):
    probs = logits[0]
    seq = probs.argmax(axis=1)

    txt = ""
    confs = []
    prev = -1

    for t, c in enumerate(seq):
        if c != prev and c > 0 and c-1 < len(ALPHABET):
            txt += ALPHABET[c-1]
            confs.append(probs[t, c])
        prev = c

    if not confs:
        return "", 0.0
    return txt, float(np.mean(confs))


print("üî† OCR enabled")
sess = ort.InferenceSession(str(OCR_ONNX), providers=["CPUExecutionProvider"])
ocr_input = sess.get_inputs()[0].name

# ============================================================
# WAGON COUNTING CONFIG (UNCHANGED LOGIC)
# ============================================================
RESIZE_WIDTH = 960
LINE_X_RATIO = 0.5
STRIP_WIDTH = 10

ROI_Y1_RATIO = 0.55
ROI_Y2_RATIO = 0.75

DIFF_THRESHOLD = 18
COOLDOWN_FRAMES = 30
NO_MOTION_FRAMES = 60

# ============================================================
# VIDEO INIT
# ============================================================
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), "‚ùå Cannot open video"

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

wagon_count = 0
cooldown = 0
no_motion = 0
prev_strip = None
train_started = False

results = []

print("üöÜ Wagon counting started")

# ============================================================
# MAIN LOOP
# ============================================================
for frame_idx in tqdm(range(total_frames)):
    ret, frame = cap.read()
    if not ret:
        break

    h, w = frame.shape[:2]
    scale = RESIZE_WIDTH / w
    frame = cv2.resize(frame, (RESIZE_WIDTH, int(h * scale)))
    H, W = frame.shape[:2]

    x = int(W * LINE_X_RATIO)
    y1 = int(H * ROI_Y1_RATIO)
    y2 = int(H * ROI_Y2_RATIO)

    strip = frame[y1:y2, x-STRIP_WIDTH:x+STRIP_WIDTH]
    gray_strip = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)

    if prev_strip is None:
        prev_strip = gray_strip
        continue

    diff = cv2.absdiff(gray_strip, prev_strip)
    mean_diff = diff.mean()
    prev_strip = gray_strip

    if mean_diff > DIFF_THRESHOLD:
        no_motion = 0

        if not train_started:
            train_started = True
            print(f"üöÜ Train confirmed at frame {frame_idx}")

        if cooldown == 0:
            wagon_count += 1
            cooldown = COOLDOWN_FRAMES
            ts = frame_idx / fps

            print(f"üÜï WAGON #{wagon_count} at frame {frame_idx}")

            # =============================
            # FIXED OCR (MULTI-FRAME VOTING)
            # =============================
            texts = []
            for offset in [5, 8, 12]:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx + offset)
                r, f2 = cap.read()
                if not r:
                    continue

                roi = f2[y1:y2, x+40:x+300]
                roi = cv2.resize(roi, (128, 32))
                th = preprocess_for_ocr(roi)

                inp = (th / 255.0).astype(np.float32)[None, None]
                logits = sess.run(None, {ocr_input: inp})[0]
                txt, conf = safe_decode(logits)

                if conf > 0.55 and len(txt) >= 3:
                    texts.append((txt, conf))

            best_text = max(texts, key=lambda x: x[1])[0] if texts else "<NO_TEXT>"

            results.append([wagon_count, frame_idx, round(ts,3), best_text])

    else:
        if train_started:
            no_motion += 1

    if cooldown > 0:
        cooldown -= 1

    if train_started and no_motion > NO_MOTION_FRAMES:
        print("üö´ Train exited scene")
        break

    # =============================
    # VISUAL DISPLAY
    # =============================
    cv2.line(frame, (x,0), (x,H), (0,0,255), 2)
    cv2.rectangle(frame, (x-STRIP_WIDTH,y1), (x+STRIP_WIDTH,y2), (255,0,0), 2)
    cv2.putText(frame, f"Wagons: {wagon_count}", (20,40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)

    cv2.imshow("Wagon Counter + OCR", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# ============================================================
# SAVE CSV
# ============================================================
out_csv = VIDEO_PATH.parents[1] / "wagon_results.csv"

with open(out_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["wagon_index", "frame", "timestamp_sec", "ocr_text"])
    writer.writerows(results)

print("\n===================================")
print(f"‚úÖ TOTAL WAGONS: {len(results)}")
print(f"üìÑ CSV saved to: {out_csv}")
print("===================================")


üìÇ Project root: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM
üé• Video: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\data\raw_videos\high_speed_axis_55kmph_night.mp4
üî† OCR model: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\onnx_models\ocr.onnx
üî† OCR enabled
üöÜ Wagon counting started


 35%|‚ñà‚ñà‚ñà‚ñå      | 1088/3082 [00:11<00:19, 100.64it/s]

üöÜ Train confirmed at frame 1089
üÜï WAGON #1 at frame 1089


 36%|‚ñà‚ñà‚ñà‚ñã      | 1119/3082 [00:12<00:29, 66.59it/s] 

üÜï WAGON #2 at frame 1120


 38%|‚ñà‚ñà‚ñà‚ñä      | 1158/3082 [00:12<00:28, 67.47it/s]

üÜï WAGON #3 at frame 1158


 38%|‚ñà‚ñà‚ñà‚ñä      | 1179/3082 [00:13<00:37, 50.65it/s]

üÜï WAGON #4 at frame 1188


 40%|‚ñà‚ñà‚ñà‚ñâ      | 1218/3082 [00:14<00:28, 64.70it/s]

üÜï WAGON #5 at frame 1218


 40%|‚ñà‚ñà‚ñà‚ñà      | 1248/3082 [00:14<00:32, 56.71it/s]

üÜï WAGON #6 at frame 1248


 41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 1278/3082 [00:15<00:29, 61.68it/s]

üÜï WAGON #7 at frame 1278


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 1306/3082 [00:16<00:34, 51.52it/s]

üÜï WAGON #8 at frame 1308


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 1332/3082 [00:16<00:31, 55.02it/s]

üÜï WAGON #9 at frame 1340


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 1362/3082 [00:17<00:31, 54.79it/s]

üÜï WAGON #10 at frame 1370


 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 1392/3082 [00:17<00:26, 62.68it/s]

üÜï WAGON #11 at frame 1400


 46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1427/3082 [00:18<00:29, 57.05it/s]

üÜï WAGON #12 at frame 1430


 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1454/3082 [00:19<00:29, 54.73it/s]

üÜï WAGON #13 at frame 1461


 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 1491/3082 [00:20<00:28, 54.99it/s]

üÜï WAGON #14 at frame 1491


 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 1519/3082 [00:20<00:34, 44.97it/s]

üÜï WAGON #15 at frame 1522


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1548/3082 [00:21<00:32, 47.51it/s]

üÜï WAGON #16 at frame 1552


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1576/3082 [00:22<00:34, 43.80it/s]

üÜï WAGON #17 at frame 1582


 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1611/3082 [00:23<00:30, 48.95it/s]

üÜï WAGON #18 at frame 1612


 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1640/3082 [00:24<00:30, 46.96it/s]

üÜï WAGON #19 at frame 1642


 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 1667/3082 [00:25<00:31, 44.86it/s]

üÜï WAGON #20 at frame 1672


 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 1695/3082 [00:25<00:32, 42.43it/s]

üÜï WAGON #21 at frame 1702


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1732/3082 [00:26<00:26, 50.21it/s]

üÜï WAGON #22 at frame 1732


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 1761/3082 [00:27<00:30, 42.68it/s]

üÜï WAGON #23 at frame 1762


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 1791/3082 [00:28<00:26, 48.84it/s]

üÜï WAGON #24 at frame 1792


 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1821/3082 [00:29<00:27, 45.91it/s]

üÜï WAGON #25 at frame 1822


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1845/3082 [00:30<00:28, 43.25it/s]

üÜï WAGON #26 at frame 1852


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 1882/3082 [00:31<00:25, 47.98it/s]

üÜï WAGON #27 at frame 1882


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 1912/3082 [00:31<00:23, 48.92it/s]

üÜï WAGON #28 at frame 1912


 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 1941/3082 [00:32<00:25, 44.13it/s]

üÜï WAGON #29 at frame 1942


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1970/3082 [00:33<00:23, 47.46it/s]

üÜï WAGON #30 at frame 1972


 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1999/3082 [00:34<00:24, 44.70it/s]

üÜï WAGON #31 at frame 2002


 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 2032/3082 [00:35<00:21, 49.13it/s]

üÜï WAGON #32 at frame 2032


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 2059/3082 [00:36<00:23, 43.46it/s]

üÜï WAGON #33 at frame 2062


 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 2087/3082 [00:36<00:20, 49.21it/s]

üÜï WAGON #34 at frame 2092


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 2122/3082 [00:37<00:18, 51.20it/s]

üÜï WAGON #35 at frame 2122


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 2151/3082 [00:38<00:17, 51.78it/s]

üÜï WAGON #36 at frame 2157


 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 2181/3082 [00:39<00:19, 46.72it/s]

üÜï WAGON #37 at frame 2187


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 2213/3082 [00:39<00:17, 49.01it/s]

üÜï WAGON #38 at frame 2217


 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 2241/3082 [00:40<00:18, 45.82it/s]

üÜï WAGON #39 at frame 2247


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 2272/3082 [00:41<00:14, 54.52it/s]

üÜï WAGON #40 at frame 2278


 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 2302/3082 [00:42<00:15, 50.09it/s]

üÜï WAGON #41 at frame 2309


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 2332/3082 [00:42<00:14, 53.33it/s]

üÜï WAGON #42 at frame 2339


 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 2369/3082 [00:43<00:13, 52.62it/s]

üÜï WAGON #43 at frame 2370


 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 2399/3082 [00:44<00:12, 54.58it/s]

üÜï WAGON #44 at frame 2400


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 2430/3082 [00:44<00:10, 60.87it/s]

üÜï WAGON #45 at frame 2430


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 2460/3082 [00:45<00:11, 53.79it/s]

üÜï WAGON #46 at frame 2460


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2489/3082 [00:46<00:10, 58.77it/s]

üÜï WAGON #47 at frame 2490


 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2518/3082 [00:46<00:10, 56.18it/s]

üÜï WAGON #48 at frame 2520


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2548/3082 [00:47<00:10, 51.61it/s]

üÜï WAGON #49 at frame 2550


 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2578/3082 [00:48<00:08, 56.64it/s]

üÜï WAGON #50 at frame 2580


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2606/3082 [00:48<00:09, 49.18it/s]

üÜï WAGON #51 at frame 2610


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2636/3082 [00:49<00:08, 51.71it/s]

üÜï WAGON #52 at frame 2640


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 2664/3082 [00:50<00:08, 48.29it/s]

üÜï WAGON #53 at frame 2670


 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 2699/3082 [00:51<00:07, 51.13it/s]

üÜï WAGON #54 at frame 2700


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 2729/3082 [00:51<00:06, 50.96it/s]

üÜï WAGON #55 at frame 2730


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 2753/3082 [00:52<00:07, 44.91it/s]

üÜï WAGON #56 at frame 2760


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 2790/3082 [00:53<00:05, 52.39it/s]

üÜï WAGON #57 at frame 2790


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 2875/3082 [00:55<00:03, 52.09it/s]

üö´ Train exited scene

‚úÖ TOTAL WAGONS: 57
üìÑ CSV saved to: C:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\data\wagon_results.csv





In [None]:
from pathlib import Path

# =====================================================
# JUPYTER-SAFE PROJECT ROOT RESOLUTION
# =====================================================
NOTEBOOK_DIR = Path.cwd()          # .../AIDTM/notebooks
PROJECT_ROOT = NOTEBOOK_DIR.parent # .../AIDTM

VIDEO_PATH = PROJECT_ROOT / "data" / "raw_videos" / "high_speed_axis_55kmph_night.mp4"
OCR_ONNX   = PROJECT_ROOT / "onnx_models" / "ocr.onnx"

print("üìÇ Notebook dir:", NOTEBOOK_DIR)
print("üìÇ Project root:", PROJECT_ROOT)
print("üé• Video:", VIDEO_PATH)
print("üî† OCR model:", OCR_ONNX)

assert VIDEO_PATH.exists(), f"‚ùå Video not found: {VIDEO_PATH}"
assert OCR_ONNX.exists(), f"‚ùå OCR model not found: {OCR_ONNX}"


üìÇ Project root: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\notebooks
üé• Video: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\notebooks\data\raw_videos\high_speed_axis_55kmph_night.mp4
üî† OCR model: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\notebooks\onnx_models\ocr.onnx


NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\notebooks\onnx_models\ocr.onnx failed:Load model c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\notebooks\onnx_models\ocr.onnx failed. File doesn't exist

In [12]:
# =====================================================
# FULL STABLE WAGON COUNTING + FIXED OCR (ONE CELL)
# JUPYTER SAFE | NO TESSERACT | OCR ONNX
# =====================================================

import cv2
import numpy as np
import csv
from pathlib import Path
from collections import defaultdict, Counter
from tqdm import tqdm
import onnxruntime as ort

# =====================================================
# PATHS (JUPYTER SAFE)
# =====================================================
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = NOTEBOOK_DIR.parent

VIDEO_PATH = PROJECT_ROOT / "data" / "raw_videos" / "high_speed_axis_55kmph_night.mp4"
OCR_ONNX   = PROJECT_ROOT / "onnx_models" / "ocr.onnx"

print("üìÇ Project root:", PROJECT_ROOT)
print("üé• Video:", VIDEO_PATH)
print("üî† OCR model:", OCR_ONNX)

assert VIDEO_PATH.exists(), "‚ùå Video not found"
assert OCR_ONNX.exists(), "‚ùå OCR ONNX not found"

# =====================================================
# LOAD OCR MODEL
# =====================================================
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
ocr_sess = ort.InferenceSession(str(OCR_ONNX), providers=providers)
ocr_in = ocr_sess.get_inputs()[0].name

ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# =====================================================
# OCR HELPERS (ROBUST)
# =====================================================
def softmax(x):
    x = x - x.max(axis=-1, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=-1, keepdims=True)

def decode_ctc(logits):
    probs = softmax(logits)[0]
    seq = probs.argmax(axis=-1)
    prev = -1
    txt, conf = "", []
    for t, c in enumerate(seq):
        if c != prev and c > 0 and c <= len(ALPHABET):
            txt += ALPHABET[c-1]
            conf.append(probs[t, c])
        prev = c
    return txt, float(np.mean(conf)) if conf else 0.0

def preprocess_big_text(roi):
    """
    Designed specifically for BLUE wagon + WHITE text
    """
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # suppress blue background
    mask = cv2.inRange(hsv, (90, 50, 50), (140, 255, 255))
    roi[mask > 0] = (0, 0, 0)

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)

    bin_img = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        31, 5
    )

    return bin_img

def ocr_from_roi(roi):
    roi = cv2.resize(roi, (128, 32))
    roi = preprocess_big_text(roi)
    inp = (roi / 255.0).astype(np.float32)[None, None]
    logits = ocr_sess.run(None, {ocr_in: inp})[0]
    return decode_ctc(logits)

# =====================================================
# WAGON COUNTING CONFIG (UNCHANGED LOGIC)
# =====================================================
RESIZE_WIDTH = 960
LINE_X_RATIO = 0.5
STRIP_WIDTH = 10
ROI_Y1_RATIO = 0.55
ROI_Y2_RATIO = 0.75
DIFF_THRESHOLD = 18
COOLDOWN_FRAMES = 30
NO_MOTION_FRAMES = 60

# =====================================================
# MAIN
# =====================================================
cap = cv2.VideoCapture(str(VIDEO_PATH))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

wagon_index = 0
cooldown = 0
no_motion = 0
prev_strip = None

wagon_frames = defaultdict(list)
results = []

print("üöÜ Wagon counting started")

for frame_id in tqdm(range(total_frames), desc="Processing video"):
    ret, frame = cap.read()
    if not ret:
        break

    h, w = frame.shape[:2]
    scale = RESIZE_WIDTH / w
    frame = cv2.resize(frame, (RESIZE_WIDTH, int(h * scale)))
    H, W = frame.shape[:2]

    x = int(W * LINE_X_RATIO)
    y1 = int(H * ROI_Y1_RATIO)
    y2 = int(H * ROI_Y2_RATIO)

    strip = frame[y1:y2, x-STRIP_WIDTH:x+STRIP_WIDTH]
    gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)

    if prev_strip is None:
        prev_strip = gray
        continue

    diff = cv2.absdiff(gray, prev_strip).mean()
    prev_strip = gray

    if diff > DIFF_THRESHOLD:
        no_motion = 0
        if cooldown == 0:
            wagon_index += 1
            cooldown = COOLDOWN_FRAMES
            print(f"üÜï WAGON #{wagon_index} at frame {frame_id}")
    else:
        no_motion += 1

    if cooldown > 0:
        cooldown -= 1

    if no_motion > NO_MOTION_FRAMES and wagon_index > 0:
        print("üö´ Train exited scene")
        break

    # -------------------------------------------------
    # OCR COLLECTION (MULTI-FRAME PER WAGON)
    # -------------------------------------------------
    if wagon_index > 0:
        roi = frame[int(H*0.45):int(H*0.7), int(W*0.2):int(W*0.8)]
        txt, conf = ocr_from_roi(roi)
        if conf > 0.3 and len(txt) >= 3:
            wagon_frames[wagon_index].append(txt)

    # -------------------------------------------------
    # DISPLAY
    # -------------------------------------------------
    cv2.line(frame, (x, 0), (x, H), (0,0,255), 2)
    cv2.putText(frame, f"Wagons: {wagon_index}", (20,40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
    cv2.imshow("Wagon Counter + OCR", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# =====================================================
# FINAL AGGREGATION + CSV
# =====================================================
csv_path = PROJECT_ROOT / "wagon_results.csv"
with open(csv_path, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["wagon_index", "ocr_text"])

    for idx in range(1, wagon_index+1):
        texts = wagon_frames[idx]
        if texts:
            text = Counter(texts).most_common(1)[0][0]
        else:
            text = "<NO_TEXT>"
        writer.writerow([idx, text])

print("\n===================================")
print(f"‚úÖ TOTAL WAGONS: {wagon_index}")
print(f"üìÑ CSV saved to: {csv_path}")
print("===================================")


üìÇ Project root: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM
üé• Video: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\data\raw_videos\high_speed_axis_55kmph_night.mp4
üî† OCR model: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\onnx_models\ocr.onnx
üöÜ Wagon counting started


Processing video:  36%|‚ñà‚ñà‚ñà‚ñå      | 1097/3082 [00:13<00:17, 111.00it/s]

üÜï WAGON #1 at frame 1089


Processing video:  37%|‚ñà‚ñà‚ñà‚ñã      | 1130/3082 [00:14<00:24, 79.36it/s] 

üÜï WAGON #2 at frame 1119


Processing video:  37%|‚ñà‚ñà‚ñà‚ñã      | 1155/3082 [00:14<00:29, 66.20it/s]

üÜï WAGON #3 at frame 1149


Processing video:  39%|‚ñà‚ñà‚ñà‚ñä      | 1192/3082 [00:15<00:27, 69.09it/s]

üÜï WAGON #4 at frame 1179


Processing video:  40%|‚ñà‚ñà‚ñà‚ñâ      | 1220/3082 [00:15<00:30, 62.05it/s]

üÜï WAGON #5 at frame 1210


Processing video:  41%|‚ñà‚ñà‚ñà‚ñà      | 1249/3082 [00:16<00:27, 65.68it/s]

üÜï WAGON #6 at frame 1240


Processing video:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 1279/3082 [00:16<00:26, 67.95it/s]

üÜï WAGON #7 at frame 1270


Processing video:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 1306/3082 [00:17<00:32, 55.47it/s]

üÜï WAGON #8 at frame 1300


Processing video:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 1338/3082 [00:17<00:28, 60.19it/s]

üÜï WAGON #9 at frame 1330


Processing video:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 1372/3082 [00:18<00:28, 60.04it/s]

üÜï WAGON #10 at frame 1360


Processing video:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 1401/3082 [00:18<00:26, 64.01it/s]

üÜï WAGON #11 at frame 1390


Processing video:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1430/3082 [00:19<00:25, 64.80it/s]

üÜï WAGON #12 at frame 1420


Processing video:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 1458/3082 [00:19<00:27, 59.32it/s]

üÜï WAGON #13 at frame 1450


Processing video:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 1486/3082 [00:20<00:25, 63.23it/s]

üÜï WAGON #14 at frame 1480


Processing video:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 1528/3082 [00:20<00:26, 58.88it/s]

üÜï WAGON #15 at frame 1522


Processing video:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1561/3082 [00:21<00:25, 59.80it/s]

üÜï WAGON #16 at frame 1552


Processing video:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1590/3082 [00:21<00:22, 65.51it/s]

üÜï WAGON #17 at frame 1582


Processing video:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1619/3082 [00:22<00:22, 66.42it/s]

üÜï WAGON #18 at frame 1612


Processing video:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1650/3082 [00:22<00:20, 68.90it/s]

üÜï WAGON #19 at frame 1642


Processing video:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 1680/3082 [00:23<00:20, 67.25it/s]

üÜï WAGON #20 at frame 1672


Processing video:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1710/3082 [00:23<00:20, 65.80it/s]

üÜï WAGON #21 at frame 1702


Processing video:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 1746/3082 [00:24<00:19, 68.08it/s]

üÜï WAGON #22 at frame 1732


Processing video:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 1774/3082 [00:24<00:20, 65.34it/s]

üÜï WAGON #23 at frame 1762


Processing video:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 1803/3082 [00:25<00:19, 64.37it/s]

üÜï WAGON #24 at frame 1792


Processing video:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1832/3082 [00:25<00:19, 64.75it/s]

üÜï WAGON #25 at frame 1822


Processing video:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 1860/3082 [00:25<00:19, 63.85it/s]

üÜï WAGON #26 at frame 1852


Processing video:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 1896/3082 [00:26<00:17, 67.05it/s]

üÜï WAGON #27 at frame 1882


Processing video:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 1925/3082 [00:26<00:17, 65.41it/s]

üÜï WAGON #28 at frame 1912


Processing video:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 1953/3082 [00:27<00:18, 61.86it/s]

üÜï WAGON #29 at frame 1942


Processing video:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1981/3082 [00:27<00:17, 63.29it/s]

üÜï WAGON #30 at frame 1972


Processing video:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 2010/3082 [00:28<00:16, 64.92it/s]

üÜï WAGON #31 at frame 2002


Processing video:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 2038/3082 [00:28<00:16, 63.52it/s]

üÜï WAGON #32 at frame 2032


Processing video:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 2069/3082 [00:29<00:14, 69.05it/s]

üÜï WAGON #33 at frame 2062


Processing video:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 2104/3082 [00:29<00:14, 65.98it/s]

üÜï WAGON #34 at frame 2095


Processing video:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 2133/3082 [00:30<00:14, 66.07it/s]

üÜï WAGON #35 at frame 2125


Processing video:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 2162/3082 [00:30<00:14, 64.53it/s]

üÜï WAGON #36 at frame 2155


Processing video:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 2198/3082 [00:31<00:13, 65.47it/s]

üÜï WAGON #37 at frame 2185


Processing video:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 2226/3082 [00:31<00:13, 62.76it/s]

üÜï WAGON #38 at frame 2215


Processing video:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 2255/3082 [00:32<00:12, 65.67it/s]

üÜï WAGON #39 at frame 2245


Processing video:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 2284/3082 [00:32<00:12, 65.03it/s]

üÜï WAGON #40 at frame 2275


Processing video:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 2313/3082 [00:32<00:11, 69.47it/s]

üÜï WAGON #41 at frame 2305


Processing video:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 2343/3082 [00:33<00:11, 66.78it/s]

üÜï WAGON #42 at frame 2335


Processing video:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 2379/3082 [00:33<00:10, 67.38it/s]

üÜï WAGON #43 at frame 2365


Processing video:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 2408/3082 [00:34<00:10, 66.76it/s]

üÜï WAGON #44 at frame 2395


Processing video:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 2437/3082 [00:34<00:10, 64.48it/s]

üÜï WAGON #45 at frame 2425


Processing video:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2466/3082 [00:35<00:09, 64.29it/s]

üÜï WAGON #46 at frame 2455


Processing video:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2494/3082 [00:35<00:09, 61.76it/s]

üÜï WAGON #47 at frame 2487


Processing video:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2529/3082 [00:36<00:08, 61.79it/s]

üÜï WAGON #48 at frame 2517


Processing video:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2558/3082 [00:36<00:08, 65.34it/s]

üÜï WAGON #49 at frame 2550


Processing video:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2587/3082 [00:37<00:07, 65.21it/s]

üÜï WAGON #50 at frame 2580


Processing video:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2622/3082 [00:37<00:07, 62.42it/s]

üÜï WAGON #51 at frame 2610


Processing video:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2650/3082 [00:38<00:06, 62.42it/s]

üÜï WAGON #52 at frame 2640


Processing video:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 2678/3082 [00:38<00:06, 63.42it/s]

üÜï WAGON #53 at frame 2670


Processing video:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 2707/3082 [00:39<00:05, 63.66it/s]

üÜï WAGON #54 at frame 2700


Processing video:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 2742/3082 [00:39<00:05, 64.01it/s]

üÜï WAGON #55 at frame 2732


Processing video:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 2771/3082 [00:40<00:05, 61.56it/s]

üÜï WAGON #56 at frame 2762


Processing video:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 2801/3082 [00:40<00:04, 66.60it/s]

üÜï WAGON #57 at frame 2792


Processing video:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 2830/3082 [00:40<00:03, 66.68it/s]

üÜï WAGON #58 at frame 2822


Processing video:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 2888/3082 [00:41<00:02, 69.09it/s]

üö´ Train exited scene

‚úÖ TOTAL WAGONS: 58
üìÑ CSV saved to: c:\Users\Manas Mehta\Desktop\PROJECTS\AIDTM\wagon_results.csv





In [13]:
%pip install easyocr


Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting opencv-python-headless (from easyocr)
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.7-cp313-cp313-win_amd64.whl.metadata (5.0 kB)
Collecting Shapely (from easyocr)
  Downloading shapely-2.1.2-cp313-cp313-win_amd64.whl.metadata (7.1 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.4.0-cp313-cp313-win_amd64.whl.metadata (8.8 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.13.0-py3-none-win_amd64.whl.metadata (5.1 kB)
Downloading easyocr-1.7.2-py3-none-any.whl (2.9 MB)
   ---------------------------------------- 0.0/2.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.9 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.9 MB 1.4 MB/s eta 0:00:02
   ---------- ----------------------------- 0.8/2.9 MB 1.8 MB/s eta 0:00:02
   -------

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'c:\\Users\\Manas Mehta\\Desktop\\PROJECTS\\gpuenv\\Lib\\site-packages\\cv2\\cv2.pyd'
Check the permissions.


[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
import easyocr
reader = easyocr.Reader(['en','hi'])
result = reader.readtext(roi)


ModuleNotFoundError: No module named 'easyocr'