In [None]:
import os, cv2, torch, numpy as np
from ultralytics import YOLO

# VERY FAST HEAVY RUN INFERENCE
# ---- config ----
# Paths: the weights file is located under the current user's home directory.
home = os.path.expanduser("~")
weights = home + "/models/jakes-loped/jakes-finder-mk1/1/weights.pt"

# Settings forwarded to model.predict().
IMG_SIZE = 512   # imgsz=; try 448–640 based on your accuracy/speed needs
CONF = 0.30      # conf=
IOU = 0.45       # iou=
RAIL_ID = 9      # the only class id requested (classes=[RAIL_ID])

# Overlay blending: weight given to the painted copy in cv2.addWeighted.
ALPHA = 0.4

# ---- device/precision ----
# Preference order: CUDA device 0 (half precision on) > MPS backend > CPU.
# `half` is only enabled on CUDA; the other two targets run with half=False.
device, half = "cpu", False  # fallback when no accelerator is found
if torch.cuda.is_available():
    device, half = 0, True
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # getattr guard: torch builds without an `mps` backend attribute fall through to CPU.
    device, half = "mps", False

# ---- load once, fuse, warmup ----
# The model is created a single time at cell scope and reused by process_frame().
model = YOLO(weights)
try:
    model.fuse()
except Exception as exc:
    # Fusing is treated as optional: report why it failed instead of hiding it,
    # then carry on with the unfused model.
    print(f"[warn] model.fuse() failed, continuing unfused: {exc!r}")

# Warmup: one throwaway prediction on an all-black IMG_SIZE x IMG_SIZE frame, using the
# same device/precision/threshold arguments as the real per-frame call.
dummy = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
_ = model.predict(dummy, task="segment", imgsz=IMG_SIZE, device=device,
                  conf=CONF, iou=IOU, classes=[RAIL_ID],
                  verbose=False, half=half)

# ---- fast per-frame function ----
def _mask_to_frame(union, frame_h, frame_w):
    """Resize a 2-D model-space mask to (frame_h, frame_w), removing letterbox padding first.

    NOTE(review): this assumes that, when the mask shape differs from the frame shape,
    the mask is at the letterboxed network-input size (frame scaled by a single gain and
    centre-padded), which is how Ultralytics reports `masks.data` unless
    `retina_masks=True` -- confirm against the installed ultralytics version.
    Stretching such a mask directly to the frame size would also stretch the padding
    and shift the overlay on non-square frames.

    Args:
        union: 2-D numpy array (h_m, w_m).
        frame_h: target height in pixels.
        frame_w: target width in pixels.

    Returns:
        2-D numpy array of shape (frame_h, frame_w), same dtype as `union`.
    """
    mask_h, mask_w = union.shape[:2]
    if (mask_h, mask_w) == (frame_h, frame_w):
        return union

    # Single scale factor that fits the frame inside the mask canvas.
    gain = min(mask_h / frame_h, mask_w / frame_w)
    pad_x = (mask_w - frame_w * gain) / 2.0
    pad_y = (mask_h - frame_h * gain) / 2.0

    # The -/+0.1 rounding splits an odd padding total between the two sides.
    top = max(int(round(pad_y - 0.1)), 0)
    left = max(int(round(pad_x - 0.1)), 0)
    bottom = mask_h - max(int(round(pad_y + 0.1)), 0)
    right = mask_w - max(int(round(pad_x + 0.1)), 0)
    if bottom > top and right > left:
        union = union[top:bottom, left:right]

    # Nearest-neighbour keeps the mask strictly binary.
    return cv2.resize(np.ascontiguousarray(union), (frame_w, frame_h),
                      interpolation=cv2.INTER_NEAREST)


def process_frame(img_bgr):
    """Run rail segmentation on one frame and return the frame with rails tinted red.

    Args:
        img_bgr: image array whose first two dimensions are (height, width); the callers
            in this file pass the result of cv2.imread (assumed H x W x 3 uint8, BGR).

    Returns:
        The input array itself (not a copy) when nothing was detected; otherwise a new
        array where the union of all detected masks is blended with (0, 0, 255) at
        weight ALPHA.
    """
    res = model.predict(img_bgr, task="segment", imgsz=IMG_SIZE, device=device,
                        conf=CONF, iou=IOU, classes=[RAIL_ID],
                        max_det=20, verbose=False, half=half)[0]

    if res.masks is None or res.boxes is None or len(res.boxes) == 0:
        return img_bgr  # no rails

    # Union rail masks: a pixel is "rail" if any instance mask covers it.
    m = res.masks.data                    # [N, h_m, w_m] tensor (model mask size)
    union = (m.sum(dim=0) > 0).float().cpu().numpy()  # [h_m, w_m] in {0,1}

    # --- ensure mask matches frame size (padding-aware) ---
    H, W = img_bgr.shape[:2]
    mask = _mask_to_frame(union, H, W).astype(bool)

    # --- fast masked blend ---
    # Outside the mask the overlay equals the frame, so only masked pixels change colour.
    overlay = img_bgr.copy()
    overlay[mask] = (0, 0, 255)  # BGR rail color
    return cv2.addWeighted(overlay, ALPHA, img_bgr, 1 - ALPHA, 0)


# ---- example: directory poll (replace with your frame source) ----
# Repeatedly scans image_dir for frame_*.jpg files, runs process_frame() on each file
# not handled yet, and shows the result in an OpenCV window named "rails".
# Runs until interrupted (Ctrl-C / kernel interrupt); the window is closed on exit.
if __name__ == "__main__":
    import glob, time
    image_dir = f"{home}/SubwaySurfers/train_screenshots"
    seen = set()  # paths already processed and displayed
    try:
        while True:
            for p in sorted(glob.glob(os.path.join(image_dir, "frame_*.jpg"))):
                if p in seen:
                    continue
                img = cv2.imread(p)
                if img is None:
                    # Unreadable file: not marked as seen, so it is retried on the next pass.
                    continue
                out = process_frame(img)
                cv2.imshow("rails", out)
                cv2.waitKey(1)  # lets the GUI refresh the window
                seen.add(p)
            time.sleep(0.02)  # short pause between directory scans
    except KeyboardInterrupt:
        pass  # interrupt is the intended way to stop this loop
    finally:
        # Previously the window was left open when the loop was interrupted.
        cv2.destroyAllWindows()


In [None]:
import os, cv2, torch, numpy as np, time
from ultralytics import YOLO

# INTENTIONALLY SLOWED; PRINTS PER-FRAME TIMINGS AND FPS

# ---- config ----
# Paths: the weights file is located under the current user's home directory.
home = os.path.expanduser("~")
weights = home + "/models/jakes-loped/jakes-finder-mk1/1/weights.pt"

# Settings forwarded to model.predict().
IMG_SIZE = 512   # imgsz=
CONF = 0.30      # conf=
IOU = 0.45       # iou=
RAIL_ID = 9      # the only class id requested (classes=[RAIL_ID])

# Overlay blending: weight given to the painted copy in cv2.addWeighted.
ALPHA = 0.4

# ---- device/precision ----
# Preference order: CUDA device 0 (half precision on) > MPS backend > CPU.
# `half` is only enabled on CUDA; the other two targets run with half=False.
device, half = "cpu", False  # fallback when no accelerator is found
if torch.cuda.is_available():
    device, half = 0, True
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # getattr guard: torch builds without an `mps` backend attribute fall through to CPU.
    device, half = "mps", False

def _sync():
    """Wait for queued accelerator work to finish so wall-clock timings are accurate.

    Synchronizes CUDA device 0 when `device` is 0, or the MPS backend when `device`
    is "mps" and this torch build exposes `torch.mps.synchronize`. Does nothing for
    any other device. Errors raised while synchronizing are deliberately ignored.
    """
    try:
        using_cuda = device == 0 and torch.cuda.is_available()
        if using_cuda:
            torch.cuda.synchronize(0)
            return
        if device != "mps":
            return
        if hasattr(torch, "mps") and hasattr(torch.mps, "synchronize"):
            torch.mps.synchronize()
    except Exception:
        # Best-effort: a failed sync only affects timing accuracy.
        pass

# ---- load once, fuse, warmup ----
# The model is created a single time at cell scope and reused by process_frame().
model = YOLO(weights)
try:
    model.fuse()
except Exception as exc:
    # Fusing is treated as optional: report why it failed instead of hiding it,
    # then carry on with the unfused model.
    print(f"[warn] model.fuse() failed, continuing unfused: {exc!r}")

# Warmup: one throwaway prediction on an all-black IMG_SIZE x IMG_SIZE frame, using the
# same device/precision/threshold arguments as the real per-frame call, so the first
# timed frame is not the very first call into the model.
dummy = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
_ = model.predict(dummy, task="segment", imgsz=IMG_SIZE, device=device,
                  conf=CONF, iou=IOU, classes=[RAIL_ID],
                  verbose=False, half=half)

# ---- fast per-frame function with timings ----
def _mask_to_frame(union, frame_h, frame_w):
    """Resize a 2-D model-space mask to (frame_h, frame_w), removing letterbox padding first.

    NOTE(review): this assumes that, when the mask shape differs from the frame shape,
    the mask is at the letterboxed network-input size (frame scaled by a single gain and
    centre-padded), which is how Ultralytics reports `masks.data` unless
    `retina_masks=True` -- confirm against the installed ultralytics version.
    Stretching such a mask directly to the frame size would also stretch the padding
    and shift the overlay on non-square frames.

    Args:
        union: 2-D numpy array (h_m, w_m).
        frame_h: target height in pixels.
        frame_w: target width in pixels.

    Returns:
        2-D numpy array of shape (frame_h, frame_w), same dtype as `union`.
    """
    mask_h, mask_w = union.shape[:2]
    if (mask_h, mask_w) == (frame_h, frame_w):
        return union

    # Single scale factor that fits the frame inside the mask canvas.
    gain = min(mask_h / frame_h, mask_w / frame_w)
    pad_x = (mask_w - frame_w * gain) / 2.0
    pad_y = (mask_h - frame_h * gain) / 2.0

    # The -/+0.1 rounding splits an odd padding total between the two sides.
    top = max(int(round(pad_y - 0.1)), 0)
    left = max(int(round(pad_x - 0.1)), 0)
    bottom = mask_h - max(int(round(pad_y + 0.1)), 0)
    right = mask_w - max(int(round(pad_x + 0.1)), 0)
    if bottom > top and right > left:
        union = union[top:bottom, left:right]

    # Nearest-neighbour keeps the mask strictly binary.
    return cv2.resize(np.ascontiguousarray(union), (frame_w, frame_h),
                      interpolation=cv2.INTER_NEAREST)


def process_frame(img_bgr):
    """Segment rails in one frame, tint them red, and report stage timings.

    Args:
        img_bgr: image array whose first two dimensions are (height, width); the caller
            in this file passes the result of cv2.imread (assumed H x W x 3 uint8, BGR).

    Returns:
        Tuple (out, infer_ms, post_ms, total_ms):
          out      -- the input array itself when nothing was detected, otherwise a new
                      array with the union of detected masks blended with (0, 0, 255)
                      at weight ALPHA.
          infer_ms -- wall-clock milliseconds spent in model.predict, bracketed by
                      _sync() calls.
          post_ms  -- milliseconds for everything after inference (mask union, copy to
                      host, resize, blend); 0.0 when nothing was detected.
          total_ms -- infer_ms + post_ms (measured from the same clock readings).
    """
    # Drain any previously queued accelerator work BEFORE starting the clock, so it is
    # not billed to this frame's inference time.
    _sync()
    t0 = time.perf_counter()
    res = model.predict(img_bgr, task="segment", imgsz=IMG_SIZE, device=device,
                        conf=CONF, iou=IOU, classes=[RAIL_ID],
                        max_det=20, verbose=False, half=half)[0]
    _sync()
    t1 = time.perf_counter()
    infer_ms = (t1 - t0) * 1000.0

    if res.masks is None or res.boxes is None or len(res.boxes) == 0:
        return img_bgr, infer_ms, 0.0, infer_ms

    # Union rail masks: a pixel is "rail" if any instance mask covers it.
    m = res.masks.data                    # [N, h_m, w_m]
    union = (m.sum(dim=0) > 0).float().cpu().numpy()

    # Bring the mask to frame geometry (padding-aware).
    H, W = img_bgr.shape[:2]
    mask = _mask_to_frame(union, H, W).astype(bool)

    # Blend: outside the mask the overlay equals the frame, so only masked pixels change.
    overlay = img_bgr.copy()
    overlay[mask] = (0, 0, 255)  # BGR
    out = cv2.addWeighted(overlay, ALPHA, img_bgr, 1 - ALPHA, 0)

    t3 = time.perf_counter()
    # Post-processing is timed from the end of inference so that the mask union,
    # device-to-host copy and resize are included and infer + post == total.
    post_ms = (t3 - t1) * 1000.0
    total_ms = (t3 - t0) * 1000.0
    return out, infer_ms, post_ms, total_ms

# ---- example: directory poll (replace with your frame source) ----
# Repeatedly scans image_dir for frame_*.jpg files, runs process_frame() on each file
# not handled yet, prints its timings, and shows the result in an OpenCV window named
# "rails". New frames are deliberately paced at one per second.
# Runs until interrupted (Ctrl-C / kernel interrupt); the window is closed on exit.
if __name__ == "__main__":
    import glob
    image_dir = f"{home}/SubwaySurfers/train_screenshots"
    frame_delay_s = 1.0   # intentional slow-down applied per NEW frame
    idle_delay_s = 0.02   # short pause when a scan found nothing new
    seen = set()          # paths already processed and displayed
    try:
        while True:
            updated = False
            for p in sorted(glob.glob(os.path.join(image_dir, "frame_*.jpg"))):
                if p in seen:
                    continue
                img = cv2.imread(p)
                if img is None:
                    # Unreadable file: not marked as seen, so it is retried on the next pass.
                    continue
                # The delay sits after the skip checks: sleeping before them made every
                # already-seen file cost a full second on each rescan.
                time.sleep(frame_delay_s)
                out, infer_ms, post_ms, total_ms = process_frame(img)
                fps = 1000.0 / total_ms if total_ms > 0 else float('inf')
                print(f"{os.path.basename(p)}  infer={infer_ms:.1f}ms  post={post_ms:.1f}ms  total={total_ms:.1f}ms  (~{fps:.1f} FPS)")
                cv2.imshow("rails", out)
                cv2.waitKey(1)  # lets the GUI refresh the window
                seen.add(p)
                updated = True
            if not updated:
                time.sleep(idle_delay_s)
    except KeyboardInterrupt:
        pass  # interrupt is the intended way to stop this loop
    finally:
        # Previously the window was left open when the loop was interrupted.
        cv2.destroyAllWindows()
