In [None]:
import cv2
import requests

# For Android IP Webcam
# import numpy as np
# from requests.auth import HTTPBasicAuth

from dotenv import load_dotenv
import os
load_dotenv()

from ultralytics import YOLO
import torch
import numpy as np

from threading import Timer
import time
import datetime

import base64

# --- Runtime configuration, read from the process environment (.env is loaded above) ---
# NOTE(review): python-dotenv does not override variables that already exist in
# the environment by default, and many shells already export USER (and sometimes
# HOSTNAME), so the .env values for those keys may be silently ignored — confirm,
# or rename the keys.
user = os.getenv('USER')
password = os.getenv('PASSWORD')
url = os.getenv('URL')  # capture source handed to RTSPStream further down

# Credentials for the ntfy notification server.
ntfy_user = os.getenv('NTFY_USER')
ntfy_pass = os.getenv('NTFY_PASS')

proj_path = os.getenv('PROJ_PATH')
img_path = os.getenv('IMG_SAVE_PATH')

# Default snapshot directory is <PROJ_PATH>/saved_img. When PROJ_PATH is unset as
# well, fall back to the current working directory instead of failing on
# None + str.
if not img_path:
  img_path = os.path.join(proj_path or os.getcwd(), "saved_img")

host = os.getenv('HOSTNAME')
topic = os.getenv('TOPIC')

# MODEL is passed straight to the ultralytics YOLO constructor.
model = YOLO(os.getenv('MODEL'))

# Pause (in seconds) applied with time.sleep() after a notification is sent.
# Environment values are strings, so convert once here; unset/empty means 0.
suspend = float(os.getenv('SUSPEND') or 0)

# Options consumed by OpenCV's FFmpeg backend, as "key;value" pairs.
# NOTE(review): "rtsp_transport" normally takes "tcp" or "udp"; confirm "0" is intended.
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;0"

In [None]:
import cv2
import threading

class RTSPStream:
  """Hold the most recent frame of a video stream, read on a background thread.

  A daemon thread keeps calling ``cap.read()`` and stores the latest successful
  frame; ``get_frame()`` returns a copy of it so callers never share a buffer
  with the reader.

  Attributes:
    rtsp_url: the source passed to ``cv2.VideoCapture``.
    cap: the ``cv2.VideoCapture`` opened with the FFmpeg backend.
    frame: last frame read successfully, or None before the first one.
    lock: guards reads/writes of ``frame``.
    running: loop flag for the reader thread; cleared by ``stop()``.
  """

  # Pause between attempts after a failed read, so a dropped or not-yet-open
  # stream does not pin a CPU core in a tight retry loop.
  _RETRY_DELAY_S = 0.01

  def __init__(self, rtsp_url):
    self.rtsp_url = rtsp_url
    self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
    self.frame = None
    self.lock = threading.Lock()
    self.running = True
    # Set by stop() so a reader that is waiting between retries wakes up at once.
    self._wake = threading.Event()
    # Keep the handle so stop() can wait for the reader before releasing `cap`.
    self._thread = threading.Thread(target=self.update, daemon=True)
    self._thread.start()

  def update(self):
    """Reader loop: store every successfully read frame until ``running`` is cleared."""
    while self.running:
      ret, frame = self.cap.read()
      if ret:
        with self.lock:
          self.frame = frame
      else:
        # Failed read: back off briefly (returns early if stop() is called).
        self._wake.wait(self._RETRY_DELAY_S)

  def get_frame(self):
    """Return a copy of the latest frame, or None if no frame has been read yet."""
    with self.lock:
      return self.frame.copy() if self.frame is not None else None

  def stop(self):
    """Stop the reader thread, then release the capture.

    The thread is joined (with a timeout, in case ``read()`` is blocked) before
    ``cap.release()`` so the capture is not released while a read is in flight.
    """
    self.running = False
    self._wake.set()
    reader = self._thread
    if reader.is_alive() and reader is not threading.current_thread():
      reader.join(timeout=2.0)
    self.cap.release()

In [None]:
def _extract_model_prediction(model, img, device) -> dict:
  result = model(img, device=device)[0]
  max_conf = 0
  for conf, cs in zip(result.boxes.conf, result.boxes.cls):
    if conf >= 0.3 and result.names[int(cs)] == 'cat' and (conf >= max_conf):
      max_conf = conf
  
  msg_dict = None
  if max_conf >= 0.8:
    msg_dict = {
      "pr": "default",
      "title": "Cat Detected!",
      "msg": f"Found cat at conf lv.: {conf}",
      # "img": img,
      "tags": "tada"
    }
  elif max_conf >= 0.3:
    msg_dict = {
      "pr": "low",
      "title": "Cat Detected! Probably...",
      "msg": f"Found cat at conf lv.: {conf}. This could be wrong.",
      # "img": img,
      "tags": "tada"
    }
  else:
    msg_dict = None

  return msg_dict

In [None]:
def _push_ntfy(
    host:str = None,
    topic:str = None,
    msg_dict:dict= None,
    img_path = None,
    ntfy_user:str = None,
    ntfy_pass:str = None,
    timeout:float = 10
    ):
  """Publish a notification (and optionally an image) to an ntfy topic.

  Args:
    host: ntfy server host name; the request goes to ``https://{host}/{topic}``.
    topic: topic to publish to.
    msg_dict: dict with keys ``msg`` (body), ``title``, ``pr`` (priority) and
      ``tags``.
    img_path: optional path of an image file uploaded in a second request.
    ntfy_user: user name for HTTP Basic auth; defaults to the module-level
      ``ntfy_user``.
    ntfy_pass: password for HTTP Basic auth; defaults to the module-level
      ``ntfy_pass``.
    timeout: seconds to wait for each HTTP request.

  Raises:
    ValueError: if ``host``, ``topic`` or ``msg_dict`` is missing.
    requests.RequestException: on network failure or a non-success HTTP status.
    OSError: if ``img_path`` cannot be opened.
  """
  if not host or not topic:
    raise ValueError("host and topic are required")
  if msg_dict is None:
    raise ValueError("msg_dict is required")

  # The parameters shadow the module-level credentials, so fetch the fallbacks
  # through globals().
  login = ntfy_user if ntfy_user is not None else globals().get("ntfy_user")
  secret = ntfy_pass if ntfy_pass is not None else globals().get("ntfy_pass")
  # Let requests build the "Authorization: Basic ..." header; send no
  # credentials at all when they are not configured.
  auth = (login, secret) if login and secret else None

  endpoint = f"https://{host}/{topic}"

  response = requests.post(
    endpoint,
    data=msg_dict['msg'].encode(encoding='utf-8'),
    headers={
      "Title": msg_dict['title'],
      "Priority": msg_dict['pr'],
      "Tags": msg_dict['tags']
    },
    auth=auth,
    timeout=timeout
  )
  response.raise_for_status()

  if img_path is not None:
    # The attachment goes to the same topic, so it needs the same credentials;
    # the context manager closes the file even if the upload fails.
    with open(img_path, "rb") as img_file:
      response = requests.put(
        endpoint,
        data=img_file,
        headers={"Filename": os.path.basename(img_path)},
        auth=auth,
        timeout=timeout
      )
    response.raise_for_status()

In [None]:
def _extract_info_diff(prev_frame, frame):
  info_KB = 0
  if prev_frame is not None and frame is not None:
    frame = cv2.fastNlMeansDenoisingColored(frame)
    prev_frame = cv2.fastNlMeansDenoisingColored(prev_frame)
    fram_diff = frame - prev_frame
    
    info_bits = torch.tensor(fram_diff).abs().log2()
    info_bits = (info_bits.nan_to_num() * ~torch.isneginf(info_bits)) # in bits
    info_KB = info_bits.sum()/(8*1024)
  return info_KB, fram_diff

In [None]:
stream = RTSPStream(url)

det_thres = 80 # info difference in KB
msg_activation = 500
msg_act_init = msg_activation # value msg_activation is reset to after each burst
msg_act_step = 20
msg_act_thres = 1000
device = "mps" # PyTorch device string forwarded to the model

# SUSPEND arrives from the environment as text; time.sleep() needs a number.
suspend_s = float(suspend) if suspend else 0.0

# cv2.imwrite reports failure only through its return value, so make sure the
# target directory exists before the first snapshot.
os.makedirs(img_path, exist_ok=True)

frame = None
try:
  while True:
    prev_frame = frame
    frame = stream.get_frame()

    if frame is None:
      # The reader thread has not delivered a frame yet.
      time.sleep(0.05)
      continue
    if prev_frame is None:
      # First frame: nothing to compare against.
      continue

    info_KB, fram_diff = _extract_info_diff(prev_frame=prev_frame, frame=frame)
    print(f"diff info_KB: {info_KB:.2f}KB")

    if info_KB <= det_thres: # difference not bigger than det_thres KB
      continue

    if fram_diff is not None:
      cv2.imshow(winname="diff", mat=fram_diff)
      cv2.waitKey(1) # lets HighGUI process events so the window actually repaints

    model_activation = 100
    model_act_step = 10

    # Keep running the detector while it stays "activated": detections raise
    # both counters, misses lower them.
    while model_activation > 0 and msg_activation >= 0:
      msg_dict = _extract_model_prediction(
        model = model,
        img = frame,
        device = device
      )

      if msg_dict is not None:
        msg_activation += msg_act_step
        model_activation += model_act_step
      else:
        msg_activation -= (msg_act_step/10)
        model_activation -= (model_act_step/2)

      if msg_dict is not None and msg_activation >= msg_act_thres:
        filename = f"{img_path}/{datetime.datetime.now().strftime('%c')}.jpg"
        saved = cv2.imwrite(filename, frame)
        if not saved:
          print(f"could not save snapshot to {filename}")
        try:
          # _push_ntfy is given only the arguments its signature declares.
          _push_ntfy(
            host = host,
            topic = topic,
            msg_dict = msg_dict,
            img_path = filename if saved else None
          )
        except requests.RequestException as err:
          # A failed notification should not end the watcher.
          print(f"ntfy push failed: {err}")

        time.sleep(suspend_s)
        # Leave the burst after notifying; staying in the loop above the
        # threshold would notify again on every pass.
        break

      # Evaluate a fresh frame on the next pass instead of re-scoring the same one.
      latest = stream.get_frame()
      if latest is not None:
        frame = latest

    msg_activation = msg_act_init

except KeyboardInterrupt:
  pass
finally:
  # Runs on interrupt and on errors alike, so the capture is always released.
  cv2.destroyAllWindows()
  cv2.waitKey(1)
  stream.stop()