In [15]:
import itertools
import enum
import numpy as np
import time
import cv2
import imutils
from imutils.video import FPS
import pandas as pd

In [16]:
class Direction(enum.Enum):
  ONBOARD = 1
  OFFBOARD= -1
  NAN = 0

In [17]:
class detection_tracker:
  
  export_data = {}

  def __init__(self, capture, frame_num, frame, bounding_box, label):
    self.capture = capture
    self.first_frame = frame_num
    self.tracker = cv2.TrackerMIL_create()
    self.tracker.init(frame, bounding_box)
    self.label = label
    print('Made a tracker for label' + label + '.')
  
  def tracker_end(self):
    self.duration = self.last_frame - self.first_frame
    # build export data
    start_timestamp = TimestampFromFrame.stampFromFrame(self.first_frame, capture=self.capture)
    last_timestamp = TimestampFromFrame.stampFromFrame(self.last_frame, capture=self.capture)
    nested_dict = {(start_timestamp, last_timestamp): self.label, 'direction': self.direction}

  def update_tracker(self, frame, frame_num):
    okay, bounding_box = self.tracker.update(frame)
    if okay:
      self.last_frame = frame_num
      if self.boundingBox:
        self.boundingBox = BBox(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3])
      return True
    else:
      self.tracker_end()
      return False


### Austin's Timestamp Class

In [18]:
class TimestampFromFrame:
  
    # Function to get Timestamp from Frame input
    def stampFromFrame(frame, capture):
        time_stamp = frame / capture.get(cv2.CAP_PROP_FPS)
        return time_stamp

### John's data export class FisheriesData

In [19]:
class FisheriesData:
    def __init__(self):
        self.species = "species"
        self.timeStamp = "time_stamp"
        self.direction = "direction"
        self.dataFrame = []
        self.speciesList = []
        self.timeStampList = []
        self.directionList = []
    
    def makeDF(self):
        self.dataFrame = pd.DataFrame({self.species:self.speciesList,self.timeStamp:self.timeStampList,self.direction:self.directionList})
    
    def addData(self, species, timeStamp, status):
        self.speciesList.append(species)
        self.timeStampList.append(timeStamp)
        self.directionList.append(status)

    def writeCSV(self):
        self.dataFrame.to_csv('output.csv', sep='\t', encoding='utf-8')

    def writeExcel(self):
        self.dataFrame.to_excel('output.xlsx', sheet_name='sheet1', index=False)

    def writeXML(self):
        self.dataFrame.to_xml('output.xml')

    def writeJSON(self):
        self.dataFrame.to_json('output.json', orient='records', indent=2)

### Bounding Box Class (tracker alt)

In [22]:
class BBox:
  x1 = 0
  y1 = 0
  x2 = 0
  y2 = 0
  label = ""
  
  # Consturctor for BBox taking x1,x2,y1,y2
  # def __init__(self, x1, x2, y1, y2, label):
  #   if x1>x2 or y1>y2:
  #     raise ValueError("Coordinates are invalid")
  #   if label == "" or not label:
  #     raise ValueError("Please include label")
  #   self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2
  #   self.label = label

  # Constructor for BBox taking x,y w,h
  # Use for Yolo detections
  def __init__(self, x:int, y:int, w:int, h:int, label:str):
    if x>(x+w) or y>(y+h):
      raise ValueError("Coordinates are invalid")
    if label == "" or not label:
      raise ValueError("Please include label")
    self.x1, self.y1, self.x2, self.y2 = x, y, (x+w), (y+h)
    self.label = label

  # Takes a BBox class instance to compare the overlap area 
  # and returns an value for overlap area
  def intersection_area(self, bbox: object) -> float:
    if type(bbox) != self:
      raise TypeError("BBox should be an instance of a BBox Class")
    dx = min(self.x1, bbox.x1) - max(self.x2, bbox.x2)
    dy = min(self.y1, bbox.y1) - max(self.y2, bbox.y2)
    if (dx>=0) and (dy>=0):
      if self.label == bbox.label:
        return dx*dy
    return -1

  # Takes a BBox and compares x positions 
  def direction_of_motion(self, bbox: object):
    if type(bbox) != self:
      raise TypeError("BBox should be an instance of a BBox Class")
    dx = self.x1 - bbox.x1
    if dx > 0:
      return Direction.ONBOARD
    elif dx < 0:
      return Direction.OFFBOARD
    elif dx == 0:
      return Direction.NAN

# Yolo Detector modified from object_detector.ipynb

In [21]:
INPUT_FILE='fish.avi'
OUTPUT_FILE='output2.mp4'
LABELS_FILE='Model/obj.names'
CONFIG_FILE='Model/yolov4-obj2.cfg'
WEIGHTS_FILE='Model/yolov4-obj2_best.weights'
CONFIDENCE_THRESHOLD=0.3

H=None
W=None

# capture input video
video_capture = cv2.VideoCapture(INPUT_FILE)

# get input video's frame size
frame_width = int(video_capture.get(3))
frame_height = int(video_capture.get(4))
frame_size = (frame_width,frame_height)

# get input video's fps
input_fps = video_capture.get(cv2.CAP_PROP_FPS)

fps = FPS().start()

# fourcc = cv2.VideoWriter_fourcc(*"MJPG") # for avi
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # for mp4
writer = cv2.VideoWriter(OUTPUT_FILE, fourcc, input_fps, frame_size, True)

# make Labels with labels_file
LABELS = open(LABELS_FILE).read().strip().split("\n")

# set random color for labels and bounding boxes
np.random.seed(4)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# load the YOLO network model with config and weights file
net = cv2.dnn.readNetFromDarknet(CONFIG_FILE, WEIGHTS_FILE)

# determine only the *output* layer names that we need from YOLO
ln = net.getLayerNames()
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
cnt =0

# iterate through video frames
while True:
	cnt+=1
	print ("Frame number", cnt)
	ok, image = video_capture.read()
	if not ok:
		break
	# transform image into a blob
	blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
	net.setInput(blob)
	if W is None or H is None:
		(H, W) = image.shape[:2]
	layerOutputs = net.forward(ln)

	# initialize our lists of detected bounding boxes, confidences, and
	# class IDs, respectively
	boxes = []
	confidences = []
	classIDs = []

	# loop over each of the layer outputs
	for output in layerOutputs:
		# loop over each of the detections
		for detection in output:
			# extract the class ID and confidence (i.e., probability) of
			# the current object detection
			scores = detection[5:]
			classID = np.argmax(scores)
			confidence = scores[classID]

			# filter out weak predictions by ensuring the detected
			# probability is greater than the minimum probability
			if confidence > CONFIDENCE_THRESHOLD:
				# scale the bounding box coordinates back relative to the
				# size of the image, keeping in mind that YOLO actually
				# returns the center (x, y)-coordinates of the bounding
				# box followed by the boxes' width and height
				box = detection[0:4] * np.array([W, H, W, H])
				(centerX, centerY, width, height) = box.astype("int")

				# use the center (x, y)-coordinates to derive the top and
				# and left corner of the bounding box
				x = int(centerX - (width / 2))
				y = int(centerY - (height / 2))

				# Add a good confidence detection to the detection tracker
				detection_tracker(video_capture, cnt, image, (x, y, width, height), LABELS[classID])

				# update our list of bounding box coordinates, confidences,
				# and class IDs
				boxes.append([x, y, int(width), int(height)])
				confidences.append(float(confidence))
				classIDs.append(classID)

	# apply non-maxima suppression to suppress weak, overlapping bounding
	# boxes
	idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD,
		CONFIDENCE_THRESHOLD)

	# ensure at least one detection exists
	if len(idxs) > 0:
		# loop over the indexes we are keeping
		for i in idxs.flatten():
			# extract the bounding box coordinates
			(x, y) = (boxes[i][0], boxes[i][1])
			(w, h) = (boxes[i][2], boxes[i][3])

			color = [int(c) for c in COLORS[classIDs[i]]]

			cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
			text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
			cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
				0.5, color, 2)

	# show the output image
	cv2.imshow("output", image)
	writer.write(cv2.resize(image,frame_size))
	fps.update()
	key = cv2.waitKey(1) & 0xFF
	if key == ord("q"):
		break

fps.stop()

print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
cv2.destroyAllWindows()

# release the file pointers
print("[INFO] cleaning up...")
writer.release()
video_capture.release()

Frame number 1


error: OpenCV(4.5.5) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window.cpp:1268: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'
