In [16]:
import cv2
import numpy as np
import os

In [17]:
yoloCfg = "/home/rko8001/Downloads/BTP/btp-models/object-detection/yolov3-tiny.cfg"
yoloWeights = "/home/rko8001/Downloads/BTP/btp-models/object-detection/yolov3-tiny.weights"
coco_names = "/home/rko8001/Downloads/BTP/btp-models/object-detection/coco.names"
'''
    For Object detection, we have taken a threshold of 5 frames. 
    If the object is detected once, then next 'x' consecutive frames will not be predicted
    due to multiple predictions on same apple.
'''
framesToSkip = 5

''' 
    Width Threshold, if apple is present in between this threshold
    then it will be marked as detected
'''
widthBegin, widthEnd = 300, 320


'''
    While object detection, we have taken a confidence threshold of 0.4
    with this confidence, we are able to detect the object with good accuracy and speed
'''
confidenceInterval = 0.4

In [18]:
net = cv2.dnn.readNet(yoloWeights, yoloCfg)

classes = []
with open(coco_names, "r") as f:
    classes = [line.strip() for line in f.readlines()]

layer_names = net.getUnconnectedOutLayersNames()

In [19]:
def extractDimensions(detection, frame):
    height, width, _ = frame.shape
    center_x = int(detection[0] * width)
    center_y = int(detection[1] * height)
    w = int(detection[2] * width)
    h = int(detection[3] * height)
    x = int(center_x - w / 2)
    y = int(center_y - h / 2)
    return x, y, w, h, center_x, center_y

In [20]:
def objectDetection(frame): 
    global confidenceInterval, widthBegin, widthEnd

    inMiddle = False
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(layer_names)

    # Process the outputs and draw bounding boxes
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence >= confidenceInterval:  # Adjust the confidence threshold as needed
                x, y, w, h, center_x, center_y = extractDimensions(detection, frame)
                cv2.rectangle(frame, (x, y), (x + w, y+h), (0, 255, 0), 2)

                if (center_x >= widthBegin and center_x <= widthEnd): 
                    inMiddle = True
                break

    return frame, inMiddle

In [21]:
def display_camera(camera):
    global framesToSkip
    sortCategory, prediction, previousFrame, skipFrame = "color", None, False, 0

    while True:
        success, frame = camera.read()
        previousFrame = frame
        if not success:
            break

        newFrame, objectDetected = objectDetection(frame)

        if skipFrame == 0:
            if objectDetected == True:
                # do the prediction
                skipFrame += 1
        else: 
            skipFrame = (skipFrame + 1) % (framesToSkip + 1 )

        cv2.imshow("Object Detection", newFrame)

    camera.release()
    cv2.destroyAllWindows()


In [22]:
def display_camera_analysis(video_file, output_file):
    global framesToSkip
    sortCategory, prediction, previousFrame, skipFrame = "color", None, False, 0

    # Open the video file
    video = cv2.VideoCapture(video_file)

    # Get video properties
    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f"Specs of Video =  FPS: {fps}, Width: {width}, Height: {height}")
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

    while True:
        success, frame = video.read()
        previousFrame = frame
        if not success:
            break

        newFrame, objectDetected = objectDetection(frame)

        if skipFrame == 0:
            if objectDetected:
                # do the prediction
                skipFrame += 1
        else:
            skipFrame = (skipFrame + 1) % (framesToSkip + 1)

        out.write(newFrame)
        # cv2.imshow("Object Detection", newFrame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.release()
    out.release()
    cv2.destroyAllWindows()


In [23]:
def checkFrameSkipLogic(framesOutput):
    global framesToSkip
    skipFrame = 0
    for i in range(len(framesOutput)):
        objectDetected = framesOutput[i]

        if skipFrame == 0:
            if objectDetected == True:
                print("Prediction at Frame: ", i)
                skipFrame += 1
        else: 
            skipFrame = (skipFrame + 1) % (framesToSkip + 1 )
        

In [24]:
def logicCheck():
    framesOutput = [True, False, True, False, True, False, True, True, False, False, True, True, True]
    checkFrameSkipLogic(framesOutput)

In [25]:
def main():
    camera = cv2.VideoCapture(2)
    # Example usage
    input_video = '../videos/6sec.mp4'
    video_name = os.path.basename(input_video)    
    output_video = f'./output/{video_name}.avi'
    display_camera_analysis(input_video, output_video)

In [26]:
if __name__ == "__main__":
    main()
    # logicCheck()

[ WARN:0@111.003] global cap_v4l.cpp:997 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:0@111.119] global obsensor_uvc_stream_channel.cpp:159 getStreamChannelGroup Camera index out of range


Specs of Video =  FPS: 30, Width: 1280, Height: 720
