In [2]:
import cv2
import numpy as np
import os

In [3]:
# Files needed by the detector: the YOLOv3-tiny network definition, its
# trained weights, and the list of COCO class names (one per line).
yoloCfg = "/home/rko8001/Downloads/BTP/btp-models/object-detection/yolov3-tiny.cfg"
yoloWeights = "/home/rko8001/Downloads/BTP/btp-models/object-detection/yolov3-tiny.weights"
coco_names = "/home/rko8001/Downloads/BTP/btp-models/object-detection/coco.names"

# Cool-down after a prediction: once an object has triggered a prediction,
# the next `framesToSkip` consecutive frames do not trigger another one, so
# the same apple is not predicted several times.
framesToSkip = 5

# Horizontal pixel band of the frame. A detection whose centre x-coordinate
# lies inside [widthBegin, widthEnd] counts as "detected".
widthBegin = 300
widthEnd = 320

# Minimum class score a detection must reach to be accepted. 0.4 was chosen
# as a good trade-off between detection accuracy and speed.
confidenceInterval = 0.4

In [4]:
# Build the detection network from the configured weights/config pair.
net = cv2.dnn.readNet(yoloWeights, yoloCfg)

# Class labels: one entry per line of the names file, whitespace-trimmed.
with open(coco_names, "r") as names_file:
    classes = [raw_name.strip() for raw_name in names_file]

# Names of the network's unconnected output layers; these are the layers
# requested from net.forward() when running inference.
layer_names = net.getUnconnectedOutLayersNames()

In [5]:
def extractDimensions(detection, frame):
    """Convert a frame-relative detection into pixel coordinates.

    Args:
        detection: Indexable whose first four entries are the box centre x,
            centre y, width and height. They are scaled by the frame width
            and height here, i.e. they are treated as fractions of the frame.
        frame: Image array. Only ``frame.shape[0]`` (height) and
            ``frame.shape[1]`` (width) are read, so both colour (H, W, C)
            and single-channel (H, W) frames are accepted.

    Returns:
        Tuple ``(x, y, w, h, center_x, center_y)`` of ints, where ``(x, y)``
        is the top-left corner of the box and ``(w, h)`` its size in pixels.
    """
    # Slice instead of 3-way unpacking so a 2-D (grayscale) frame does not
    # raise ValueError.
    height, width = frame.shape[:2]

    center_x = int(detection[0] * width)
    center_y = int(detection[1] * height)
    w = int(detection[2] * width)
    h = int(detection[3] * height)

    # Shift from the box centre to its top-left corner.
    x = int(center_x - w / 2)
    y = int(center_y - h / 2)
    return x, y, w, h, center_x, center_y

In [6]:
def objectDetection(frame):
    """Run the detector on ``frame`` and report whether a hit lies in the centre band.

    For every output layer of the network, only the FIRST detection whose
    best class score reaches ``confidenceInterval`` is used: its box is drawn
    on ``frame`` (in place) and the remaining detections of that layer are
    ignored. No class filtering and no non-max suppression is applied.

    Args:
        frame: Image array passed to ``cv2.dnn.blobFromImage``; it is
            modified in place by the drawn rectangles.

    Returns:
        Tuple ``(frame, inMiddle)`` where ``inMiddle`` is True when at least
        one accepted detection has its centre x within
        ``[widthBegin, widthEnd]``.
    """
    # Scale pixels by 0.00392 (~1/255), resize to 416x416, no mean
    # subtraction, swap the R and B channels, do not crop.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(layer_names)

    in_middle = False
    for layer_output in layer_outputs:
        for candidate in layer_output:
            # Entries from index 5 onwards are the per-class scores.
            class_scores = candidate[5:]
            top_score = class_scores[np.argmax(class_scores)]
            if not (top_score >= confidenceInterval):
                continue

            left, top, box_w, box_h, center_x, _ = extractDimensions(candidate, frame)
            cv2.rectangle(frame, (left, top), (left + box_w, top + box_h), (0, 255, 0), 2)

            if widthBegin <= center_x <= widthEnd:
                in_middle = True

            # Stop scanning this output layer after its first accepted hit;
            # the next output layer is still examined.
            break

    return frame, in_middle

In [7]:
def objectDetection2(frame):
    """Detect apples in ``frame`` using non-max suppression and draw labelled boxes.

    Only detections whose best class is ``'apple'`` with a class score above
    0.5 are kept. Overlapping boxes are then merged with
    ``cv2.dnn.NMSBoxes`` and each surviving box is drawn on ``frame`` (in
    place) together with its confidence.

    Args:
        frame: Image array passed to ``cv2.dnn.blobFromImage``; it is
            modified in place by the drawn boxes and labels.

    Returns:
        Tuple ``(frame, inMiddle)`` where ``inMiddle`` is True when at least
        one box that survived suppression has its centre x within
        ``[widthBegin, widthEnd]``. (Previously this flag was never updated
        and the function always returned False.)

    Raises:
        ValueError: if ``'apple'`` is not present in ``classes``.
    """
    score_threshold = 0.5
    nms_threshold = 0.4
    # Loop-invariant: resolve the class index once instead of once per detection.
    apple_id = classes.index('apple')

    boxes = []
    confidences = []
    centers_x = []

    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(layer_names)

    for out in outs:
        for detection in out:
            # Entries from index 5 onwards are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > score_threshold and class_id == apple_id:
                x, y, w, h, center_x, _ = extractDimensions(detection, frame)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                centers_x.append(center_x)

    # Apply non-max suppression; the result holds the indices of kept boxes.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold, nms_threshold)

    # Draw the surviving boxes. Flattening copes with the different shapes
    # NMSBoxes may return (empty tuple, flat array, or column array).
    inMiddle = False
    font = cv2.FONT_HERSHEY_PLAIN
    for kept in np.asarray(indexes).reshape(-1):
        i = int(kept)
        x, y, w, h = boxes[i]
        label = f'Apple {confidences[i]:.2f}'
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, y + 30), font, 2, (0, 255, 0), 2)

        if widthBegin <= centers_x[i] <= widthEnd:
            inMiddle = True

    return frame, inMiddle

In [8]:
def display_camera(camera):
    """Show live detection results from ``camera`` until the stream ends or 'q' is pressed.

    Each frame is passed through ``objectDetection``. A detection triggers a
    "prediction" (currently only a log line) unless a previous prediction
    happened within the last ``framesToSkip`` frames.

    Args:
        camera: Capture object providing ``read()`` -> ``(success, frame)``
            and ``release()``. It is released before this function returns,
            including when an exception is raised.
    """
    skipFrame = 0  # 0 = ready to predict; 1..framesToSkip = cooling down

    try:
        while True:
            success, frame = camera.read()
            if not success:
                break

            newFrame, objectDetected = objectDetection(frame)

            if skipFrame == 0:
                if objectDetected:
                    # do the prediction
                    print("objectDetected and predicted")
                    skipFrame += 1
            else:
                if objectDetected:
                    print("objectDetected but not predicted")
                # Advance the cool-down counter; it wraps back to 0 after
                # framesToSkip frames.
                skipFrame = (skipFrame + 1) % (framesToSkip + 1)

            cv2.imshow("Object Detection", newFrame)
            # imshow only repaints while the GUI event loop is pumped by
            # waitKey; this also lets the user quit with 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        camera.release()
        cv2.destroyAllWindows()


In [15]:
def display_camera_analysis(video_file, output_file):
    """Run detection over a video file, save the annotated video, and time predictions.

    Each frame is read, passed through ``objectDetection``, written to
    ``output_file`` and displayed. A detection triggers a "prediction"
    (currently a placeholder) unless one already happened within the last
    ``framesToSkip`` frames. Processing stops at the end of the video or when
    'q' is pressed.

    Args:
        video_file: Path of the input video.
        output_file: Path of the annotated output video (XVID codec). Its
            parent directory is created if it does not exist.

    Returns:
        Wall-clock seconds spent on the LAST frame that triggered a
        prediction (frame read + detection + prediction), or ``None`` when
        the video could not be opened or no frame triggered a prediction.
    """
    skipFrame = 0  # 0 = ready to predict; 1..framesToSkip = cooling down
    predictionTime = None

    # Open the video file
    video = cv2.VideoCapture(video_file)
    out = None
    try:
        if not video.isOpened():
            print(f"Could not open video: {video_file}")
            return predictionTime

        # Get video properties
        fps = int(video.get(cv2.CAP_PROP_FPS))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"Specs of Video =  FPS: {fps}, Width: {width}, Height: {height}")

        # VideoWriter does not create missing directories and does not raise
        # when it cannot open the target, so make sure the folder exists.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

        while True:
            timeBegin = cv2.getTickCount()
            success, frame = video.read()
            if not success:
                break

            newFrame, objectDetected = objectDetection(frame)

            predicted = False
            if skipFrame == 0:
                if objectDetected:
                    # do the prediction (placeholder: no classifier is called yet)
                    predicted = True
                    print("objectDetected and predicted")
                    skipFrame += 1
            else:
                if objectDetected:
                    print("objectDetected but not predicted")
                # Advance the cool-down counter; it wraps back to 0 after
                # framesToSkip frames.
                skipFrame = (skipFrame + 1) % (framesToSkip + 1)

            timeEnd = cv2.getTickCount()
            totalTime = (timeEnd - timeBegin) / cv2.getTickFrequency()

            if predicted:
                predictionTime = totalTime
                print(f"Total Time for a single Frame Detection and Prediction: {totalTime:.2f} seconds")

            out.write(newFrame)
            cv2.imshow("Object Detection", newFrame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture, the writer and the window, even on error.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return predictionTime


In [10]:
def checkFrameSkipLogic(framesOutput, skipCount=None):
    """Replay the frame-skip logic on a recorded sequence of detection flags.

    Uses the same counter arithmetic as the display loops: a detection on a
    "ready" frame triggers a prediction, after which the following
    ``skipCount`` frames are ignored regardless of their flag.

    Args:
        framesOutput: Sequence of per-frame detection flags (truthy = an
            object was detected on that frame).
        skipCount: Number of frames to ignore after a prediction. Defaults to
            the module-level ``framesToSkip`` when omitted.

    Returns:
        List of frame indices at which a prediction would be made. Each index
        is also printed as ``Prediction at Frame:  <i>``.
    """
    if skipCount is None:
        skipCount = framesToSkip

    predictedFrames = []
    skipFrame = 0  # 0 = ready to predict; 1..skipCount = cooling down
    for i, objectDetected in enumerate(framesOutput):
        if skipFrame == 0:
            if objectDetected:
                print("Prediction at Frame: ", i)
                predictedFrames.append(i)
                skipFrame += 1
        else:
            # Advance the cool-down counter; it wraps back to 0 after
            # skipCount frames.
            skipFrame = (skipFrame + 1) % (skipCount + 1)

    return predictedFrames
        

In [11]:
def logicCheck():
    """Feed a fixed sequence of detection flags through ``checkFrameSkipLogic``.

    Serves as a quick manual sanity check of the frame-skip behaviour; the
    frames at which a prediction would happen are printed.
    """
    sampleDetections = [
        True, False, True, False, True, False, True,
        True, False, False, True, True, True,
    ]
    checkFrameSkipLogic(sampleDetections)

In [17]:
def main():
    """Run the timing analysis for each configured apple crossing time.

    For every crossing time, the matching video
    (``../videos/cropped<N>sec.mp4``) is analysed 10 times with
    ``display_camera_analysis`` and the average prediction time is printed.
    Runs in which no prediction was made (the analysis returned ``None``)
    are left out of the average instead of crashing the summation.
    """
    # Example usage
    crossingTimes = [6]
    for crossingTime in crossingTimes:  # not named `input`: that shadows the built-in
        print("Analysis for Apple Crossing Time: ", crossingTime, " seconds")
        input_video = f'../videos/cropped{crossingTime}sec.mp4'
        video_name = os.path.basename(input_video)
        output_video = f'./output/{video_name}.avi'

        predTimes = []
        for _ in range(10):
            predTime = display_camera_analysis(input_video, output_video)
            if predTime is not None:
                predTimes.append(predTime)

        print()
        if predTimes:
            print("Avg PredictionTime: ", sum(predTimes)/len(predTimes), "secs")
        else:
            # Avoid ZeroDivisionError when no run produced a prediction.
            print("Avg PredictionTime: n/a (no prediction was made in any run)")
        print()
        print()
    
    


In [18]:
# Entry point: run the video timing analysis when this cell/script is executed
# as the top-level module.
if __name__ == "__main__":
    main()
    # logicCheck()  # alternative entry point: sanity-check the frame-skip logic only

Analysis for Apple Crossing Time:  6  seconds
Specs of Video =  FPS: 30, Width: 720, Height: 416
objectDetected and predicted
Total Time for a single Frame Detection and Prediction: 0.07 seconds
objectDetected but not predicted
objectDetected but not predicted
Specs of Video =  FPS: 30, Width: 720, Height: 416
objectDetected and predicted
Total Time for a single Frame Detection and Prediction: 0.06 seconds
objectDetected but not predicted
objectDetected but not predicted
Specs of Video =  FPS: 30, Width: 720, Height: 416
objectDetected and predicted
Total Time for a single Frame Detection and Prediction: 0.07 seconds
objectDetected but not predicted
objectDetected but not predicted
Specs of Video =  FPS: 30, Width: 720, Height: 416
objectDetected and predicted
Total Time for a single Frame Detection and Prediction: 0.06 seconds
objectDetected but not predicted
objectDetected but not predicted
Specs of Video =  FPS: 30, Width: 720, Height: 416
objectDetected and predicted
Total Time for