In [1]:
import glob
import sys 
import cv2 as cv
import pdb
import numpy as np
import os
import copy


In [2]:
def _read_annotation(annotation_path):
    """Load one annotation file and extract the initial bounding box.

    Blank lines are ignored. The second remaining line must hold five
    whitespace-separated integers: ``frame x1 y1 x2 y2``.

    Args:
        annotation_path: Path of the ``.txt`` annotation file.

    Returns:
        A ``(lines, (x1, y1, x2, y2))`` tuple, where ``lines`` is the list of
        stripped, non-empty lines of the file.

    Raises:
        ValueError: If the file has fewer than two non-empty lines or the
            second one does not contain exactly five integers.
    """
    with open(annotation_path, 'r') as file:
        lines = [stripped for stripped in (raw.strip() for raw in file) if stripped]

    if len(lines) < 2:
        raise ValueError(
            f"{annotation_path}: expected an initial bounding box on the second line"
        )

    # split() without an argument tolerates tabs and repeated spaces.
    fields = lines[1].split()
    if len(fields) != 5:
        raise ValueError(
            f"{annotation_path}: second line must be 'frame x1 y1 x2 y2', got {lines[1]!r}"
        )
    _, x1, y1, x2, y2 = map(int, fields)
    return lines, (x1, y1, x2, y2)


def _track_video(video_path, init_box, show=False, shrink_factor=2.25):
    """Track one object through a video with a DaSiamRPN tracker.

    The tracker is initialised on the first frame with ``init_box`` and then
    updated on every following frame.

    Args:
        video_path: Path of the video to process.
        init_box: Initial box as ``(x1, y1, x2, y2)`` corner coordinates.
        show: When True, draw the box and display each frame in a window;
            pressing ``q`` stops the tracking early.
        shrink_factor: A box counts as "shrunk" once its width or height drops
            below the initial width or height divided by this value.

    Returns:
        A list of ``"frame x1 y1 x2 y2"`` strings (no trailing newline), one
        per frame on which the tracker reported success. Frame numbering
        starts at 1 for the frame after the initialisation frame.

    Raises:
        RuntimeError: If the first frame of the video cannot be read.
    """
    x1, y1, x2, y2 = init_box
    init_w, init_h = x2 - x1, y2 - y1
    tracked = []

    cap = cv.VideoCapture(video_path)
    try:
        success, frame = cap.read()
        if not success:
            raise RuntimeError(f"Could not read the first frame of {video_path}")

        # NOTE(review): the original comment mentions "attached files", so the
        # tracker presumably needs its model files to be discoverable - confirm.
        tracker = cv.TrackerDaSiamRPN_create()
        tracker.init(frame, (x1, y1, init_w, init_h))  # (x, y, width, height)

        frame_counter = 1
        while True:
            success, img = cap.read()
            if not success:
                break  # no more frames

            found, bbox = tracker.update(img)
            if found:
                x, y, w, h = map(int, bbox)
                tracked.append(f"{frame_counter} {x} {y} {x + w} {y + h}")

                # Evaluated only on a successful update, so x/y/w/h are always
                # defined here (they were unbound if the very first update failed).
                out_of_frame = (
                    x < 0 or y < 0 or x + w > img.shape[1] or y + h > img.shape[0]
                )
                shrunk = w < init_w / shrink_factor or h < init_h / shrink_factor
                # NOTE(review): tracking stops only when BOTH conditions hold,
                # as in the original code, although its comment said "or".
                if out_of_frame and shrunk:
                    break

                if show:
                    cv.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 3, 3)

            if show:
                cv.imshow("Tracking", img)
                if cv.waitKey(1) & 0xff == ord('q'):
                    break

            frame_counter += 1
    finally:
        # Always free the capture, including when tracking raises.
        cap.release()

    return tracked


def solve_all_videos(videos_path, text_files_path, output_dir="results_task2", show=False):
    """Track the annotated object in every video and write one result file each.

    ``*.mp4`` files from ``videos_path`` and ``*.txt`` files from
    ``text_files_path`` are both sorted by name and paired by position, so the
    n-th video is matched with the n-th annotation file. For the i-th pair
    (1-based) the file ``<output_dir>/<i:02d>_predicted.txt`` is written. It
    contains the annotation file's lines followed by one
    ``frame x1 y1 x2 y2`` line per successfully tracked frame, one record per
    line.

    Args:
        videos_path: Directory containing the ``.mp4`` videos.
        text_files_path: Directory containing the ``.txt`` annotation files.
        output_dir: Directory for the result files; created if missing.
        show: When True, display the tracking in a window while processing.

    Returns:
        None.

    Raises:
        ValueError: If there are fewer annotation files than videos, or an
            annotation file is malformed.
        RuntimeError: If the first frame of a video cannot be read.
    """
    # glob returns names in arbitrary order; sort both lists so that the
    # positional pairing below is deterministic.
    text_files = sorted(glob.glob(os.path.join(text_files_path, "*.txt")))
    videos = sorted(glob.glob(os.path.join(videos_path, "*.mp4")))

    if len(text_files) < len(videos):
        raise ValueError(
            f"Found {len(videos)} videos but only {len(text_files)} annotation files"
        )

    if videos:
        os.makedirs(output_dir, exist_ok=True)

    try:
        for i, (video_path, annotation_path) in enumerate(zip(videos, text_files)):
            header_lines, init_box = _read_annotation(annotation_path)
            tracked_lines = _track_video(video_path, init_box, show=show)

            result_path = os.path.join(output_dir, f"{i+1:02d}_predicted.txt")
            with open(result_path, 'w') as file:
                file.write("\n".join(header_lines + tracked_lines) + "\n")
    finally:
        if show:
            cv.destroyAllWindows()  # only needed when windows were opened


In [11]:
# The videos and their annotation files are read from the same folder.
videos_path = text_files_path = "test/Task2"
solve_all_videos(videos_path=videos_path, text_files_path=text_files_path)