<h2>File paths and imports</h2>

In [1]:
from ui_lib import *

# Weights of the trained detection model
model_path = "C:/Users/Theo/Documents/Unif/Models/body/v8s/weights/best.pt"

# Video locations
# - raw input videos (not annotated yet)
input_video_directory = "C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos"
# - final videos, produced once the human corrections have been applied
output_video_directory = "C:/Users/Theo/Documents/Unif/chimprec-videos/output_videos"
# - files holding the manual modifications
#   (the name is spelled "mannual" here and in the cell that reads it; keep both in sync)
mannual_annotations_directory = "C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos/manual_annotations"

# Intermediate results are nested under the output directory
output_video_directory_temp = f"{output_video_directory}/temp"
raw_text_output_directory = f"{output_video_directory_temp}/raw_output"

# Make sure every working directory exists; directories already present are left untouched
for _required_directory in (
    input_video_directory,
    output_video_directory,
    mannual_annotations_directory,
    output_video_directory_temp,
    raw_text_output_directory,
):
    os.makedirs(_required_directory, exist_ok=True)



<h2>First step:</h2>
<h3>Process the raw (non-annotated) videos to produce a textual output (stored in <i>raw_text_output_directory</i>) and a visual output (stored in <i>output_video_directory_temp</i>).</h3>

In [None]:
# First step: run detection + tracking on every input video and write, per video,
# a raw text file of annotations and a temporary preview video.

max_cosine_distance = 0.5       # maximal distance to match an object (lower = more strict)
nn_budget = None                # maximal buffer size

# YOLOv8s initialisation (the detection model is shared by all the videos)
YOLOv8s = YOLO(model_path)

# Osnet initialisation (feature-extraction model, shared by all the videos)
Osnet = torchreid.models.build_model(name='osnet_x1_0', num_classes=751, pretrained=True)
Osnet.eval()

# sorted(): os.listdir returns the entries in arbitrary order; sorting makes the
# processing order (and therefore the printed log) reproducible between runs.
for input_video in sorted(os.listdir(input_video_directory)):
    # Case-insensitive extension test: accepts ".mp4", ".MP4" and mixed-case variants.
    if not input_video.lower().endswith(".mp4"):
        continue

    full_video_path = os.path.join(input_video_directory, input_video)
    video_name = os.path.splitext(input_video)[0]
    raw_text_file_path = f"{raw_text_output_directory}/{video_name}.txt"

    # DeepSORT initialisation.
    # A new metric and a new tracker are built for every video so that nothing the
    # tracker accumulated while processing one video can be carried into the next one.
    # NOTE(review): this assumes perform_tracking does not reset the tracker by itself
    # - confirm against its implementation.
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    DeepSort = DeepSortTracker(metric)

    # production of the textual outputs
    perform_tracking(
        input_video_path = full_video_path,
        output_text_file_path = raw_text_file_path,
        detection_model = YOLOv8s,
        tracker = DeepSort,
        confidence_threshold = 0.5,
        model_feature_extraction = Osnet
    )
    print(f"Annotations ready for video: {full_video_path}")

    # production of the visual output (drawn from the text file written just above)
    draw_bbox_from_file(
        file_path = raw_text_file_path,
        input_video_path = full_video_path,
        output_video_path = f"{output_video_directory_temp}/{video_name}-(temp).mp4",
        annotation_type="bbox",
        draw_frame_count=True
    )
    print(f"Treatment done: {full_video_path}")

Successfully loaded imagenet pretrained weights from "C:\Users\Theo/.cache\torch\checkpoints\osnet_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
10
20
30
40
50
60
70
80
90
100
Annotations ready for video: C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos\C0005.MP4
Treatment done: C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos\C0005.MP4
10
20
30
40
50
60
70
80
90
100
Annotations ready for video: C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos\sample1.MP4
Treatment done: C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos\sample1.MP4


<h2>Third step:</h2>
<h3>Process the outputs of the first two steps (the raw tracking output and the manual annotations) to produce a textual output (stored in <i>metadata_file_path</i>) and a visual output (stored in <i>output_video_path</i>).</h3>

In [2]:
# Third step: for every input video, combine the raw tracking output with the manual
# annotations, write the resulting metadata file and render the final video.

# sorted(): os.listdir returns the entries in arbitrary order; sorting makes the
# processing order reproducible between runs.
for input_video in sorted(os.listdir(input_video_directory)):
    # Case-insensitive extension test: accepts ".mp4", ".MP4" and mixed-case variants.
    if not input_video.lower().endswith(".mp4"):
        continue

    full_video_path = os.path.join(input_video_directory, input_video)
    video_name = os.path.splitext(input_video)[0]
    annotation_file = f"{mannual_annotations_directory}/{video_name}.txt"

    # A video without a manual annotation file is reported and skipped.
    if not os.path.isfile(annotation_file):
        print(f"Error: the manual annotation file related to the video <{full_video_path}> is not found. It must be located at <{annotation_file}>.")
        continue

    # Any other failure while loading the annotations is reported with its real cause
    # instead of being presented as a missing file. `except Exception` (rather than a
    # bare `except`) lets KeyboardInterrupt / SystemExit stop the notebook as expected.
    try:
        edit_reader = modification_reader(annotation_file)
    except Exception as error:
        print(f"Error: the manual annotation file <{annotation_file}> related to the video <{full_video_path}> could not be read: {error!r}")
        continue

    # Created only once the annotations are available, so that nothing is opened
    # for a video that is going to be skipped.
    raw_reader = raw_tracking_data_reader(f"{raw_text_output_directory}/{video_name}.txt")

    metadata_file_path = f"{output_video_directory}/{video_name}-treated.txt"
    output_video_path = f"{output_video_directory}/{video_name}-treated.mp4"
    writer = data_writer(metadata_file_path)

    # computation of the new metadata file
    modified_data = edit_raw_output(raw_reader, edit_reader)

    # production of the textual output
    # NOTE(review): the file is read back right below by draw_bbox_from_file; this
    # presumes writer.write leaves it complete on disk - confirm in data_writer.
    writer.write(modified_data)

    # production of the visual output
    draw_bbox_from_file(
        file_path = metadata_file_path,
        input_video_path = full_video_path,
        output_video_path = output_video_path,
        annotation_type="triangle"
    )

Error: the manual annotation file related to the video <C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos\sample1.MP4> is not found. It must be located at <C:/Users/Theo/Documents/Unif/chimprec-videos/input_videos/manual_annotations/sample1.txt>.
