In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import xml.etree.ElementTree
import sys
import pickle
import warnings
import shutil
import os
import pandas as pd
import re
from moviepy.video.io.VideoFileClip import VideoFileClip
import json
import xml.etree.ElementTree as ET

# Silence every warning raised from this point on: no category, module or
# message filter is given, so the 'ignore' action applies to all of them.
warnings.filterwarnings('ignore')

def convert_video_to_sequences(video_file_path, video_file_name, output_path):
    """Export the frames of a video as a numbered JPEG sequence.

    Images are named ``<output_path>/<video_file_name>_frame_NNNNN.jpg``,
    where ``NNNNN`` is filled in by ``write_images_sequence``.

    Args:
        video_file_path: Path of the video to read, or None.
        video_file_name: Prefix used for the generated image names.
        output_path: Folder that receives the images, or None.

    Returns:
        The value returned by ``write_images_sequence`` (the caller iterates
        over it as a list of image paths), or None when either
        ``video_file_path`` or ``output_path`` is None.
    """
    # Nothing to convert without both a source and a destination.
    if video_file_path is None or output_path is None:
        return None

    clip = VideoFileClip(video_file_path)
    name_format = '{0}/{1}_frame_%05d.jpg'.format(output_path, video_file_name)
    return clip.write_images_sequence(name_format, fps=None, verbose=True, withmask=True)

def get_frames_from_span(frame_dict, label):
    """Expand one bounding-box record into a per-frame annotation mapping.

    Args:
        frame_dict: Mapping with the keys ``x``, ``y``, ``width``, ``height``
            (values convertible with ``int``) and ``framespan``, a string of
            the form ``"first:last"``. May be empty or None.
        label: Value stored under ``'label'`` in every generated annotation.

    Returns:
        A dict mapping each frame number from ``first`` to ``last``
        (inclusive) to a one-element list holding the box as
        ``xmin``/``ymin``/``xmax``/``ymax``/``label``. Every corner is
        shifted by -1 relative to the input coordinates. An empty dict is
        returned when ``frame_dict`` is empty or None.
    """
    if not frame_dict:
        return {}

    left = int(frame_dict['x'])
    top = int(frame_dict['y'])
    box_width = int(frame_dict['width'])
    box_height = int(frame_dict['height'])

    span_bounds = frame_dict['framespan'].split(':')
    first_frame = int(span_bounds[0])
    last_frame = int(span_bounds[1])

    # A fresh list and dict are built per frame so that entries can later be
    # extended or modified independently of one another.
    return {
        frame_no: [{
            'xmin': left - 1,
            'ymin': top - 1,
            'xmax': left + box_width - 1,
            'ymax': top + box_height - 1,
            'label': label,
        }]
        for frame_no in range(first_frame, last_frame + 1)
    }

def parse_xgtf_ann_file(file_name, object_type):
    """Collect per-frame bounding boxes for one object type from an .xgtf file.

    Every ``viper:object`` element found under a ``viper:data`` element is
    inspected. Objects whose ``name`` attribute equals ``object_type``
    contribute the ``bbox`` children of their attributes named ``Location``
    or ``obox``.

    Args:
        file_name: Path of the ViPER XML annotation file to parse.
        object_type: Object name to extract; also used as the label of every
            generated annotation.

    Returns:
        A dict mapping frame number to the list of box annotations (as
        produced by ``get_frames_from_span``) that cover that frame. Empty
        when nothing matches.

    Raises:
        OSError: If ``file_name`` cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    namespace = {'viper': 'http://lamp.cfar.umd.edu/viper#'}
    master_frame_map = {}

    root = ET.parse(file_name).getroot()
    for data in root.findall(".//viper:data", namespace):
        for obj in data.findall(".//viper:object", namespace):
            if obj.attrib.get('name') != object_type:
                continue
            # Iterate the element directly: Element.getchildren() was removed
            # in Python 3.9 and raises AttributeError on current interpreters.
            for attr in obj:
                if 'attribute' not in attr.tag:
                    continue
                if attr.attrib.get('name') not in ('Location', 'obox'):
                    continue
                for bbox in attr:
                    if 'bbox' not in bbox.tag:
                        continue
                    frame_map = get_frames_from_span(bbox.attrib, object_type)
                    # Merge into the master map so several boxes on the same
                    # frame accumulate in one list.
                    for frame, boxes in frame_map.items():
                        master_frame_map.setdefault(frame, []).extend(boxes)
    return master_frame_map



def write_ann_files(video_file_name, frame_map, output_folder):
    """Write one JSON annotation file per frame in ``frame_map``.

    Files are named ``<output_folder>/<video_file_name>_frame_NNNNN.json``,
    where ``NNNNN`` is the frame key minus one, zero-padded to five digits.

    Args:
        video_file_name: Prefix used for the generated file names.
        frame_map: Mapping of frame number to a JSON-serialisable value
            (the annotations for that frame).
        output_folder: Existing folder that receives the JSON files.
    """
    for frame_number, annotations in frame_map.items():
        # The file index is one less than the frame key.
        target_path = '{0}/{1}_frame_{2:05d}.json'.format(
            output_folder, video_file_name, int(frame_number) - 1)
        with open(target_path, 'w') as handle:
            json.dump(annotations, handle)

# Path of the folder holding the annotated video files
root_video_folder = 'AnnotatedVideoFilePath'
# Path of the folder holding the ViPER-GT .xgtf annotation files
root_annotation_folder = 'xgtfFilePath'

# Choose a path for the output images/frames folder
frame_output_folder = 'OutputImagesFolderPath'
# Choose a path for the output annotation files
ann_output_folder = 'OutputAnnotationsFolderPath'
# Choose a path for the root folder (metadata.csv is written here)
root_folder = 'RootFolderPath'

ann_ext = '.xgtf'

# Select the name of the object you want
object_type = 'ObjectName'

# Create the output locations up front so that neither the frame export nor
# the annotation/metadata writers fail on a missing folder.
for folder in (frame_output_folder, ann_output_folder, root_folder):
    os.makedirs(folder, exist_ok=True)

# Compiled once; a raw string avoids the invalid "\d" escape and the dot
# before the extension is matched literally.
frame_number_pattern = re.compile(r'.*_frame_(\d+)\.jpg', re.IGNORECASE)

meta_data_rows = []
file_counter = 0

# Sorted so that the ids in metadata.csv are reproducible between runs.
for x in sorted(os.listdir(root_video_folder)):
    video_file_path = '{0}/{1}'.format(root_video_folder, x)
    if not os.path.isfile(video_file_path):
        continue

    # splitext also copes with names that contain no '.' at all.
    video_file_name = os.path.splitext(x)[0]
    ann_file = '{0}/{1}{2}'.format(root_annotation_folder, video_file_name, ann_ext)

    # Check before the (slow) frame export rather than failing after it.
    if not os.path.isfile(ann_file):
        print('Skipping {0}: annotation file {1} not found.'.format(x, ann_file))
        continue

    print('Processing {0}...'.format(x))
    image_list = convert_video_to_sequences(video_file_path, video_file_name, frame_output_folder)

    print('Creating frame level annotations...')
    frame_map = parse_xgtf_ann_file(ann_file, object_type)
    print(frame_map)

    if not isinstance(frame_map, dict):
        print("Error: frame_map is not a dictionary.")
        # Move on instead of reusing frame numbers left over from the
        # previous video (or hitting an undefined name on the first one).
        continue

    # Image files are numbered one below the annotation frame keys; a set
    # gives constant-time membership tests in the loop below.
    frames = {int(frame) - 1 for frame in frame_map}
    write_ann_files(video_file_name, frame_map, ann_output_folder)

    for file_path in image_list or []:
        img_file = os.path.basename(file_path)
        frame_number_search = frame_number_pattern.search(img_file)
        if frame_number_search and int(frame_number_search.group(1)) in frames:
            file_counter += 1
            meta_data_rows.append(
                (file_counter, img_file, '{0}.json'.format(os.path.splitext(img_file)[0])))

# Only frames that carry at least one annotation are listed.
meta_data = pd.DataFrame(meta_data_rows, columns=['id', 'img_file', 'ann_file'])
meta_data.to_csv('{0}/metadata.csv'.format(root_folder), index=False)
print('Finished processing....')

In [None]:
def convert_annotations_yolo(json_data, image_width, image_height):
    """Convert corner-style boxes into normalised YOLO label lines.

    Args:
        json_data: Iterable of dicts with the keys ``label``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax``.
        image_width: Width used to normalise x values.
        image_height: Height used to normalise y values.

    Returns:
        A list with one string per box, formatted as
        ``"<label> <x_center> <y_center> <width> <height>"``. The label is
        emitted verbatim and the four numbers are printed with six decimals.
    """
    def _to_line(box):
        name = box['label']
        left = box['xmin']
        top = box['ymin']
        right = box['xmax']
        bottom = box['ymax']

        # Centre and size, each divided by the matching image dimension.
        cx = (left + right) / (2 * image_width)
        cy = (top + bottom) / (2 * image_height)
        box_w = (right - left) / image_width
        box_h = (bottom - top) / image_height

        return f"{name} {cx:.6f} {cy:.6f} {box_w:.6f} {box_h:.6f}"

    return [_to_line(box) for box in json_data]

annotations_folder = ann_output_folder
images_folder = frame_output_folder

# Choose a path for the new dataset in yolov8 format.
output_folder = "YoloV8DatasetPath"

os.makedirs(output_folder, exist_ok=True)

# YOLO label files identify classes by zero-based integer id, not by name.
# The selected object type gets id 0; any other label found in the JSON
# files is given the next free id in order of first appearance.
class_ids = {object_type: 0}

# Sorted so that ids handed out to unexpected labels are reproducible.
for annotation_file in sorted(os.listdir(annotations_folder)):
    if not annotation_file.endswith('.json'):
        continue

    base_name = os.path.splitext(annotation_file)[0]
    annotation_path = os.path.join(annotations_folder, annotation_file)
    image_path = os.path.join(images_folder, base_name + '.jpg')

    if not os.path.exists(image_path):
        print(f"Matching image not found for {annotation_file}. Skipping...")
        continue

    # The image size is read with cv2, which this notebook already imports;
    # the previous `Image.open` call referenced a name that was never
    # imported. cv2.imread returns None instead of raising on failure.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image for {annotation_file}. Skipping...")
        continue
    image_height, image_width = image.shape[:2]

    with open(annotation_path, 'r') as f:
        json_data = json.load(f)

    # Replace each class name with its numeric id before formatting.
    for annotation in json_data:
        annotation['label'] = class_ids.setdefault(annotation['label'], len(class_ids))

    annotations_yolo = convert_annotations_yolo(json_data, image_width, image_height)

    output_file = os.path.join(output_folder, base_name + '.txt')
    with open(output_file, 'w') as f:
        f.write('\n'.join(annotations_yolo))

print(f"Class ids: {class_ids}")
print("Conversion completed.")