In [1]:
# Load IPython's autoreload extension and set it to mode 2, so edited
# modules (e.g. the local `data` module imported below) are reloaded
# before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import os
import numpy as np
from tqdm.auto import tqdm
import mmcv
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset
import data

In [6]:
# Display ClimbingDataset's test and validation sequence tables; the recorded
# output maps each video name to a slice of frame indices.
# NOTE(review): this needs `data` to still be the imported project module; a
# later cell rebinds `data` to an array, so run this before that cell.
data.ClimbingDataset.test_seqs, data.ClimbingDataset.val_seqs

({'IMG_2139': slice(0, 180, None),
  'IMG_2140': slice(360, 540, None),
  'IMG_2141': slice(720, 900, None),
  'IMG_2142': slice(1080, 1260, None),
  'IMG_2320': slice(1440, 1620, None),
  'VID_20210123_091729': slice(1800, 1980, None),
  'VID_20210123_104706': slice(2160, 2340, None),
  'VID_20210123_110129': slice(2520, 2700, None),
  'VID_20210123_111337': slice(2880, 3060, None),
  'VID_20210123_111921': slice(3240, 3420, None)},
 {'IMG_2139': slice(180, 360, None),
  'IMG_2140': slice(540, 720, None),
  'IMG_2141': slice(900, 1080, None),
  'IMG_2142': slice(1260, 1440, None),
  'IMG_2320': slice(1620, 1800, None),
  'VID_20210123_091729': slice(1980, 2160, None),
  'VID_20210123_104706': slice(2340, 2520, None),
  'VID_20210123_110129': slice(2700, 2880, None),
  'VID_20210123_111337': slice(3060, 3240, None),
  'VID_20210123_111921': slice(3420, 3600, None)})

In [3]:
def load_data(path, stop=None):
    """Load per-frame keypoint files from a directory into one array.

    Every entry in ``path`` must be named ``<integer>.<ext>``; files are read
    in ascending integer order, one file per frame.

    Args:
        path: Directory holding the per-frame result files. A trailing
            separator is optional.
        stop: Maximum number of frames to load. Any falsy value (``None`` or
            ``0``) loads every file. Values larger than the number of files
            are clamped, so the result never contains rows for frames that
            do not exist.

    Returns:
        Float array of shape ``(n_frames, 17, 3)``. Row ``i`` holds the
        ``'keypoints'`` entry of the first record in file ``i``; frames whose
        file contains no records stay all-zero.

    Raises:
        ValueError: If a file name does not start with an integer stem.
    """
    files = os.listdir(path)
    files.sort(key=lambda s: int(s.split('.')[0]))
    # Clamp so that an over-large `stop` cannot allocate phantom zero rows
    # or give the progress bar a wrong total.
    no_files = min(stop, len(files)) if stop else len(files)
    files = files[:no_files]
    # Named `frames` rather than `data` so the imported `data` module is not shadowed.
    frames = np.zeros((no_files, 17, 3))
    for i, f in tqdm(enumerate(files), total=no_files):
        # NOTE(security): allow_pickle=True executes arbitrary pickled code;
        # only point this at result files you generated yourself.
        kp_frame = np.load(os.path.join(path, f), allow_pickle=True)
        if len(kp_frame) > 0:
            # Only the first record in the file is used; any others are ignored.
            frames[i] = kp_frame[0]['keypoints']
    return frames


In [4]:
# Root folder of the pose-estimation results; each video has its own sub-folder.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
folder = '/media/tormod/Den Lille/Thesis/mmpose_results/'
video = 'VID_20210123_111337'
# Directory for the selected video (built with a trailing slash).
path = f'{folder}{video}/'

# NOTE(review): this rebinds `data`, which the import cell bound to the project
# module `data`; cells that use `data.ClimbingDataset` fail if run after this one.
data = load_data(path)

HBox(children=(FloatProgress(value=0.0, max=3760.0), HTML(value='')))




In [7]:
# Joint names in the order used by the pose results; the position of a name in
# this list is the joint index used when indexing the keypoint array.
keypoints = [
    "nose",
    "left_eye", "right_eye",
    "left_ear", "right_ear",
    "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow",
    "left_wrist", "right_wrist",
    "left_hip", "right_hip",
    "left_knee", "right_knee",
    "left_ankle", "right_ankle",
]

# Annotation label for every exported joint ("left_ear" -> "Left Ear").
# The two eye joints have no label and are therefore never exported.
my_keypoints = {
    joint_name: joint_name.replace("_", " ").title()
    for joint_name in keypoints
    if joint_name not in ("left_eye", "right_eye")
}

def keypoint_str(keypoint):
    """Format the first two components of ``keypoint`` as ``"x,y"`` with two decimals.

    Any further components (for example a confidence value) are ignored.
    """
    x, y = keypoint[0], keypoint[1]
    return '{:.2f},{:.2f}'.format(x, y)

def outside(keypoint, img_shape):
    """Tell whether a point lies outside the image rectangle.

    Args:
        keypoint: Sequence whose first two entries are ``x`` and ``y``.
        img_shape: ``(height, width)`` of the image.

    Returns:
        Truthy when ``x`` is not in ``[0, width)`` or ``y`` is not in
        ``[0, height)``.
    """
    x, y = keypoint[0], keypoint[1]
    height, width = img_shape[0], img_shape[1]
    before_origin = x < 0 or y < 0
    return before_origin or x >= width or y >= height

def clear_tracks(tree):
    """Remove every top-level ``<track>`` element from ``tree``, in place.

    Args:
        tree: Parsed ``xml.etree.ElementTree.ElementTree`` of an annotation file.

    Returns:
        The same ``tree`` object, with all direct ``track`` children of the
        root removed. Every other element is left untouched.
    """
    # Operate on the tree that was passed in. Re-parsing a global file here
    # would silently discard the caller's tree and return a different document.
    root = tree.getroot()
    # findall() returns a list, so removing children while looping is safe.
    for track in root.findall('track'):
        root.remove(track)
    return tree

def to_cvat(anno_file, out_file, data):
    """Write pose-estimation keypoints into an annotation XML as point tracks.

    The annotation file is parsed and passed through ``clear_tracks``; one
    ``<track>`` is then appended per exported joint, followed by four tracks
    (hands and feet) extrapolated from limb joints, and the tree is saved.

    Args:
        anno_file: Path of the annotation XML; ``meta/task/original_size`` is
            read from it for the image height and width.
        out_file: Path the generated XML is written to.
        data: Array indexed as ``data[frame, joint, channel]``. Channels 0 and
            1 are written as the point coordinates and channel 2 is compared
            against the confidence threshold. Joint indices follow the
            module-level ``keypoints`` list.
    """
    tree = clear_tracks(ET.parse(anno_file))
    root = tree.getroot()
    size_node = root.find('meta/task/original_size')
    img_shape = (int(size_node.find('height').text),
                 int(size_node.find('width').text))

    # Minimum confidence required on every joint that a point depends on.
    write_conf = 0.8
    n_frames = data.shape[0]

    def add_track(track_id, label=None):
        """Append an empty track to the root; label defaults to the joint's display name."""
        if label is None:
            label = my_keypoints[keypoints[track_id]]
        return ET.SubElement(root, 'track', {
            'id': str(track_id),
            'label': label,
            'source': 'mmpose',
        })

    def frame_loop(track, track_id, keypoint_fn, draw_if_fn, inferred=False):
        """Add keyframe points to ``track`` on every 5th frame (every 30th when inferred)."""
        stride = 30 if inferred else 5
        for frame_id in range(0, n_frames, stride):
            # Frame 0 always receives a point; later frames only when confident.
            if frame_id != 0 and not draw_if_fn(frame_id):
                continue
            keypoint = keypoint_fn(frame_id)
            ET.SubElement(track, 'points', {
                'frame': str(frame_id),
                'outside': '1' if outside(keypoint, img_shape) else '0',
                'occluded': '0',
                'keyframe': '1',
                'points': keypoint_str(keypoint),
            })

    def draw_if_conf(joint_ids):
        """Build a predicate: do all ``joint_ids`` reach ``write_conf`` in a frame?"""
        def confident(frame_id):
            return np.all(data[frame_id, joint_ids, 2] >= write_conf)
        return confident

    def extrapolate(near_id, far_id):
        """Build a keypoint function placing a point 20% past ``far_id``, away from ``near_id``."""
        def keypoint_fn(frame_id):
            near = data[frame_id, near_id, 0:2]
            far = data[frame_id, far_id, 0:2]
            return far + ((far - near) * 0.2)
        return keypoint_fn

    # Joints taken directly from the pose results. Indices 1 and 2 (the eyes)
    # have no annotation label and are skipped.
    for track_id in range(17):
        if track_id in (1, 2):
            continue
        frame_loop(
            add_track(track_id),
            track_id,
            lambda frame_id, joint=track_id: data[frame_id, joint],
            draw_if_conf([track_id]),
        )

    # Extrapolated end points: (track id, label, near joint, far joint).
    # Hands extend elbow -> wrist, feet extend knee -> ankle. Track id 18 is
    # unused: a Neck track (mean of joints 5 and 6) is currently disabled.
    inferred_tracks = (
        (19, 'Left Hand', 7, 9),
        (20, 'Right Hand', 8, 10),
        (21, 'Left Foot', 13, 15),
        (22, 'Right Foot', 14, 16),
    )
    for track_id, label, near_id, far_id in inferred_tracks:
        frame_loop(
            add_track(track_id, label=label),
            track_id,
            extrapolate(near_id, far_id),
            draw_if_conf([near_id, far_id]),
            inferred=True,
        )

    tree.write(out_file)
    

In [8]:
# Annotation XML for the currently selected `video`.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
anno_folder = '/media/tormod/Den Lille/Thesis/annotations/'
anno_file = f'{anno_folder}{video}.xml'
# Written relative to the notebook's working directory.
out_file = '2d_out.xml'

# `data` must be the keypoint array returned by load_data, not the `data` module.
to_cvat(anno_file, out_file, data)

In [41]:
def merge_xml(from_file, into_file, out_file, frame_limits=None, default_limit=4250):
    """Merge two annotation XML files track by track, switching source at a frame limit.

    For every ``<track>`` in ``into_file`` an output track with the same label
    is written. It contains the ``into_file`` points whose frame is ``<= limit``
    followed by the ``from_file`` points (same label) whose frame is
    ``> limit``. Everything outside the tracks is copied from ``into_file``.
    Tracks that exist only in ``from_file`` are not copied.

    Args:
        from_file: XML supplying the points after the limit.
        into_file: XML supplying the points up to and including the limit, the
            list of track labels, and all non-track content.
        out_file: Path the merged XML is written to.
        frame_limits: Optional mapping ``label -> limit`` overriding
            ``default_limit`` for individual tracks. Each override that is
            applied is printed.
        default_limit: Frame limit for labels not present in ``frame_limits``.

    Notes:
        Output tracks carry only the ``label`` and ``source="merge"``
        attributes. A label missing from ``from_file`` yields a track holding
        only the ``into_file`` points.
    """
    into_root = ET.parse(into_file).getroot()
    from_root = ET.parse(from_file).getroot()

    # The output starts as a second, independent parse of into_file with its
    # tracks stripped, so into_root can still be iterated unmodified below.
    # Tracks are removed inline so the output is always derived from
    # into_file itself and never from any other document.
    out_tree = ET.parse(into_file)
    out_root = out_tree.getroot()
    for stale_track in out_root.findall('track'):
        out_root.remove(stale_track)

    # First track per label, as a plain dict lookup. This also works for
    # labels containing quotes, which an f-string XPath predicate would not.
    from_by_label = {}
    for candidate in from_root.findall('track'):
        from_by_label.setdefault(candidate.get('label'), candidate)

    for into_track in into_root.findall('track'):
        label = into_track.attrib['label']
        from_track = from_by_label.get(label)

        limit = default_limit
        if frame_limits is not None and label in frame_limits:
            limit = frame_limits[label]
            print(label, limit)

        out_track = ET.SubElement(out_root, 'track')
        out_track.set('label', label)
        out_track.set('source', 'merge')

        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9.
        for point in into_track:
            if int(point.attrib['frame']) <= limit:
                out_track.append(point)

        if from_track is None:
            # Nothing to take from the other file for this label.
            continue
        for point in from_track:
            if int(point.attrib['frame']) > limit:
                out_track.append(point)

    out_tree.write(out_file)
    

In [32]:
# Annotation file whose tracks are kept up to each frame limit.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
into_file = '/home/tormod/Downloads/IMG_2139.xml'
# File supplying the points after the limit (the name to_cvat's output was given above).
# NOTE(review): the export above ran for video 'VID_20210123_111337' while
# into_file is IMG_2139 — confirm 2d_out.xml was regenerated for the matching video.
from_file = '2d_out.xml'
out_file = 'temp.xml'

# Per-label frame limits; labels not listed here fall back to merge_xml's default.
frame_limits = {'Nose': 6830,
                'Left Ear': 4560}

# NOTE(review): the recorded output of this cell is a NameError because
# merge_xml was not defined in the kernel when it ran; run the cell that
# defines merge_xml first.
merge_xml(from_file, into_file, out_file, frame_limits)

NameError: name 'merge_xml' is not defined