# Code used to generate the data set

In [26]:
import numpy as np
import os
import math
import shutil
# from moviepy.video.io.VideoFileClip import VideoFileClip
# from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

In [27]:
# Unpack the raw-video archive only when the 'videos' folder is not there yet,
# so re-running this cell does not extract everything again.
videos_missing = not os.path.exists('videos')
if videos_missing:
    shutil.unpack_archive('videos.zip', 'videos')


In [None]:
# Parse the metadata file into a 2-D array of strings: one row per line of the
# file, one column per ';'-separated field.
with open("data_unprocessed/metadata.txt") as file:
    # Skip blank lines: they would produce a one-element row, making the rows
    # unequal in length and breaking the column indexing done on `lines` later.
    lines = np.array(
        [line.rstrip().split(";") for line in file if line.strip()]
    )

In [29]:
# Make sure the ./temp directory exists. `exist_ok=True` makes the call
# idempotent without a separate exists() check (no check-then-create race).
newpath = r'./temp'
os.makedirs(newpath, exist_ok=True)

Ideally we would use a Python library to download the YouTube videos here. Unfortunately, a recent change to YouTube's cipher system has rendered all of these tools non-functional. Since our data set has a small number of long videos, we have opted to download them manually.

Currently the raw videos are not included in the repo; later I will include them in a zip archive if the GitHub file size limit allows it.

In [30]:
from moviepy.config import FFMPEG_BINARY
import subprocess

def ffmpeg_extract_subclip(
    inputfile, start_time, end_time, outputfile=None, logger="bar"
):
    """Cut the span between two timestamps out of a video file using ffmpeg.

    The command is built with ``-ss`` placed before ``-i`` and ``-to`` after
    it, maps all streams (``-map 0``), copies video and audio codecs instead
    of re-encoding, and passes ``-copyts``. Existing output files are
    overwritten (``-y``). The exit status of ffmpeg is not checked.

    Parameters
    ----------

    inputfile : str
      Path of the video to cut from.

    start_time : float
      Start of the subclip; formatted with two decimals for ffmpeg.

    end_time : float
      End of the subclip; formatted with two decimals for ffmpeg.

    outputfile : str, optional
      Destination path. When empty or omitted it becomes
      ``<inputfile_name>SUB<start_ms>_<end_ms><ext>``, where the two numbers
      are the times multiplied by 1000 and truncated to integers.

    logger : str, optional
      Accepted for signature compatibility; not used by this implementation.
    """
    if not outputfile:
        stem, extension = os.path.splitext(inputfile)
        start_ms = int(1000 * start_time)
        end_ms = int(1000 * end_time)
        outputfile = "%sSUB%d_%d%s" % (stem, start_ms, end_ms, extension)

    # Assemble the command from its logical pieces, in the order ffmpeg sees them.
    seek_args = ["-ss", "%0.2f" % start_time]
    input_args = ["-i", inputfile]
    stop_args = ["-to", "%0.2f" % end_time]
    copy_args = ["-map", "0", "-vcodec", "copy", "-acodec", "copy", "-copyts"]

    cmd = (
        [FFMPEG_BINARY, "-y"]
        + seek_args
        + input_args
        + stop_args
        + copy_args
        + [outputfile]
    )
    subprocess.run(cmd)

In [31]:
# Distinct video URLs referenced by the metadata (column 4).
links = np.unique(lines.transpose()[4])

# Nothing else in this notebook creates the output folder, so make sure it
# exists before ffmpeg is asked to write into it.
os.makedirs("cropped", exist_ok=True)

# Walk the metadata rows and cut one clip per run. Two consecutive rows that
# reference the same video (column 4) and the same start time (column 5) are
# cut as a single clip named "<id1>&<id2>" that lasts until the later of the
# two end times (column 6).
i = 0
while i < len(lines):
    # Only look at the following row when there is one; the last row has no
    # partner and must fall through to the single-run branch.
    has_next = i + 1 < len(lines)
    if (
        has_next
        and float(lines[i][5]) == float(lines[i + 1][5])
        and lines[i][4] == lines[i + 1][4]
    ):
        # The fields are strings, so compare end times numerically
        # ("9.5" > "10.2" as text, but not as numbers).
        first_end = float(lines[i][6])
        second_end = float(lines[i + 1][6])
        sample = lines[i] if first_end > second_end else lines[i + 1]
        start = float(lines[i][5])
        end = max(first_end, second_end)

        name = lines[i][0] + "&" + lines[i + 1][0]
        i += 2
    else:
        sample = lines[i]
        start = float(sample[5])
        end = float(sample[6])
        name = sample[0]
        i += 1

    # The source file is named after whatever follows the last '=' in the URL.
    video = sample[4].split("=")[-1]
    ffmpeg_extract_subclip(
        "videos/" + video + ".mp4", start, end, "cropped/" + name + ".mp4"
    )
        
       



Create a zip archive of the output so the videos can be committed to GitHub.

In [32]:
# Rebuild the archive from scratch: remove any previous copy first, then zip
# the contents of the "cropped" folder. The archive path is the cell's output.
previous_archive = "videos_processed.zip"
if os.path.isfile(previous_archive):
    os.remove(previous_archive)
shutil.make_archive("videos_processed", 'zip', "cropped")

'c:\\Users\\mikeG\\Documents\\school\\cisc-452\\CISC-452-Group-22\\videos_processed.zip'

In [None]:
# Dump the frames of one cropped run to img/frame<N>.jpg, numbered so that the
# last video frame lines up with the last frame number in the skeleton file.
import cv2

run = "34_2018-07-06_2"

# os.path.join picks the right separator on every platform (a hard-coded
# backslash only works on Windows).
vidcap = cv2.VideoCapture(os.path.join("cropped", run + ".mp4"))
try:
    success, image = vidcap.read()
    count = 0

    # One row per skeleton line: [frame_number, keypoint_payload].
    # Blank lines are skipped so that data[-1] is a real record.
    with open("data_unprocessed/skeletons/" + run + ".data") as file:
        data = np.array(
            [line.rstrip().split("#") for line in file if line.strip()]
        )

    frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

    print(data)

    # Number of leading video frames to drop so that the video's last frame
    # index matches the last frame number listed in the skeleton file.
    offset = frame_count - int(data[-1][0])

    print("Number of frames: ", frame_count)

    # cv2.imwrite needs the target directory to exist already.
    os.makedirs("img", exist_ok=True)

    while success:
        keep_frame = count >= offset
        if keep_frame:
            # save frame as JPEG file
            cv2.imwrite("img/frame%d.jpg" % (count - offset), image)
        success, image = vidcap.read()
        if keep_frame:
            print('Read a new frame: ', success)
        count += 1
finally:
    # Always free the decoder / file handle, even if something above fails.
    vidcap.release()

In [None]:
import json
import os
import sys

def print_to_log(*args, sep=' ', end='\n', file=sys.stdout, flush=False):
    """
    print()-compatible helper that also appends the output to 'log.log'.

    The message is built exactly as print() would build it
    (``sep.join(map(str, args)) + end``), appended to 'log.log' in the current
    working directory, and then written to ``file``.

    Args:
        *args: Values to be printed.
        sep (str): Separator between values (default: ' ').
        end (str): End character (default: '\n').
        file: File-like object to write to (default: sys.stdout).
        flush (bool): Whether to forcibly flush the stream.
    """
    message = sep.join(map(str, args)) + end  # Construct the message

    # Append to the log file
    with open("log.log", "a") as log_file:
        log_file.write(message)

    # Echo to the requested stream; the message already carries `end`, so
    # print() must not add another terminator.
    print(message, end='', file=file, flush=flush)


def parse_metadata(metadata_path):
    """Read the ';'-separated metadata file into a dict keyed by its first field.

    Each non-blank line must contain at least 12 fields. Field 0 becomes the
    dictionary key; fields 1-11 are stored under the names below, with
    ``start``, ``end``, ``time_sec`` and ``fps`` converted to float and
    ``time_frames`` and ``finished`` converted to int.

    Args:
        metadata_path (str): Path to the metadata text file.

    Returns:
        dict: ``{field0: {'id_climber', 'date', 'run_number', 'url', 'start',
        'end', 'time_sec', 'time_frames', 'finished', 'side', 'fps'}}``.
        A key that appears more than once keeps its last occurrence.

    Raises:
        IndexError: If a non-blank line has fewer than 12 fields.
        ValueError: If a numeric field cannot be converted.
    """
    metadata = {}
    with open(metadata_path, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. trailing newlines at end of file).
                continue
            parts = stripped.split(';')
            metadata[parts[0]] = {
                'id_climber': parts[1],
                'date': parts[2],
                'run_number': parts[3],
                'url': parts[4],
                'start': float(parts[5]),
                'end': float(parts[6]),
                'time_sec': float(parts[7]),
                'time_frames': int(parts[8]),
                'finished': int(parts[9]),
                'side': parts[10],
                'fps': float(parts[11])
            }
    return metadata

def parse_skeletons(skeletons_dir):
    """Load every ``*.data`` skeleton file in a directory.

    Each usable line has the form ``<frame_number>#<kp>;<kp>;...`` where every
    ``<kp>`` is a comma-separated list of numbers. Lines containing ``NULL``
    and blank lines are skipped. All coordinates of a line are flattened into
    one list of floats, in file order.

    Args:
        skeletons_dir (str): Directory containing the ``.data`` files.

    Returns:
        dict: ``{run_id: [{'frame_number': int, 'keypoints': [float, ...]}, ...]}``
        where ``run_id`` is the file name without its ``.data`` extension.
        Entries that do not end in ``.data`` are ignored.
    """
    skeleton_data = {}
    # os.listdir returns entries in arbitrary order; sort so the result (and
    # anything numbered from it) is the same on every machine.
    for file_name in sorted(os.listdir(skeletons_dir)):
        run_id, extension = os.path.splitext(file_name)
        if extension != '.data':
            # Ignore stray files (editor backups, OS metadata, ...).
            continue
        skeleton_data[run_id] = []
        with open(os.path.join(skeletons_dir, file_name), 'r') as f:
            for line in f:
                if "NULL" in line or not line.strip():
                    continue
                parts = line.strip().split('#')
                frame_number = int(parts[0])
                keypoints = parts[1].split(';')
                keypoints = [float(coord) for kp in keypoints for coord in kp.split(',')]
                skeleton_data[run_id].append({
                    'frame_number': frame_number,
                    'keypoints': keypoints
                })
    return skeleton_data

def generate_coco_json(images_dir, skeletons, metadata, output_json,
                       image_ext='.png', image_height=1920, image_width=1080):
    """Write a COCO-style keypoint annotation file for the parsed skeletons.

    One image entry and one annotation entry are emitted per skeleton frame.
    Image ids and annotation ids both start at 1 and increase in iteration
    order over ``skeletons``.

    Args:
        images_dir (str): Directory prefix used to build each ``file_name``.
        skeletons (dict): ``{run_id: [{'frame_number': int,
            'keypoints': [float, ...]}, ...]}``.
        metadata (dict): ``{run_id: {...}}``; runs missing from it get an
            empty ``run_metadata`` and ``climber_id`` ``'unknown'``.
        output_json (str): Path of the JSON file to write (overwritten).
        image_ext (str): Extension appended to ``frame<N>``; default ``'.png'``.
        image_height (int): Height recorded for every image; default 1920.
        image_width (int): Width recorded for every image; default 1080.

    Returns:
        None. The result is written to ``output_json``.
    """
    coco_data = {
        'images': [],
        'annotations': [],
        'categories': [{
            'id': 1,
            'name': 'person',
            'keypoints': [f'j{i+1}' for i in range(16)],
            'skeleton': [[i, i+1] for i in range(1, 16)]  # Simple linear skeleton structure
        }]
    }

    annotation_id = 1
    for run_id, frames in skeletons.items():
        climber_metadata = metadata.get(run_id, {})
        for frame in frames:
            image_id = len(coco_data['images']) + 1
            frame_number = frame['frame_number']
            # NOTE(review): the frame-export cell in this notebook writes
            # "img/frame%d.jpg", while the default here is '.png'; pass
            # image_ext='.jpg' if those are the images being referenced.
            # The file name does not include run_id, so frames from different
            # runs with the same frame number map to the same path.
            image_path = os.path.join(images_dir, f'frame{frame_number}{image_ext}')
            coco_data['images'].append({
                'id': image_id,
                'file_name': image_path,
                'height': image_height,
                'width': image_width
            })
            coco_data['annotations'].append({
                'id': annotation_id,
                'image_id': image_id,
                'category_id': 1,
                'keypoints': frame['keypoints'],
                # Counts every third value (starting at index 2) that is > 0.
                # Assumes keypoints are flat (x, y, v) triplets - TODO confirm
                # against the skeleton file format.
                'num_keypoints': sum(1 for x in frame['keypoints'][2::3] if x > 0),
                'climber_id': climber_metadata.get('id_climber', 'unknown'),
                'run_metadata': climber_metadata
            })
            annotation_id += 1

    with open(output_json, 'w') as f:
        json.dump(coco_data, f, indent=4)

# Input locations, relative to the notebook's working directory
metadata_file = 'data_unprocessed/metadata.txt'
skeletons_dir = 'data_unprocessed/skeletons'
images_dir = 'img'

# Output locations
output_json = 'climbData.json'
test_json = 'test.json'

# Load both annotation sources, then write the combined COCO-style file
metadata = parse_metadata(metadata_file)
skeletons = parse_skeletons(skeletons_dir)

generate_coco_json(
    images_dir,
    skeletons,
    metadata,
    output_json,
)
