# This file converts the JSON labels generated by the GymCam annotation tool into a CSV with the following format:

|file_name| video_file      | annotation_file | workout_type | reps   |
|---------|-----------------|-----------------|--------------|--------|
|ohp5     | ./absolute/path | ./absolute/path | ohp          | 11reps |

In [1]:
# GLOBAL VARIABLES
# All locations hang off the dataset directory. output_loc and videos_loc keep a
# trailing slash because later cells concatenate them directly with a file name,
# while annotation_loc has none because its consumer inserts the "/" itself.
output_loc = "/Users/jamesperalta/Desktop/classes/CPSC-502/ResearchProposal/WorkoutRecognitionThesis/video_dataset/"
videos_loc = output_loc + "test/"
annotation_loc = videos_loc + "labels"
output_filename = "test_annotations.csv"

## Imports

In [2]:
import os
import re
import json
import pandas as pd

## Generate the annotation file paths

In [3]:
# Helper functions
def extract_file_type(file_path):
    """Return the lower-cased text that precedes the first "<digits>_mp4_" marker.

    For example "OHP15_mp4_labels.json" yields "ohp". When the marker does not
    occur, the whole input is returned lower-cased.

    Args:
        file_path: Name of an annotation file (a string).

    Returns:
        The lower-cased prefix as a string.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence and
    # triggers a DeprecationWarning / SyntaxWarning on recent Python versions.
    file_type = re.split(r"\d+_mp4_", file_path, maxsplit=1)[0]
    return file_type.lower()

def extract_file_name(file_path):
    """Return the lower-cased text that precedes the first "_mp4_" marker.

    For example "OHP15_mp4_labels.json" yields "ohp15". When the marker does
    not occur, the whole input is returned lower-cased.
    """
    stem, _marker, _remainder = file_path.partition("_mp4_")
    return stem.lower()

In [4]:
# os.listdir returns every directory entry, including sub-folders and hidden files
# (e.g. macOS ".DS_Store"), which would make json.load fail. Keep regular,
# non-hidden files only.
annotation_files = [
    entry for entry in os.listdir(annotation_loc)
    if not entry.startswith(".") and os.path.isfile(os.path.join(annotation_loc, entry))
]
# Group the files by workout type; sorted() is stable, so files that share a type
# keep their listing order.
annotation_files = sorted(annotation_files, key=extract_file_type)

parsed_json_list = []
for file in annotation_files:
    absolute_path = os.path.join(annotation_loc, file)
    # The handle is only needed while parsing, so close it before processing.
    with open(absolute_path) as json_file:
        data = json.load(json_file)

    # Only the first subject block of each annotation file is read.
    subject_block = data["subject_blocks"][0]
    events_block = subject_block["events"]

    # Retrieve the workout name
    workout_name = subject_block["name"].lower()

    # Retrieve the rep count: the "data" of the first event whose type is 2,
    # or "" when the file contains no such event.
    rep_count = next((event["data"] for event in events_block if event["type"] == 2), "")

    parsed_json_list.append({"file_name": extract_file_name(file), "annotation_file": absolute_path,
                             "workout_type": workout_name, "reps": rep_count})

# Explicit columns keep the frame mergeable on "file_name" even when nothing was parsed.
annotation_df = pd.DataFrame(
    parsed_json_list,
    columns=["file_name", "annotation_file", "workout_type", "reps"],
)

In [5]:
# Display the parsed annotations (one row per annotation file).
annotation_df

Unnamed: 0,file_name,annotation_file,workout_type,reps
0,ohp15,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
1,ohp0,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
2,ohp20,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
3,ohp10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
4,ohp30,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
5,ohp35,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
6,ohp5,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,11reps
7,squat30,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,10reps
8,squat10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,8reps
9,squat39,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,8reps


## Generate the video file paths

In [6]:
video_files = os.listdir(videos_loc)
video_type = ".mp4"
parsed_video_list = []
for video in video_files:
    # Compare the real extension: a substring test would also accept names such as
    # "clip.mp4.part", and splitting on ".mp4" would cut a name at its first occurrence.
    video_name, extension = os.path.splitext(video)
    if extension.lower() != video_type:
        continue
    parsed_video_list.append({
        # Lower-cased so the join key agrees with extract_file_name(), which
        # lower-cases the annotation-side key.
        "file_name": video_name.lower(),
        "video_file": os.path.join(videos_loc, video),
    })

# Explicit columns keep the frame mergeable on "file_name" even when no video was found.
video_df = pd.DataFrame(parsed_video_list, columns=["file_name", "video_file"])

In [7]:
# Display the discovered videos (one row per ".mp4" file).
video_df

Unnamed: 0,file_name,video_file
0,squat10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
1,squat39,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
2,squat15,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
3,squat5,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
4,ohp30,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
5,ohp35,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
6,ohp20,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
7,squat0,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
8,ohp0,/Users/jamesperalta/Desktop/classes/CPSC-502/R...
9,ohp10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...


## Report videos with missing labels

In [8]:
# Left join video files with annotation files.
# how="left" keeps every video; annotation_file / workout_type / reps are NaN for
# videos that have no matching annotation. pd.merge defaults to an inner join,
# which silently drops those videos, so checks for missing annotations on
# final_result would always find none.
final_result = pd.merge(video_df, annotation_df, how="left", on="file_name", sort=True)
final_result

Unnamed: 0,file_name,video_file,annotation_file,workout_type,reps
0,ohp0,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
1,ohp10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
2,ohp15,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
3,ohp20,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,10reps
4,ohp30,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
5,ohp35,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,9reps
6,ohp5,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,ohp,11reps
7,squat0,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,10reps
8,squat10,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,8reps
9,squat15,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,/Users/jamesperalta/Desktop/classes/CPSC-502/R...,squat,10reps


## Store the CSV

In [9]:
# output_loc is concatenated directly with the file name, so it must end with "/".
csv_destination = output_loc + output_filename
final_result.to_csv(csv_destination, index=False)

## Check how many files are missing annotations

In [10]:
# final_result = final_result.append([{"file_name": "YO", "video_file": "/here/bro", "annotation_file": None, "workout_type": None, "reps":  None}])

In [11]:
# Count the rows whose annotation_file is missing (NaN/None).
annotation_missing = final_result["annotation_file"].isna()
files_no_annotation = annotation_missing.sum()
print("There is {} video(s) with no annotations".format(files_no_annotation))

There is 0 video(s) with no annotations


In [12]:
# Show the rows whose annotation_file is missing.
final_result[final_result["annotation_file"].isna()]

Unnamed: 0,file_name,video_file,annotation_file,workout_type,reps
