# Parses a folder full of annotation json files and generates statistics on all of the videos that have labels. A list of the videos labeled, all existing labels, and if any frames are not labelled

## Imports

In [15]:
import os
import json
import pandas as pd

In [8]:
# Accessing Annotation file constants
ANNOTATIONS = "subject_blocks"
ANNOTATION_TYPE = "name"
START = "enter_frame"
EXIT = "exit_frame"
LABEL_ARR = "events"
LABEL = "data"
METADATA = "video_metadata"
DURATION = "duration"

# Creating annotation arr constants
ANNOTATION_KEY = "annotation_type"
START_FRAME_KEY = "start_frame"
END_FRAME_KEY = "end_frame"
LABEL_KEY = "label"

## Analyze annotation files

In [72]:
annotation_loc = "/Users/jamesperalta/Desktop/classes/CPSC-502/ResearchProposal/WorkoutRecognitionThesis/video_dataset/new-train/labels"
annotation_files = os.listdir(annotation_loc)

frames_arr = []
for file in annotation_files:
    if "json" not in file:
        continue
    
    file_name = file.split("_")[0]
    print("Analyzing: " + file_name)
    with open("{}/{}".format(annotation_loc, file)) as json_file:
        data = json.load(json_file)
        # Retrieving the annotations
        annotations = data[ANNOTATIONS]
        annotation_arr = []
        for annotation in annotations:
            annotation_type = annotation[ANNOTATION_TYPE]
            start_frame = annotation[START]
            end_frame = annotation[EXIT]
            label = annotation[LABEL_ARR][0][LABEL]
            annotation_arr.append({ANNOTATION_KEY: annotation_type,
                                   START_FRAME_KEY: start_frame,
                                   END_FRAME_KEY: end_frame,
                                   LABEL_KEY: label})

        duration = data[METADATA][DURATION]
        for frame in range(0, duration):
            labels = []
            # Check which labels this frame has
            for annotation in annotation_arr:
                start_frame = annotation[START_FRAME_KEY]
                end_frame = annotation[END_FRAME_KEY]
                if frame >= start_frame and frame < end_frame:
                    labels.append(annotation[LABEL_KEY])

            frames_arr.append({
                "name": file_name,
                "frame": frame,
                "label": "-".join(labels)
            })

frames_df = pd.DataFrame(frames_arr)


Analyzing: ohp4
Analyzing: ohp8
Analyzing: ohp2
Analyzing: ohp0
Analyzing: ohp6
Analyzing: squat6
Analyzing: squat0
Analyzing: squat3
Analyzing: nothing1
Analyzing: squat7
Analyzing: ohp7
Analyzing: ohp3
Analyzing: squat2
Analyzing: ohp5
Analyzing: squat8
Analyzing: squat1
Analyzing: nothing0
Analyzing: nothing2
Analyzing: ohp1
Analyzing: squat4
Analyzing: squat5


In [73]:
frames_df

Unnamed: 0,name,frame,label
0,ohp4,0,ohp
1,ohp4,1,ohp
2,ohp4,2,ohp
3,ohp4,3,ohp
4,ohp4,4,ohp
...,...,...,...
10396,squat5,288,nothing
10397,squat5,289,nothing
10398,squat5,290,nothing
10399,squat5,291,nothing


In [75]:
for col in frames_df.columns: 
    if col not in ["name", "label"]:
        continue
#     unique = set(str(frames_df[[col]].values.tolist()))
    print("Unique values in " + col)
    print(frames_df[[col]][col].unique())

Unique values in name
['ohp4' 'ohp8' 'ohp2' 'ohp0' 'ohp6' 'squat6' 'squat0' 'squat3' 'nothing1'
 'squat7' 'ohp7' 'ohp3' 'squat2' 'ohp5' 'squat8' 'squat1' 'nothing0'
 'nothing2' 'ohp1' 'squat4' 'squat5']
Unique values in label
['ohp' 'nothing' 'squat']
