In [3]:
# This notebook will take in the directory containing the gesture folders,
#     ["CLOCKWISE", "COUNTERCLOCKWISE", "DOWN", "UP", "LEFT", "RIGHT"]

# Each gesture folder contains one directory per video, named in the following format:
#     <initials|firstname_initial>_<gesture_class>_<index_value>
# Each video directory contains the video's frames as .jpg files, named in the following format:
#     "img_"<index>.jpg where the index is zero-padded to 5 digits.

# This notebook will generate a pandas dataframe containing the statistical data on participants, gesture, and frame counts

# This notebook will allow for a user to choose how to split the data for training, validation, and testing.
#     This will probably be done by appending 0, 1, 2 as a new column in the stat_data_df 
#     And saved as a csv file.

# This notebook ought to create a train folder, a validation folder, and a test folder in it, as well as an annotation.txt file,
# so that the generic PyTorch dataloader can be used.

In [5]:
import os
from pathlib import Path
import cv2
import pandas as pd
import numpy as np
from tqdm import tqdm
from tqdm.notebook import tqdm
# Hook tqdm into pandas so the `progress_apply` family of methods is available.
# `tqdm` here is the notebook variant: the second import above rebinds the name.
tqdm.pandas()

In [16]:
# Names of the gesture classes handled by this notebook.
GESTURE_NAMES = ["CLOCKWISE", "COUNTERCLOCKWISE", "DOWN", "UP", "LEFT", "RIGHT"]

# Root directory holding the gesture folders. Set the GESTURE_FRAMES_DIR
# environment variable to run against a copy of the dataset stored elsewhere;
# when it is unset, the original hard-coded location is used.
FRAMES_DIR = Path(
    os.environ.get(
        "GESTURE_FRAMES_DIR",
        "D:/__School/__Masters/____2021fALL/5280_aiwearables/combined_1st_2nd_iter_frames",
    )
)

In [41]:
def get_dir_names_all_videos(home_dir):
    """Return the paths of every video directory found under ``home_dir``.

    The expected layout is ``home_dir/<gesture>/<video_dir>/<frames>``. Only
    directories exactly two levels below ``home_dir`` are returned; the gesture
    folders themselves are left out, however many of them there are.

    Args:
        home_dir: Path (str or ``Path``) of the directory holding the gesture folders.

    Returns:
        list[str]: One path string per video directory, in ``os.walk`` order.
    """
    home_dir = Path(home_dir)
    vid_names = []
    for path, subdirs, _files in os.walk(home_dir):
        # Depth 0 is home_dir itself; depth 1 is a gesture folder.
        depth = len(Path(path).relative_to(home_dir).parts)
        if depth == 1:
            vid_names.extend(str(Path(path, name)) for name in subdirs)
            # Prune the walk here: video directories are treated as leaves, so
            # nothing nested inside one is ever reported as a video, and the
            # frame folders are not scanned needlessly.
            subdirs.clear()
    return vid_names

In [42]:
# Collect the video directory paths under FRAMES_DIR, then preview the first seven.
video_dir_names = get_dir_names_all_videos(FRAMES_DIR)
video_dir_names[:7]

['D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00001',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00002',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00003',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00004',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00005',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00006',
 'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\combined_1st_2nd_iter_frames\\CLOCKWISE\\AW_CLOCKWISE_00007']

In [43]:
# Number of video directory paths collected.
len(video_dir_names)

815

In [44]:
# Inspect the components of one path. Splitting on "\\" only separates the
# components when the path string uses Windows-style backslashes.
video_dir_names[1].split("\\")

['D:',
 '__School',
 '__Masters',
 '____2021fALL',
 '5280_aiwearables',
 'combined_1st_2nd_iter_frames',
 'CLOCKWISE',
 'AW_CLOCKWISE_00002']

In [55]:
def create_metadata_df(dir_names):
    """Build a DataFrame with one row of metadata per video directory.

    Each directory is expected to be laid out as
    ``.../<gesture_class>/<participant_id>_<gesture_class>_<index>``.

    Args:
        dir_names: Iterable of video directory paths (str or ``Path``).

    Returns:
        pandas.DataFrame with the columns ``full_path`` (``Path`` of the video
        directory), ``gesture`` (name of the parent folder), ``participant_id``
        and ``num_frames`` (number of entries in the directory). An empty
        input yields an empty frame that still carries these columns.

    Raises:
        OSError: If one of the directories cannot be listed.
    """
    columns = ["full_path", "gesture", "participant_id", "num_frames"]
    records = []

    for vid_name in dir_names:
        # Path components come from pathlib rather than from splitting on "\\",
        # so this works with whichever separator the current OS uses.
        video_path = Path(vid_name)

        # The gesture class is the name of the folder holding the video directory.
        gesture_class = video_path.parent.name

        # Drop the trailing "_<gesture_class>_<index>" to get the participant id.
        # Splitting from the right keeps ids that themselves contain
        # underscores (e.g. "Ian_z") intact.
        participant_id = video_path.name.rsplit("_", 2)[0]

        # Every entry in the video directory is counted as a frame.
        num_frames = len(os.listdir(video_path))

        records.append({
            "full_path": video_path,
            "gesture": gesture_class,
            "participant_id": participant_id,
            "num_frames": num_frames
        })

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=columns)
        

In [56]:
# Build the per-video metadata table (path, gesture, participant id, frame count).
df = create_metadata_df(video_dir_names)

In [57]:
# Preview the first rows of the metadata table.
df.head()

Unnamed: 0,full_path,gesture,participant_id,num_frames
0,D:\__School\__Masters\____2021fALL\5280_aiwear...,CLOCKWISE,AW,46
1,D:\__School\__Masters\____2021fALL\5280_aiwear...,CLOCKWISE,AW,44
2,D:\__School\__Masters\____2021fALL\5280_aiwear...,CLOCKWISE,AW,38
3,D:\__School\__Masters\____2021fALL\5280_aiwear...,CLOCKWISE,AW,38
4,D:\__School\__Masters\____2021fALL\5280_aiwear...,CLOCKWISE,AW,39


In [58]:
# Count the rows (one per video directory) belonging to each participant.
df.participant_id.value_counts()

MCM           72
AW            60
JBG           60
KP            60
SP            60
SNB           51
Ian_z         50
emmanuel_z    50
cooper_s      50
Lori_L        50
Madeline_U    50
Daniel_M      50
Miguel_Q      50
IA            42
RAM           30
IM            12
RCS           12
KH             6
Name: participant_id, dtype: int64