In [1]:
# This notebook processes the last two video sets recorded in class and combines them into a final frame_dataset directory.

In [32]:
import os
from pathlib import Path
import cv2
import pandas as pd
import numpy as np

import time

from threading import Thread
import sys

from queue import Queue

In [4]:
# Gesture class labels. Each label becomes a class subfolder in the frame dataset,
# and the list order sets the row order produced by create_video_folder_names.
GESTURE_NAMES = ["CLOCKWISE", "COUNTERCLOCKWISE", "DOWN", "UP", "LEFT", "RIGHT"]

In [11]:
# Root folder of the source videos; files sit under <subject>/<gesture>/<file>.
# NOTE(review): absolute paths on a local Windows drive -- adjust before running elsewhere.
VIDEOS_DIR = Path("D:/__School/__Masters/____2021fALL/5280_aiwearables/videos")
# Root folder that receives the extracted frames (<gesture>/<video folder>/img_NNNNN.jpg).
TARGET_DIR = Path("D:/__School/__Masters/____2021fALL/5280_aiwearables/3rd_iter_frames")

In [10]:
def get_dir_names_all_videos(home_dir):
    """Return the path of every file found anywhere under ``home_dir``.

    The tree is walked recursively with ``os.walk`` and each file is reported
    as a string built from its containing directory and file name. No
    filtering by extension is done, so non-video files are included as well.

    Directories and files are visited in sorted name order so that the
    returned list -- and anything derived from its order, such as the
    per-gesture increment ids assigned later -- is the same on every run.

    Args:
        home_dir: Root directory to search (``str`` or ``Path``).

    Returns:
        list[str]: One path string per file. Empty when ``home_dir`` contains
        no files or does not exist.
    """
    video_fnames = []
    for path, subdirs, files in os.walk(home_dir):
        # Sorting in place steers the order of os.walk's top-down traversal.
        subdirs.sort()
        for name in sorted(files):
            video_fnames.append(str(Path(path) / name))
    return video_fnames

In [12]:
# Path strings for every file under VIDEOS_DIR (no extension filtering is applied).
video_dir_names = get_dir_names_all_videos(VIDEOS_DIR)

In [13]:
len(video_dir_names)

100

In [14]:
video_dir_names[0]

'D:\\__School\\__Masters\\____2021fALL\\5280_aiwearables\\videos\\Will_B\\CLOCKWISE\\WIN_20211027_17_25_02_Pro.mp4'

In [15]:
def get_metadata_all_videos(video_dir_names):
    """Collect per-video metadata into a DataFrame, one row per video file.

    Each path is expected to end in ``<subject>/<gesture>/<file>``. The file
    name, the gesture (parent folder, spaces removed and upper-cased) and the
    subject (grandparent folder) are taken from the last three path
    components. Components are parsed with ``pathlib`` so the native
    separator of the running platform is honoured.

    FPS and frame count are read from the video through OpenCV; the capture
    handle is released as soon as both values are read.

    Args:
        video_dir_names: Sequence of video file paths (``str`` or ``Path``).

    Returns:
        pandas.DataFrame with columns ``file_name``, ``subject_initials``,
        ``gesture_name``, ``frames_per_second``, ``frame_count``,
        ``duration_seconds`` and ``dir_name`` (the path exactly as passed in).
        ``duration_seconds`` is NaN when OpenCV reports a non-positive FPS
        (e.g. the file could not be opened). The columns are present even
        when the input is empty.
    """
    columns = [
        "file_name",
        "subject_initials",
        "gesture_name",
        "frames_per_second",
        "frame_count",
        "duration_seconds",
        "dir_name",
    ]
    fps_length_data = []
    total = len(video_dir_names)

    for i, vid_dir in enumerate(video_dir_names):
        if i % 100 == 0:
            print(f"processing {i} of {total}...")

        vid_path = Path(vid_dir)
        fname = vid_path.name
        gesture_name = vid_path.parent.name.replace(" ", "").upper()
        subject_initials = vid_path.parent.parent.name

        video = cv2.VideoCapture(str(vid_dir))
        try:
            fps = video.get(cv2.CAP_PROP_FPS)
            frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always free the decoder handle, even if a property read fails.
            video.release()

        # A non-positive FPS means the duration cannot be computed; record NaN
        # instead of raising ZeroDivisionError and aborting the whole scan.
        duration_seconds = frame_count / fps if fps > 0 else float("nan")

        fps_length_data.append({"file_name": fname,
                                "subject_initials": subject_initials,
                                "gesture_name": gesture_name,
                                "frames_per_second": fps,
                                "frame_count": frame_count,
                                "duration_seconds": duration_seconds,
                                "dir_name": vid_dir})

    df = pd.DataFrame(fps_length_data, columns=columns)
    print("Done!")
    return df

In [16]:
# Read FPS and frame count from every video; the result has one row per file.
df = get_metadata_all_videos(video_dir_names)

processing 0 of 100...
Done!


In [17]:
df.head(3)

Unnamed: 0,file_name,subject_initials,gesture_name,frames_per_second,frame_count,duration_seconds,dir_name
0,WIN_20211027_17_25_02_Pro.mp4,Will_B,CLOCKWISE,15.266469,33,2.1616,D:\__School\__Masters\____2021fALL\5280_aiwear...
1,WIN_20211027_17_25_08_Pro.mp4,Will_B,CLOCKWISE,15.302687,25,1.6337,D:\__School\__Masters\____2021fALL\5280_aiwear...
2,WIN_20211027_17_25_11_Pro.mp4,Will_B,CLOCKWISE,15.460546,26,1.6817,D:\__School\__Masters\____2021fALL\5280_aiwear...


In [18]:
# Reported FPS values vary slightly per file (e.g. 15.27 vs 15.30); round them
# to the nearest whole number so the recordings can be grouped by frame rate.
df["rounded_fps"] = df.frames_per_second.round()

In [19]:
df.rounded_fps.value_counts()

15.0    50
30.0    50
Name: rounded_fps, dtype: int64

In [20]:
df.subject_initials.value_counts()

Will_B    50
Yen_P     50
Name: subject_initials, dtype: int64

In [21]:
df.gesture_name.value_counts()

DOWN                20
LEFT                20
RIGHT               20
UP                  20
CLOCKWISE           10
COUNTERCLOCKWISE    10
Name: gesture_name, dtype: int64

In [22]:
df.duration_seconds.round().value_counts()

2.0    82
1.0    13
3.0     5
Name: duration_seconds, dtype: int64

In [23]:
def create_video_folder_names(df, gesture_names):
    """Return a copy of ``df`` with per-gesture ids and output folder names.

    For each gesture in ``gesture_names`` (in that order) the matching rows
    are numbered 1..N in their existing order, and a folder name of the form
    ``<subject_initials>_<gesture_name>_<increment_id zero-padded to 5>`` is
    built for each row.

    Args:
        df: DataFrame with ``subject_initials`` and ``gesture_name`` columns.
            It is not modified.
        gesture_names: Gesture labels to keep. Rows whose ``gesture_name`` is
            not listed are dropped from the result; labels with no matching
            rows are skipped.

    Returns:
        pandas.DataFrame: the selected rows grouped by gesture, with a fresh
        0..n-1 index and two added columns, ``increment_id`` and
        ``gesture_video_foldername``. If nothing matches, an empty frame with
        those columns is returned.
    """
    per_gesture = []

    for gesture_name in gesture_names:
        subset = df[df["gesture_name"] == gesture_name].copy()
        if subset.empty:
            # Nothing recorded for this class; skipping also keeps pd.concat
            # from being handed empty frames.
            continue
        subset["increment_id"] = np.arange(1, len(subset) + 1)
        # Vectorised string build; a row-wise apply() misbehaves on empty
        # frames and is slower on large ones.
        subset["gesture_video_foldername"] = (
            subset["subject_initials"].astype(str)
            + "_"
            + subset["gesture_name"].astype(str)
            + "_"
            + subset["increment_id"].astype(str).str.zfill(5)
        )
        per_gesture.append(subset)

    if not per_gesture:
        # pd.concat([]) raises, so build the empty result explicitly with the
        # same columns a non-empty result would have.
        return (
            df.iloc[0:0]
            .assign(
                increment_id=np.array([], dtype=int),
                gesture_video_foldername=np.array([], dtype=object),
            )
            .reset_index(drop=True)
        )

    return pd.concat(per_gesture).reset_index(drop=True)

In [24]:
# Rebinds df: rows are regrouped in GESTURE_NAMES order and gain the
# increment_id and gesture_video_foldername columns.
df = create_video_folder_names(df, GESTURE_NAMES)

In [25]:
def build_frames_directory(df, target_directory, gesture_names):
    """Create the output tree ``<target_directory>/<gesture>/<video folder>``.

    A folder is made for every label in ``gesture_names`` and, inside the
    folder of its gesture, one folder per row of ``df``. Folders that already
    exist are left untouched, and missing parents are created.

    Args:
        df: DataFrame with ``gesture_name`` and ``gesture_video_foldername``
            columns.
        target_directory: Root of the output tree (``str`` or ``Path``).
        gesture_names: Gesture labels that each get a class folder.

    Returns:
        None. The function only creates directories.
    """
    root = Path(target_directory)

    # Class folders first, so a class with no videos still gets a folder.
    for label in gesture_names:
        (root / label).mkdir(parents=True, exist_ok=True)

    # Then one folder per video, nested under that video's gesture class.
    for record in df.itertuples(index=False):
        clip_dir = root / record.gesture_name / record.gesture_video_foldername
        clip_dir.mkdir(parents=True, exist_ok=True)

In [26]:
# Create <TARGET_DIR>/<gesture>/<video folder> for every row of df.
# Existing folders are kept, so this cell can be re-run.
build_frames_directory(df, TARGET_DIR, GESTURE_NAMES)

In [27]:
def video_to_frames(input_dir, output_dir):
    """Decode a video and save each frame as ``img_NNNNN.jpg`` in ``output_dir``.

    The number of read attempts comes from the frame count the video reports.
    A read that fails is skipped, and its number is skipped too, so the file
    number always matches the position of the read attempt (1-based).

    Args:
        input_dir: Path of the video file (``str`` or ``Path``).
        output_dir: Existing directory that receives the JPEG files.

    Returns:
        None.
    """
    # Build file names with pathlib rather than a literal "\\" so the frames
    # land inside output_dir on every platform.
    output_dir = Path(output_dir)
    video = cv2.VideoCapture(str(input_dir))
    try:
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(frame_count):
            success, image = video.read()
            if not success:
                continue
            cv2.imwrite(str(output_dir / f"img_{i + 1:05d}.jpg"), image)  # save frame as JPEG file
    finally:
        # Free the decoder handle even if reading or writing raised.
        video.release()

In [28]:
# this cell will implement all the code necessary for the 50% faster image reading and saving

class FileVideoStream:
    """Read frames from a video file on a background thread into a bounded queue.

    Typical use::

        fvs = FileVideoStream(path).start()
        while fvs.more():
            frame = fvs.read()
        fvs.stop()

    Attributes are kept as in the original interface: ``stream`` (the OpenCV
    capture), ``stopped`` (set once reading has ended or ``stop()`` was
    called) and ``Q`` (the frame queue).
    """

    def __init__(self, path, queueSize=160):
        # open the video file; str() lets callers pass a pathlib.Path as well
        self.stream = cv2.VideoCapture(str(path))
        # flag telling the reader thread to finish; also set by the reader
        # itself once the end of the file is reached
        self.stopped = False
        # bounded queue holding decoded frames until the consumer takes them
        self.Q = Queue(maxsize=queueSize)
        # reader thread, created by start(); None until then
        self._thread = None

    def start(self):
        """Start the background reader thread and return ``self`` for chaining."""
        self._thread = Thread(target=self.update, args=())
        self._thread.daemon = True
        self._thread.start()
        return self

    def update(self):
        """Reader loop: decode frames into the queue until stopped or end of file.

        Runs on the thread created by ``start()``. When it exits, for any
        reason, ``stopped`` is set and the capture is released.
        """
        try:
            while not self.stopped:
                if self.Q.full():
                    # back off briefly instead of spinning at full CPU while
                    # the consumer catches up
                    time.sleep(0.01)
                    continue
                # read the next frame from the file
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    return
                # this thread is the only producer and the queue was not
                # full a moment ago, so put() does not block here
                self.Q.put(frame)
        finally:
            self.stopped = True
            self.stream.release()

    def read(self):
        """Return the next frame from the queue.

        Blocks until a frame is available, so call it only after ``more()``
        has returned True.
        """
        return self.Q.get()

    def more(self):
        """Return True if a frame is available to ``read()``.

        While the queue is empty but the reader thread is still running, this
        waits for either a frame to arrive or the reader to finish. A queue
        that is only momentarily empty (consumer faster than decoder) is
        therefore not mistaken for the end of the video. Before ``start()``
        is called there is no reader thread and the method returns at once.
        """
        while self.Q.empty() and self._thread is not None and self._thread.is_alive():
            time.sleep(0.001)
        # re-check after the wait: the last frame may have been queued just
        # before the reader thread exited
        return self.Q.qsize() > 0

    def stop(self):
        """Ask the reader thread to finish.

        The reader releases the capture itself when it exits; if it was never
        started, the capture is released here instead.
        """
        self.stopped = True
        if self._thread is None:
            self.stream.release()

In [29]:
def process_video_to_frames(input_file, output_dir, x, y):
    """Write every frame of a video, resized to ``x`` by ``y``, as numbered JPEGs.

    Frames are decoded on a background thread by ``FileVideoStream`` and
    saved as ``img_00001.jpg``, ``img_00002.jpg``, ... in ``output_dir``.

    Args:
        input_file: Path of the video file.
        output_dir: Existing directory that receives the JPEG files
            (``str`` or ``Path``).
        x: Output width in pixels.
        y: Output height in pixels.

    Returns:
        None.
    """
    # Give up if the reader produces nothing for this long without
    # signalling that it has stopped (e.g. the reader thread died).
    stall_timeout_seconds = 5.0

    # pathlib keeps the frames inside output_dir on every platform, unlike a
    # hard-coded "\\" separator.
    output_dir = Path(output_dir)

    fvs = FileVideoStream(input_file).start()
    frame_count = 0
    last_progress = time.monotonic()
    try:
        # An empty queue alone does not mean the video is finished: the
        # decoder may simply be slower than this loop. Keep draining until
        # the reader has flagged that it stopped AND the queue is empty.
        # This replaces a fixed one-second warm-up sleep per video.
        while True:
            if fvs.more():
                frame_count += 1
                frame = cv2.resize(fvs.read(), (x, y))
                # save frame to directory
                cv2.imwrite(str(output_dir / f"img_{frame_count:05d}.jpg"), frame)
                last_progress = time.monotonic()
            elif fvs.stopped:
                # re-check: the final frame can be queued between the two
                # tests above
                if not fvs.more():
                    break
            elif time.monotonic() - last_progress > stall_timeout_seconds:
                break
            else:
                time.sleep(0.001)
    finally:
        # Always tell the reader thread to finish, even if resize/imwrite raised.
        fvs.stop()

In [30]:
def populate_frames(df, target_directory, gesture_names):
    """Extract the frames of every video listed in ``df`` into its own folder.

    For each row, the video at ``dir_name`` is processed into
    ``<target_directory>/<gesture_name>/<gesture_video_foldername>`` with
    frames resized to 320x240. A progress line is printed per video.

    Args:
        df: DataFrame with ``gesture_name``, ``gesture_video_foldername`` and
            ``dir_name`` columns.
        target_directory: Root of the frame folder tree.
        gesture_names: Not used by this function.

    Returns:
        None.
    """
    frames_root = Path(target_directory)
    for clip in df.itertuples(index=False):
        destination = frames_root.joinpath(clip.gesture_name, clip.gesture_video_foldername)
        print(f"Processing {clip.gesture_video_foldername}")
        process_video_to_frames(clip.dir_name, destination, 320, 240)

In [33]:
# Decode every video in df and write its frames, resized to 320x240, as JPEGs
# under TARGET_DIR. One "Processing ..." line is printed per video.
populate_frames(df, TARGET_DIR, GESTURE_NAMES)

Processing Will_B_CLOCKWISE_00001
Processing Will_B_CLOCKWISE_00002
Processing Will_B_CLOCKWISE_00003
Processing Will_B_CLOCKWISE_00004
Processing Will_B_CLOCKWISE_00005
Processing Yen_P_CLOCKWISE_00006
Processing Yen_P_CLOCKWISE_00007
Processing Yen_P_CLOCKWISE_00008
Processing Yen_P_CLOCKWISE_00009
Processing Yen_P_CLOCKWISE_00010
Processing Will_B_COUNTERCLOCKWISE_00001
Processing Will_B_COUNTERCLOCKWISE_00002
Processing Will_B_COUNTERCLOCKWISE_00003
Processing Will_B_COUNTERCLOCKWISE_00004
Processing Will_B_COUNTERCLOCKWISE_00005
Processing Yen_P_COUNTERCLOCKWISE_00006
Processing Yen_P_COUNTERCLOCKWISE_00007
Processing Yen_P_COUNTERCLOCKWISE_00008
Processing Yen_P_COUNTERCLOCKWISE_00009
Processing Yen_P_COUNTERCLOCKWISE_00010
Processing Will_B_DOWN_00001
Processing Will_B_DOWN_00002
Processing Will_B_DOWN_00003
Processing Will_B_DOWN_00004
Processing Will_B_DOWN_00005
Processing Will_B_DOWN_00006
Processing Will_B_DOWN_00007
Processing Will_B_DOWN_00008
Processing Will_B_DOWN_00009