In [1]:
import ast # https://docs.python.org/3/library/ast.html
import glob # https://docs.python.org/3/library/glob.html
import json # https://docs.python.org/3/library/json.html
import os # https://docs.python.org/3/library/os.html
import warnings # https://docs.python.org/3/library/warnings.html

import numpy as np # https://numpy.org/
import pandas as pd # https://pandas.pydata.org/

# Used to get rid of repeated deprecation warnings.
# NOTE: the "ignore" action is installed without a category or module restriction,
# so every Python warning is silenced, not only deprecation warnings.
warnings.filterwarnings("ignore")

## Requirements to process the video's body

1. Have an empty folder available for the temporary keyframe JSON files. Make sure this folder is empty, as everything inside it is erased at the start of each video cycle. Expect approximately 60 files per second of video.

2. Please make sure the path to your video directory is set in the "video_directory" variable.

3. Depending on whether you are using a Windows, macOS, or Linux machine, you may need to change the command that runs OpenPose ( !Powershell.exe -Command bin\OpenPoseDemo.exe --video.....). On Windows you only need to change your output and video directories; otherwise please refer to the OpenPose demo documentation on GitHub: https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/01_demo.md

4. IMPORTANT: Please place this notebook inside the OpenPose software folder before running it; otherwise the commands won't work properly. Simply drag this file into the folder.

## OpenPose Keypoint Mapping
 
 0-Nose, 1-Neck, 2-RShoulder, 3-RElbow, 4-RWrist, 5-LShoulder, 6-LElbow, 7-LWrist, \
 8-MidHip, 9-RHip, 10-RKnee, 11-RAnkle, 12-LHip, 13-LKnee, 14-LAnkle, 15-REye, 16-LEye, \
 17-REar, 18-LEar, 19-LBigToe, 20-LSmallToe, 21-LHeel, 22-RBigToe, 23-RSmallToe, 24-RHeel

In [2]:
# Names of the body-part columns: the strings '0' through '24', one per keypoint
# (25 columns in total). Used wherever the body-part columns are iterated.
col_list = [str(part) for part in range(25)]

# Empty dataframe that will receive the final annotations;
# each of the columns '0' to '24' represents a body part.
master_df = pd.DataFrame(columns=list(col_list))

# Directories, please fill in your own locations:
#   output          - folder holding the temporary keyframe json files
#   video_directory - folder containing the videos to process
output = "C:/Users/Arda/Desktop/openpose/output"
video_directory = "C:/Users/Arda/Desktop/openpose/Dataset_CMPT419/Turkish_Samples"

# Running count of the videos processed so far.
videocount = 0

# Loop through each video inside a directory that will be processed through OpenPose
# Please input your video folder that you want processed
for videofile in os.listdir(video_directory):
    
    # Remove any keyframe json files in the output directory, please fill in empty folder output location.
    for f in os.listdir(output):
        os.remove(output+'/'+f)
    
    # initialize variables for dataframe rows
    row_num = 0 + videocount
    row_num_limit = 0 + videocount
    
    # This commands starts the OpenPose body processing program for your videos 
    # Please fill in your video data location and empty folder output location
    !Powershell.exe -Command bin\OpenPoseDemo.exe --video {video_directory}/{videofile} --number_people_max 1 --write_json output
    
    # After running through OpenPose, read through output directory
    for file in os.listdir(output):
        # store filename to make sure its the required json file
        filename = os.fsdecode(file)
        
        # increase row limit for dataframe
        row_num_limit = row_num_limit + 1
        
        # if its a json file continue
        if filename.endswith(".json"): 
            
            # store json output into a dataframe
            df = pd.read_json(output +'/'+filename)
            
            # story the 25 keypoints data into an array
            results = df['people'].values.tolist()
            
            # if 25 keypoint data is empty set the 25 points to 0, no person was found in the video.
            if results == []:
                results = [{'person_id': [-1],'pose_keypoints_2d': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],'face_keypoints_2d': [],'hand_left_keypoints_2d': [],'hand_right_keypoints_2d': [],'pose_keypoints_3d': [],'face_keypoints_3d': [],'hand_left_keypoints_3d': [],'hand_right_keypoints_3d': []}]
            
            # Turn string data into floats
            keypoints = ast.literal_eval(str(results[0]))
            
            # split the json data into the 25 subarrays, one for each body part.
            arraya = [0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14,15,15,15,16,16,16,17,17,17,18,18,18,19,19,19,20,20,20,21,21,21,22,22,22,23,23,23,24,24,24]
            arrayb = keypoints.get("pose_keypoints_2d")
            
            values, indices, counts = np.unique(arraya, return_counts=True, return_index=True)
            subarrays=np.split(arrayb,indices)
            
            # store the subarrays into a new dataframe.
            df_2 = pd.DataFrame({"0":[subarrays[1]], "1":[subarrays[2]], "2":[subarrays[3]], "3":[subarrays[4]], "4":[subarrays[5]], "5":[subarrays[6]], "6":[subarrays[7]], "7":[subarrays[8]], "8":[subarrays[9]],"9":[subarrays[10]], "10":[subarrays[11]], "11":[subarrays[12]], "12":[subarrays[13]], "13":[subarrays[14]], "14":[subarrays[15]],"15":[subarrays[16]],"16":[subarrays[17]],"17":[subarrays[18]],"18":[subarrays[19]],"19":[subarrays[20]],"20":[subarrays[21]],"21":[subarrays[22]],"22":[subarrays[23]],"23":[subarrays[24]],"24":[subarrays[25]]})
            
            # Save the video's name into the dataframe column.
            df_2['video'] = videofile
            
            # Store the fear subcategory into the dataframe
            df_2['Subcategory_2'] = df_2['video'].str[:4]
            df_2['Subcategory'] = df_2['Subcategory_2'].str[-2:]
            
            # Get country of origin
            string_check = df_2['Subcategory_2'].str[:2]
            
            if string_check[0] == "TR":
                df_2['Label'] = "Turkey"
            elif string_check[0] == "CA":
                df_2['Label'] = "Canada"
            elif string_check[0] == "C_":
                df_2['Label'] = "China"
            elif string_check[0] == "I_":
                df_2['Label'] = "India"
            else
                df_2['Label'] = "None"

            
            # Remove the unwanted data frame our annotations (keep the first two x-y coordinate numbers)
            temp0 = df_2['0'][0][:2]
            temp1 = df_2['1'][0][:2]
            temp2 = df_2['2'][0][:2]
            temp3 = df_2['3'][0][:2]
            temp4 = df_2['4'][0][:2]
            temp5 = df_2['5'][0][:2]
            temp6 = df_2['6'][0][:2]
            temp7 = df_2['7'][0][:2]
            temp8 = df_2['8'][0][:2]
            temp9 = df_2['9'][0][:2]
            temp10 = df_2['10'][0][:2]
            temp11 = df_2['11'][0][:2]
            temp12 = df_2['12'][0][:2]
            temp13 = df_2['13'][0][:2]
            temp14 = df_2['14'][0][:2]
            temp15 = df_2['15'][0][:2]
            temp16 = df_2['16'][0][:2]
            temp17 = df_2['17'][0][:2]
            temp18 = df_2['18'][0][:2]
            temp19 = df_2['19'][0][:2]
            temp20 = df_2['20'][0][:2]
            temp21 = df_2['21'][0][:2]
            temp22 = df_2['22'][0][:2]
            temp23 = df_2['23'][0][:2]
            temp24 = df_2['24'][0][:2]
            
            # and then re-set the annotation columns with our desired data.
            df_2['0'][0] = temp0
            df_2['1'][0] = temp1
            df_2['2'][0] = temp2
            df_2['3'][0] = temp3
            df_2['4'][0] = temp4
            df_2['5'][0] = temp5
            df_2['6'][0] = temp6
            df_2['7'][0] = temp7
            df_2['8'][0] = temp8
            df_2['9'][0] = temp9
            df_2['10'][0] = temp10
            df_2['11'][0] = temp11
            df_2['12'][0] = temp12
            df_2['13'][0] = temp13
            df_2['14'][0] = temp14
            df_2['15'][0] = temp15
            df_2['16'][0] = temp16
            df_2['17'][0] = temp17
            df_2['18'][0] = temp18
            df_2['19'][0] = temp19
            df_2['20'][0] = temp20
            df_2['21'][0] = temp21
            df_2['22'][0] = temp22
            df_2['23'][0] = temp23
            df_2['24'][0] = temp24
            
            # store the processed frame file into the master datafrmae
            master_df = master_df.append(df_2, ignore_index=True)
    
    # For each bodypart (columns 0 to 24)
    for x in range(0, 25):
        row_num = 0 + videocount
        
        # put all of videos data rows into 1 row (1 row is now 1 video's data)
        while(row_num <= row_num_limit-2):
            row_num = row_num + 1  
            master_df[col_list[x]][videocount] =  np.concatenate((master_df[col_list[x]][videocount], master_df[col_list[x]][row_num]))
                    
    
    # For each bodypart (columns 0 to 24) fix data into proper format for csv and classifying (comma delimited) 
    for y in range(0, 25):   
        res = []
        idx = 0
        test = master_df[col_list[y]][videocount]
        
        while(idx < (row_num_limit*2)):
            try:
                res.append([test[idx], test[idx+1]]) 
            except IndexError:
                pass

            idx = idx + 2 

        master_df[col_list[y]][videocount] = res
    
    # Drop all unneeded rows (all except final processed videos)
    master_df = master_df.drop(master_df.index.to_list()[videocount+1:], axis=0) 
    
    #increment video count 
    videocount = videocount + 1
    

Starting OpenPose demo...
Configuring OpenPose...
Starting thread(s)...
We have introduced an additional boost in accuracy in the CUDA version of about 0.2% with respect to the CPU/OpenCL versions. We will not port this to CPU given the considerable slow down in speed it would add to it. Nevertheless, this accuracy boost is almost insignificant so the CPU/OpenCL versions can be safely used.
Empty frame detected, frame number 54 of 67. In C:\openpose_cpu\src\openpose\producer\producer.cpp:op::Producer::checkFrameIntegrity():290
Empty frame detected, frame number 54 of 67. In C:\openpose_cpu\src\openpose\producer\producer.cpp:op::Producer::checkFrameIntegrity():290
Empty frame detected, frame number 54 of 67. In C:\openpose_cpu\src\openpose\producer\producer.cpp:op::Producer::checkFrameIntegrity():290
OpenPose demo successfully finished. Total time: 94.822176 seconds.
Starting OpenPose demo...
Configuring OpenPose...
Starting thread(s)...
We have introduced an additional boost in accurac

In [8]:
# Store fear sub-categories from videos.
# The 'Subcategory' column holds a two-character code taken from the video file
# name; replace it with the full sub-category name. Every row of the dataframe is
# handled, however many videos were processed.
SUBCATEGORY_NAMES = {
    "_N": "Nervous", "Ne": "Nervous",
    "_T": "Threatened", "Th": "Threatened",
    "_S": "Surprised", "Su": "Surprised",
}
EXPANDED_SUBCATEGORIES = set(SUBCATEGORY_NAMES.values())


def expand_subcategory(code):
    """Return the full sub-category name for a two-character code.

    Names that are already expanded are returned unchanged so that running this
    cell a second time does not overwrite them; anything else that is not a known
    code becomes the string "None".
    """
    if code in EXPANDED_SUBCATEGORIES:
        return code
    return SUBCATEGORY_NAMES.get(code, "None")


# Whole-column assignment instead of chained indexing (master_df[col][row] = ...),
# which pandas does not guarantee to write back to the dataframe.
master_df['Subcategory'] = master_df['Subcategory'].map(expand_subcategory)

# Drop unneeded column (errors='ignore' keeps the cell re-runnable once it is gone)
master_df = master_df.drop(columns='Subcategory_2', errors='ignore')

In [7]:
# Display the dataframe as the cell's output to confirm that the processing worked
# (each row holds one video's data).
master_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,video,Subcategory_2,Subcategory,Label
0,"[[566.803, 581.802], [560.845, 578.827], [563....","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",...,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",CA_Nervous_17.mp4,CA_N,Nervous,Canada
1,"[[689.265, 444.996], [689.283, 445.001], [689....","[[633.308, 753.961], [633.317, 753.969], [633....","[[374.269, 798.08], [374.277, 798.102], [374.2...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[906.985, 736.367], [907.024, 739.243], [906....","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",...,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",CA_Surprised_15.mp4,CA_S,Surprised,Canada
2,"[[1071.86, 474.4], [1071.85, 477.271], [1071.9...","[[1230.77, 877.568], [1227.8, 880.555], [1224....","[[792.239, 792.267], [792.242, 792.218], [798....","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[1678.19, 927.608], [1678.15, 924.667], [1675...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",...,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",CA_Threatened_18.mp4,CA_T,Threatened,Canada
3,"[[648.045, 465.574], [648.096, 465.479], [648....","[[547.943, 812.833], [547.899, 809.861], [547....","[[206.511, 851.077], [209.437, 845.222], [209....","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[880.545, 795.178], [880.545, 786.344], [886....","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",...,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",I_Nervous_17.mp4,I_Ne,Nervous,India
4,"[[827.458, 318.482], [824.544, 321.243], [824....","[[792.302, 503.748], [792.193, 503.806], [792....","[[592.136, 486.065], [589.136, 486.169], [589....","[[506.773, 798.142], [500.889, 804.007], [500....","[[474.307, 948.29], [471.47, 948.198], [471.46...","[[1001.25, 512.554], [1001.18, 512.616], [1001...","[[1115.91, 801.006], [1113.12, 803.981], [1115...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",...,"[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...",I_Threatened_17.mp4,I_Th,Threatened,India


In [9]:
# Store the annotated body dataset into a csv file in the current working directory.
# No `index` argument is passed, so pandas' default applies and the row index is
# written as an unnamed first column.
master_df.to_csv('body_processed.csv')