In [1]:
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt

In [2]:
path = "dataset_output/libras_ufop/raw/"
files = [path + i for i in os.listdir(path)]
files

['dataset_output/libras_ufop/raw/libras_ufop_p1.csv',
 'dataset_output/libras_ufop/raw/libras_ufop_p2.csv',
 'dataset_output/libras_ufop/raw/libras_ufop_p3.csv',
 'dataset_output/libras_ufop/raw/libras_ufop_p4.csv',
 'dataset_output/libras_ufop/raw/libras_ufop_p5.csv']

In [12]:
frames_threshold = 15

In [13]:
labels = {}
with open("../datasets/LIBRAS-UFOP DATASET/labels.txt", "r") as f:
    for line in f.readlines():
        split = line.replace('\n', "").split(" ")
        label_name = split[0]
        label_values = split[1:]
        if label_name in labels:
            print("Duplicated label", label_name)
        labels[label_name] = label_values

In [14]:
dfs = [pd.read_csv(f) for f in files]

In [15]:
i = 0
for df in dfs:
    hand_columns = [i for i in df.columns if i.startswith("hand_")]
    face_columns = [i for i in df.columns if i.startswith("face_")]
    df["missing_hand"] = eval(" & ".join([f"(df['{h}'] == 0)" for h in hand_columns]))
    df["missing_face"] = eval(" & ".join([f"(df['{h}'] == 0)" for h in face_columns]))

In [16]:
categories_loss = []
for df in dfs:
    for category in df["category"].unique():
        missing = len(df[(df["category"] == category) & (df["missing_hand"] == True)])
        categories_loss.append((category, missing,))

In [17]:
categories_loss.sort(key=lambda i: i[1])

In [19]:
df

Unnamed: 0.1,Unnamed: 0,category,video_name,frame,hand_0_0_x,hand_0_0_y,hand_0_0_z,hand_0_1_x,hand_0_1_y,hand_0_1_z,...,pose_22_y,pose_22_z,pose_23_x,pose_23_y,pose_23_z,pose_24_x,pose_24_y,pose_24_z,missing_hand,missing_face
0,0,0,p1_c1_s1,0,0.554078,0.574080,0.489432,0.550098,0.584693,0.505750,...,0.968988,0.778580,0.433071,0.966280,0.776645,0.449500,0.928264,0.639326,False,False
1,1,0,p1_c1_s1,1,0.554904,0.572984,0.473265,0.551102,0.584505,0.501587,...,0.969092,0.746218,0.433046,0.966372,0.785361,0.449528,0.930994,0.660776,False,False
2,2,0,p1_c1_s1,2,0.556629,0.570725,0.445739,0.552282,0.581872,0.508635,...,0.969024,0.767361,0.433070,0.966301,0.779206,0.449499,0.928286,0.670544,False,False
3,3,0,p1_c1_s1,3,0.557355,0.569909,0.373626,0.553290,0.578944,0.237545,...,0.968978,0.778955,0.433054,0.966262,0.780360,0.449515,0.928262,0.661416,False,False
4,4,0,p1_c1_s1,4,0.555309,0.566391,0.098960,0.554970,0.578145,0.093302,...,0.969069,0.741455,0.433047,0.966342,0.782431,0.449522,0.928329,0.659935,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51766,51766,0,p1_c4_s9,760,0.579875,0.582410,0.019605,0.578310,0.593680,0.046439,...,0.944501,0.850119,0.469803,0.936393,0.742004,0.498344,0.911964,0.768469,False,False
51767,51767,0,p1_c4_s9,761,0.583663,0.582002,0.021361,0.587518,0.594852,0.025304,...,0.944492,0.847025,0.469824,0.936384,0.738875,0.498321,0.909305,0.773304,False,False
51768,51768,0,p1_c4_s9,762,0.581440,0.601988,0.020364,0.579868,0.609112,0.034156,...,0.944582,0.873261,0.469832,0.939077,0.746043,0.498350,0.911998,0.776382,False,False
51769,51769,0,p1_c4_s9,763,0.583634,0.582956,0.007193,0.589319,0.586746,0.016794,...,0.944570,0.877324,0.469812,0.939072,0.754466,0.498329,0.914656,0.766853,False,False


In [22]:
def get_person(video):
    split = video.split("_")
    person = int(split[0].replace("p", ""))
    return person

In [23]:
def get_category(label):
    return int(label.split(":")[1])

In [44]:
def get_frame(label, index):
    return int(label.split(":")[0].split("-")[index])

In [46]:
def get_start_frame(label, prev_label):
    if prev_label is None:
        last_frame = 0
    else:
        last_frame = get_frame(prev_label, 1)
    start_frame = get_frame(label, 0)
    end_frame = get_frame(label, 1)
    if last_frame >= start_frame - frames_threshold:
        return start_frame
    else:
        return start_frame - frames_threshold

In [47]:
def get_end_frame(label, next_label):
    if next_label is None:
        first_frame = 0
    else:
        first_frame = get_frame(next_label, 0)
    start_frame = get_frame(label, 0)
    end_frame = get_frame(label, 1)
    if first_frame <= end_frame + frames_threshold:
        return end_frame
    else:
        return end_frame + frames_threshold

In [66]:
# All Datasets
df_best = pd.DataFrame(columns=df.columns)
df_count = 0
for df in dfs:
    df_count += 1
    print(f"Processing df {df_count} of {len(dfs)}")
    videos = df["video_name"].unique()
    for video in videos:
        person = get_person(video)
        df_video = df[df["video_name"] == video]
        video_labels = labels[video]
        
        for i in range(0, len(video_labels)):
            label = video_labels[i]
            category = get_category(label)
            
            if i == 0:
                prev_label = None
            else:
                prev_label = video_labels[i-1]
            if i == len(video_labels) - 1:
                next_label = None
            else:
                next_label = video_labels[i+1]
                
            start_frame = get_start_frame(label, prev_label)
            end_frame = get_end_frame(label, next_label)
            
            df_new_video = df_video[(df_video["frame"] >= start_frame) & (df_video["frame"] <= end_frame)].copy()
            df_new_video["video_name"] = f"{video}_{i}"
            df_new_video["frame"] = np.arange(len(df_new_video))
            df_new_video["person"] = person
            df_new_video["category"] = category    
            df_best = pd.concat([df_best, df_new_video])

Processing df 1 of 5
Processing df 2 of 5
Processing df 3 of 5
Processing df 4 of 5
Processing df 5 of 5


In [67]:
df_best

Unnamed: 0.1,Unnamed: 0,category,video_name,frame,hand_0_0_x,hand_0_0_y,hand_0_0_z,hand_0_1_x,hand_0_1_y,hand_0_1_z,...,pose_22_z,pose_23_x,pose_23_y,pose_23_z,pose_24_x,pose_24_y,pose_24_z,missing_hand,missing_face,person
16,16,1,p1_c1_s1_0,0,0.555807,0.567531,0.394245,0.549136,0.575092,0.414851,...,0.753184,0.433065,0.966341,0.781661,0.449489,0.928342,0.651774,False,False,1.0
17,17,1,p1_c1_s1_0,1,0.555086,0.561309,0.396433,0.547985,0.570326,0.402954,...,0.751818,0.433051,0.966384,0.792620,0.449503,0.930953,0.673173,False,False,1.0
18,18,1,p1_c1_s1_0,2,0.552441,0.554763,0.376937,0.544950,0.563844,0.328017,...,0.721404,0.431155,0.966412,0.771990,0.449501,0.928337,0.667928,False,False,1.0
19,19,1,p1_c1_s1_0,3,0.540484,0.558278,0.014662,0.539830,0.565252,0.043459,...,0.745815,0.433046,0.966367,0.777363,0.449523,0.928312,0.661847,False,False,1.0
20,20,1,p1_c1_s1_0,4,0.539812,0.537244,0.100900,0.534094,0.548904,0.083215,...,0.697552,0.433064,0.966390,0.749232,0.449475,0.930948,0.649300,False,False,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50134,50134,47,p5_c4_s9_10,36,0.511064,0.446786,0.026157,0.538449,0.420391,0.030119,...,0.752672,0.424943,0.955436,0.677236,0.439216,0.898344,0.543393,False,False,5.0
50135,50135,47,p5_c4_s9_10,37,0.539804,0.431748,0.028773,0.541779,0.429114,0.030089,...,0.725757,0.424950,0.950097,0.681652,0.437225,0.901135,0.545763,False,False,5.0
50136,50136,47,p5_c4_s9_10,38,0.542277,0.441707,0.030453,0.542592,0.434997,0.080483,...,0.722870,0.426986,0.952731,0.657443,0.435220,0.898388,0.555769,False,False,5.0
50137,50137,47,p5_c4_s9_10,39,0.540754,0.450658,0.036463,0.534091,0.436443,0.065288,...,0.716687,0.424990,0.952775,0.658890,0.437217,0.901061,0.537380,False,False,5.0


In [68]:
len(df_best)

202489

In [69]:
len(df_best[df_best["missing_hand"] == True])

3

In [70]:
len(df_best[df_best["missing_hand"] == True]) / len(df_best)

1.4815619613904953e-05

In [71]:
len(df_best[df_best["missing_face"] == True])

0

In [72]:
len(df_best[df_best["missing_face"] == True]) / len(df_best)

0.0

In [73]:
df_best.drop("Unnamed: 0", axis=1).to_csv("dataset_output/libras_ufop/libras_ufop_openpose.csv", index=False)