In [1]:
import datetime
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA

# ===== Data ===== #
DFS = {
    'lag_cp' : pd.read_csv('data/bem_comportadas_laguardia.csv'),
    'lag_li' : pd.read_csv('data/livres_laguardia.csv'),
    'van_cp' : pd.read_csv('data/bem_comportadas_vanessa.csv'),
    'van_li' : pd.read_csv('data/livres_vanessa.csv'),
    'bru_cp' : pd.read_csv('data/bem_comportadas_bruno.csv'),
    'bru_li' : pd.read_csv('data/livres_bruno.csv'),
    'alm_cp' : pd.read_csv('data/bem_comportadas_almir.csv'),
}

---
## Identificando o momento de cada pose
---

In [None]:
for key, df in DFS.items():
    # add a fake column for timestamp
    df['timestamp'] = pd.date_range(start='1/1/2023', periods=df.shape[0], freq='500L')

In [13]:
# Transform the data into a df of poses and durations
DFS_POSES_DURATIONS = {}

for key, df in DFS.items():
    this_df_dict = {}
    this_pose = -1
    pose_count = 0
    # iterate over the rows of the dataframe
    for index, row in df.iterrows():
        # get the pose
        pose = row['pose']
        # if the pose is different from the previous one
        if pose != this_pose:
            # increase the pose count
            pose_count += 1
            # update the pose
            this_pose = pose
            # add the pose to the dict
            this_df_dict[f'p:{pose},c:{pose_count}'] = {
                'pose' : pose,
                'start' : row['timestamp'],
                'end' : row['timestamp'],
            }
        else:
            # update the end of the pose
            this_df_dict[f'p:{pose},c:{pose_count}']['end'] = row['timestamp']
    # create a dataframe from the dict
    dfs = pd.DataFrame.from_dict(this_df_dict, orient='index')
    # save the dataframe
    DFS_POSES_DURATIONS[key] = dfs

In [56]:
DFS_POSES_DURATIONS['bru_li']

Unnamed: 0,pose,start,end
"p:2,c:1",2,2023-01-01 00:00:00,2023-01-01 00:03:03.500
"p:1,c:2",1,2023-01-01 00:03:04,2023-01-01 00:06:28.500
"p:2,c:3",2,2023-01-01 00:06:29,2023-01-01 00:06:31.500
"p:3,c:4",3,2023-01-01 00:06:32,2023-01-01 00:09:36.500
"p:1,c:5",1,2023-01-01 00:09:37,2023-01-01 00:09:38.500
"p:7,c:6",7,2023-01-01 00:09:39,2023-01-01 00:13:24.500
"p:0,c:7",0,2023-01-01 00:13:25,2023-01-01 00:13:25.500
"p:1,c:8",1,2023-01-01 00:13:26,2023-01-01 00:13:26.500
"p:8,c:9",8,2023-01-01 00:13:27,2023-01-01 00:16:06.500
"p:12,c:10",12,2023-01-01 00:16:07,2023-01-01 00:16:08.500


In [55]:
px.timeline(DFS_POSES_DURATIONS['bru_li'],
            x_start='start',
            x_end='end',
            y='pose',
            color='pose')

---
## Comparando dois momentos diferentes da mesma pose
---

In [21]:
DFS['van_li']

Unnamed: 0,p00,p01,p02,p03,p04,p05,p06,p07,p08,p09,p10,p11,pose,timestamp
0,0,0,0,0,953,1203,228,4018,778,484,237,904,3,2023-01-01 00:00:00.000
1,0,0,0,0,989,1227,243,4019,875,538,310,947,3,2023-01-01 00:00:00.500
2,0,0,0,0,986,1210,242,4028,863,532,303,940,3,2023-01-01 00:00:01.000
3,0,0,0,0,989,1232,245,4019,889,556,302,955,3,2023-01-01 00:00:01.500
4,0,0,0,0,1002,1233,246,4029,891,550,336,960,3,2023-01-01 00:00:02.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3756,3089,49,0,0,701,697,482,4009,487,387,652,1009,7,2023-01-01 00:31:18.000
3757,2999,267,0,0,623,584,486,4018,515,366,658,995,7,2023-01-01 00:31:18.500
3758,3067,0,0,0,594,558,477,4007,461,388,634,1009,7,2023-01-01 00:31:19.000
3759,3045,0,0,0,583,578,455,4016,475,365,622,1022,7,2023-01-01 00:31:19.500


In [70]:
# primeira ocorrencia de pose 3
pose3c1 = DFS['bru_li'][
    (DFS['bru_li']['timestamp'] > DFS_POSES_DURATIONS['bru_li']['start']['p:7,c:6']) &
    (DFS['bru_li']['timestamp'] < DFS_POSES_DURATIONS['bru_li']['end']['p:7,c:6'])
]
pose3c1 = pose3c1.copy()

# segunda ocorrencia de pose 3
pose3c2 = DFS['bru_li'][
    (DFS['bru_li']['timestamp'] > DFS_POSES_DURATIONS['bru_li']['start']['p:7,c:13']) &
    (DFS['bru_li']['timestamp'] < DFS_POSES_DURATIONS['bru_li']['end']['p:7,c:13'])
]
pose3c2 = pose3c2.copy()

In [71]:
# junta as duas ocorrencias e cria uma flag para cada uma
pose3c1['flag'] = 1
pose3c2['flag'] = 2
df_pose3 = pd.concat([pose3c1, pose3c2])

df_pose3

Unnamed: 0,p00,p01,p02,p03,p04,p05,p06,p07,p08,p09,p10,p11,pose,timestamp,flag
1159,1168,0,1915,1998,0,0,327,4012,895,220,299,875,7,2023-01-01 00:09:39.500,1
1160,1359,0,1942,2080,0,0,409,4036,991,238,360,676,7,2023-01-01 00:09:40.000,1
1161,1342,0,1902,2061,0,0,397,4024,968,224,281,573,7,2023-01-01 00:09:40.500,1
1162,1219,0,1964,2191,0,0,423,4034,1043,181,386,525,7,2023-01-01 00:09:41.000,1
1163,1361,0,1942,2098,0,0,410,4026,997,247,390,651,7,2023-01-01 00:09:41.500,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2684,1887,0,1868,1838,0,0,792,3958,1223,522,0,975,7,2023-01-01 00:22:22.000,2
2685,1822,0,2022,1950,0,0,790,3948,1221,520,0,974,7,2023-01-01 00:22:22.500,2
2686,2035,0,1966,1137,0,0,794,3958,1203,526,0,979,7,2023-01-01 00:22:23.000,2
2687,1923,0,2002,1648,0,0,792,3949,1220,529,0,978,7,2023-01-01 00:22:23.500,2


In [72]:
df_pose3['flag'].value_counts()

1    450
2    344
Name: flag, dtype: int64

In [73]:
# separa o dataframa em features pro PCA e target
X = df_pose3[df.columns[:-2]]
X

Unnamed: 0,p00,p01,p02,p03,p04,p05,p06,p07,p08,p09,p10,p11
1159,1168,0,1915,1998,0,0,327,4012,895,220,299,875
1160,1359,0,1942,2080,0,0,409,4036,991,238,360,676
1161,1342,0,1902,2061,0,0,397,4024,968,224,281,573
1162,1219,0,1964,2191,0,0,423,4034,1043,181,386,525
1163,1361,0,1942,2098,0,0,410,4026,997,247,390,651
...,...,...,...,...,...,...,...,...,...,...,...,...
2684,1887,0,1868,1838,0,0,792,3958,1223,522,0,975
2685,1822,0,2022,1950,0,0,790,3948,1221,520,0,974
2686,2035,0,1966,1137,0,0,794,3958,1203,526,0,979
2687,1923,0,2002,1648,0,0,792,3949,1220,529,0,978


In [74]:
# aplica o PCA
pca = PCA(n_components=2)
pca.fit(X)
X_pca = pca.transform(X)
X_pca

array([[  89.78308604,  729.6856085 ],
       [  13.81627583,  592.28584159],
       [  95.62828097,  614.32088089],
       ...,
       [-332.54946947,  -71.89107527],
       [-238.02343852,   70.18253184],
       [1836.21136953,  644.41986535]])

In [77]:
# adiciona as componentes principais ao dataframe
df_pose3['pc1'] = X_pca[:,0]
df_pose3['pc2'] = X_pca[:,1]

# plota as componentes principais
fig = px.scatter(df_pose3, x='pc1', y='pc2', color='flag',
                 title='PCA das poses 7 de Bruno')
fig.show()