In [1]:
import pandas as pd
from tqdm import tqdm
import numpy as np
import random
import pickle

In [2]:
# Load the feature table from the working directory; later cells select the
# 'video_id' and 'frame' columns plus per-body-part x/y columns from it.
df = pd.read_csv("features.csv")

In [3]:
# Show the loaded frame as this cell's output.
df

In [4]:
# Body-part names used to build the column names below.
body_parts = [
    "Nose", "Neck",
    "RShoulder", "RElbow", "RWrist",
    "LShoulder", "LElbow", "LWrist",
    "MidHip",
    "RHip", "RKnee", "RAnkle",
    "LHip", "LKnee", "LAnkle",
    "REye", "LEye", "REar", "LEar",
    "LBigToe", "LSmallToe", "LHeel",
    "RBigToe", "RSmallToe", "RHeel",
]

# Identifier columns first, then "<part>_x", "<part>_y", "<part>_prob" for
# every body part, in body_parts order.
all_features = ['video_id', 'frame']
all_features.extend(
    f"{part}_{component}"
    for part in body_parts
    for component in ('x', 'y', 'prob')
)

### Original Dataset

In [5]:
# Body parts whose coordinates are kept for the dataset built below.
used_body_parts = [
    "LAnkle", "RAnkle",
    "LKnee", "RKnee",
    "LHip", "RHip",
    "LBigToe", "RBigToe",
]

# "<part>_x" and "<part>_y" for each kept body part (no "_prob" column).
used_features = []
used_features.extend(
    f"{part}_{axis}"
    for part in used_body_parts
    for axis in ('x', 'y')
)

In [6]:
# Keep only the identifier columns and the selected coordinate columns.
subset_columns = ['video_id', 'frame', *used_features]
df_subset = df[subset_columns]
df_subset

In [7]:
# Number of non-null 'frame' entries per video_id.
video_lengths = df_subset.groupby("video_id")["frame"].count()

In [8]:
# Keep videos with at least 500 counted frames.
video_lengths_filtered = video_lengths.loc[video_lengths.ge(500)]

In [9]:
# Ids of the videos that passed the length filter, as a plain Python list.
filtered_video_ids = video_lengths_filtered.index.to_list()

In [10]:
# Rows belonging to the videos that passed the length filter.
long_enough_mask = df_subset['video_id'].isin(filtered_video_ids)
filtered_df_subset = df_subset[long_enough_mask]
filtered_df_subset

In [89]:
# Write the length-filtered rows to original_500.csv in the working directory,
# omitting the row index.
filtered_df_subset.to_csv("original_500.csv", index=False)

In [11]:
# Work on an independent copy: filtered_df_subset is the result of a boolean-mask
# selection, and the next cell adds a column to `df`. Without the copy, that
# assignment raises SettingWithCopyWarning and also mutates filtered_df_subset.
df = filtered_df_subset.copy()

In [12]:
def _frame_number(frame_name):
    """Return the second underscore-separated token of ``frame_name`` as an int."""
    return int(frame_name.split("_")[1])


# Add a numeric frame index so frames can be sorted in numeric order later.
df["frame_nr"] = df["frame"].map(_frame_number)
df

In [17]:
# Distinct video ids in order of first appearance. Series.unique() gives a
# deterministic order (list(set(...)) does not guarantee one) and collapses
# repeated missing values into a single entry.
video_ids = df['video_id'].unique().tolist()
len(video_ids)

In [45]:
# Cut every video into fixed-length, overlapping windows of feature rows.
# Each entry of video_X is [video_id as str, array of shape (WINDOW_SIZE, len(used_features))].
video_X = []
WINDOW_SIZE = 500  # frames per window; same value as the minimum video length kept earlier
SHIFT_BY = 100     # offset between the starts of consecutive windows
for video_id in tqdm(video_ids):
    # Order the frames of this video by their numeric frame index.
    curr_df = df[df['video_id'] == video_id].sort_values(by='frame_nr')
    nr_frames = len(curr_df)
    # "+ 1" makes the last admissible start (nr_frames - WINDOW_SIZE) reachable, so a
    # video with exactly WINDOW_SIZE frames yields one window instead of none.
    for start in range(0, nr_frames - WINDOW_SIZE + 1, SHIFT_BY):
        curr_chunk = curr_df.iloc[start:start + WINDOW_SIZE, :]
        x = curr_chunk[used_features].to_numpy()
        # Guaranteed by the range bound above; kept as an explicit invariant.
        assert len(x) == WINDOW_SIZE, f"short window for video {video_id}"
        video_X.append([str(video_id), x])

In [46]:
# Number of [video_id, window] entries collected by the loop above.
len(video_X)

In [47]:
# Serialize the collected windows; the file name carries the stride used.
x_pickle_path = f"../data/X_{SHIFT_BY}.pkl"
with open(x_pickle_path, 'wb') as x_file:
    pickle.dump(video_X, x_file)

### Match predicted values to features

In [258]:
# Load the per-video values to match against the features; later cells use the
# 'video_id', 'side' and 'speed' columns of this table.
video_id_predicted_values = pd.read_csv("video_id_predicted_values.csv")
# NOTE(review): the numeric coercion of 'video_id' below is disabled. Later cells
# compare 'video_id' both against `video_ids` and against the string literal
# '10312701', so the dtype of this column matters — confirm which one is intended.
# video_id_predicted_values['video_id'] = pd.to_numeric(video_id_predicted_values['video_id'], downcast='signed', errors='coerce')

In [291]:
# Keep only the rows whose video_id is among the current video_ids.
in_known_videos = video_id_predicted_values['video_id'].isin(video_ids)
filtered_predicted_values = video_id_predicted_values[in_known_videos]
filtered_predicted_values

In [206]:
# Sorted distinct video ids present in the filtered table (group keys of video_id).
common_video_ids_float = filtered_predicted_values.groupby('video_id').size().index.tolist()

In [213]:
# Normalize each id to the string form of its integer value.
common_video_ids_str = list(map(lambda raw_id: str(int(raw_id)), common_video_ids_float))

In [217]:
# Rebind video_ids to the string ids. This is the same list object as
# common_video_ids_str, so the in-place random.shuffle(video_ids) in the next
# cell reorders common_video_ids_str as well.
video_ids = common_video_ids_str

In [223]:
# Shuffle reproducibly: the split is persisted to disk, so it must not change
# between runs. Sorting first makes the result independent of the incoming order
# (and of whether this cell has already been run); the dedicated, seeded RNG keeps
# the global `random` state untouched.
SPLIT_SEED = 42
video_ids.sort()
random.Random(SPLIT_SEED).shuffle(video_ids)

# Calculate the split sizes (80% / 10% / remainder)
total_videos = len(video_ids)
train_size = int(0.8 * total_videos)
val_size = int(0.1 * total_videos)
test_size = total_videos - train_size - val_size  # Ensuring all videos are included

# Split the list into three consecutive, non-overlapping slices
train_videos = video_ids[:train_size]
val_videos = video_ids[train_size:train_size + val_size]
test_videos = video_ids[train_size + val_size:]

# The three parts must cover every video exactly once
assert len(test_videos) == test_size
assert len(train_videos) + len(val_videos) + len(test_videos) == total_videos

# Print the splits
print(f'Total videos: {total_videos}')
print(f'Train videos {len(train_videos)}')
print(f'Validation videos {len(val_videos)}')
print(f'Test videos {len(test_videos)}')

In [224]:
# Bundle the three id lists under their split names.
split = dict(
    train=train_videos,
    validation=val_videos,
    test=test_videos,
)

In [225]:
# Persist the split assignment next to the window pickle.
split_pickle_path = "../data/split.pkl"
with open(split_pickle_path, 'wb') as split_file:
    pickle.dump(split, split_file)

In [261]:
# Make sure every id is a string (builds a new list).
video_ids = list(map(str, video_ids))

In [295]:
# Write the filtered target rows to y.csv in the working directory, omitting the row index.
# NOTE(review): in file order this export comes before the good_video_ids filter
# further down, although the execution counts (295 vs. 292) show it was run after
# it. On a top-to-bottom run y.csv would contain the rows from before that filter
# — confirm the intended order.
filtered_predicted_values.to_csv("y.csv", index=False)

In [267]:
# Display the filtered target rows.
filtered_predicted_values

In [271]:
# Spot check: 'speed' values recorded for video '10312701' (the id is compared as a string).
is_probe_video = filtered_predicted_values["video_id"] == '10312701'
filtered_predicted_values.loc[is_probe_video, 'speed'].tolist()

In [273]:
# Per-video count of non-null values in every column; the next cell uses the
# 'side' count to pick videos that have exactly two such rows.
filt = filtered_predicted_values.groupby('video_id').count()

In [284]:
# Videos whose 'side' count is exactly 2.
has_two_sides = filt['side'].eq(2)
good_video_ids = filt.loc[has_two_sides].index.tolist()

In [285]:
# Row count before restricting to good_video_ids.
len(filtered_predicted_values)

In [292]:
# Drop rows of videos that are not in good_video_ids.
is_good_video = filtered_predicted_values['video_id'].isin(good_video_ids)
filtered_predicted_values = filtered_predicted_values[is_good_video]

In [293]:
# Row count after restricting to good_video_ids.
len(filtered_predicted_values)

In [294]:
# Display the remaining target rows.
filtered_predicted_values

In [3]:
# Load ../data/y.csv.
# NOTE(review): presumably the file written by the earlier to_csv("y.csv") call,
# but the relative paths differ — confirm. This also rebinds `df`, which earlier
# in the notebook held the feature frame.
df = pd.read_csv("../data/y.csv")

In [4]:
# Display the loaded table.
df

Unnamed: 0,video_id,side,KneeFlex_maxExtension,speed,cadence,steplen
0,10312701,R,23.962333,0.915005,0.994667,0.394269
1,10312701,L,34.529000,0.897019,0.959000,0.512130
2,10772801,R,43.406333,0.115560,0.839667,-0.069110
3,10772801,L,47.368000,0.139068,0.826333,0.206712
4,11221801,R,9.374398,1.033810,1.024544,0.542080
...,...,...,...,...,...,...
5713,16828501,L,7.087655,0.299313,0.625468,0.208293
5714,18371401,R,9.921491,1.351350,1.090158,0.602977
5715,18371401,L,9.046386,1.343939,1.101355,0.637646
5716,17286301,R,7.365353,1.143967,1.159751,0.499088
