In [7]:
import sys, os
from pathlib import Path
sys.path.append(os.path.abspath('./..'))
import utils

import numpy as np
import pandas as pd

# Root of the mounted Google Drive. Defaults to the original local mount;
# set the OPENCAP_GDRIVE environment variable to point somewhere else
# instead of editing this cell.
gdrive = Path(os.environ.get(
    'OPENCAP_GDRIVE',
    '/Users/psr/Library/CloudStorage/GoogleDrive-paru@stanford.edu',
))
# datadir = gdrive / 'My Drive/NMBL Lab/OpenCap for NMD biomarkers/data'
datadir = gdrive / 'Shared drives/OpencapNMD/Data'

# Spreadsheets as they currently exist on disk; later cells merge freshly
# built tables into these.
df_session_now = pd.read_excel(datadir / 'session_info.xlsx')
df_trial_now = pd.read_excel(datadir / 'trial_info.xlsx')
df_part_now = pd.read_excel(datadir / 'participant_info.xlsx')

# Store dates as 'YYYY-MM-DD' strings, the same format the participant table
# built from the trial timestamps uses, so that (pid, date) can serve as a
# merge key.
df_part_now['date'] = pd.to_datetime(df_part_now['date']).dt.strftime('%Y-%m-%d')


In [10]:
# Fetch the sessions through the project-local `utils` helper. The cells
# below iterate over the result and read each entry's 'name', 'id',
# 'created_at', 'server', 'meta' and 'trials' keys.
sessions = utils.getUserSessions()


In [11]:
# Participant-level columns, in spreadsheet order. New participant rows get
# an empty string in each of these.
part_labels = [
    # participant description
    'type',
    'clinician',
    'weight',
    'height',
    'age',
    'sex',
    'gender',
    'data_sharing',
    'orthotics',
    # first clock reading and Brooke entry
    'clock_a',
    'brooke',
    # timed walking / TUG entries
    'time_10mwt',
    'time_10mwrt',
    'time_tug_line',
    'time_tug_cone',
    # remaining clock readings and timed entries
    'clock_b',
    'time_5xsts',
    'clock_c',
    'time_stairs_up',
    # free-text notes
    'notes_p',
]


In [12]:
def get_pid(s):
    """Return the participant ID encoded in a session's name, or '' if none.

    A name counts as a participant ID when, after lower-casing, it is the
    letter 'p' followed by exactly three ASCII digits ('P012' -> 'p012').

    Args:
        s: Mapping with a 'name' entry holding the session name string.

    Returns:
        The lower-cased name when it matches the pattern, otherwise ''.
    """
    name = s['name'].lower()
    digits = name[1:]
    # str.isnumeric() also accepts characters such as '²' or '½' that int()
    # cannot parse, and callers run int(pid[1:]) on the result, so only plain
    # ASCII decimal digits are accepted here.
    if name[:1] == 'p' and len(digits) == 3 and digits.isascii() and digits.isdecimal():
        return name
    return ''

# Keep only sessions whose name is a participant ID (get_pid returns ''
# for everything else).
meta = [s for s in sessions if get_pid(s)]
session_data = []
part_data = []
trial_data = []
for s in meta:
    sid = s['id']
    pid = get_pid(s)
    # Participant numbers below 10 are left out of all three tables.
    if int(pid[1:]) < 10:
        continue
    session_data.append(dict(
        sid=sid,
        pid=pid,
        created_at=s['created_at'],
        server=s['server'],
        fps=s['meta']['settings']['framerate'],
        notes_s=np.nan,
    ))
    # Timestamp the participant's visit date is derived from: the last trial
    # listed for the session (including neutral/calibration). Start from the
    # session's own timestamp so a session without trials no longer picks up
    # the loop variable left over from a previous session (or raises
    # NameError on the first one).
    # NOTE(review): assumes the session 'created_at' is timezone-aware like
    # the trial timestamps -- confirm.
    last_created_at = s['created_at']
    for t in s['trials']:
        last_created_at = t['created_at']
        trial = t['name']
        # Setup trials are not recorded in the trial table.
        if trial in ('neutral', 'calibration'):
            continue
        trial_data.append(dict(
            sid=sid,
            pid=pid,
            created_at=t['created_at'],
            trial=trial,
            trial_clean=np.nan,
            notes_t=np.nan,
        ))
    part = dict(
        pid=pid,
        # Local (Pacific) calendar date as a 'YYYY-MM-DD' string.
        date=pd.to_datetime(last_created_at).tz_convert('America/Los_Angeles').strftime('%Y-%m-%d')
    )
    # Remaining participant columns start out blank.
    part.update({pl: '' for pl in part_labels})
    part_data.append(part)

df_session = pd.DataFrame(session_data).sort_values('pid')
# Several sessions of one participant on the same date produce identical
# rows; keep one.
df_part = pd.DataFrame(part_data).sort_values('pid').drop_duplicates()
df_trial = pd.DataFrame(trial_data).sort_values(['pid', 'trial'])

df_part.shape, df_session.shape, df_trial.shape


((101, 22), (306, 6), (1294, 6))

In [13]:
# Merge on (pid, date): non-null values already in the spreadsheet are kept,
# and the freshly built table only fills missing cells and adds new rows.
df_part_new = (
    df_part_now.set_index(['pid', 'date'])
    .combine_first(df_part.set_index(['pid', 'date']))
    .reset_index()
    .sort_values(['date', 'pid'])
)


In [14]:
# Merge on sid: non-null values already in the spreadsheet are kept, and the
# freshly built table only fills missing cells and adds new sessions.
df_session_new = df_session_now.set_index('sid').combine_first(df_session.set_index('sid'))

# Column order: pid and created_at first, notes_s last, everything else in
# between in its existing order.
cols = (
    ['pid', 'created_at']
    + [c for c in df_session_new.columns if c not in ('pid', 'created_at', 'notes_s')]
    + ['notes_s']
)
df_session_new = df_session_new[cols].sort_values('pid').reset_index()


In [15]:
# Merge on (sid, trial): non-null values already in the spreadsheet are kept,
# and the freshly built table only fills missing cells and adds new trials.
df_trial_new = (
    df_trial_now.set_index(['sid', 'trial'])
    .combine_first(df_trial.set_index(['sid', 'trial']))
    .reset_index()
    .sort_values(['pid', 'created_at'])
)

# Column order: identifying columns first, notes_t last, everything else in
# between in its existing order.
cols = (
    ['pid', 'sid', 'created_at', 'trial', 'trial_clean']
    + [
        c for c in df_trial_new.columns
        if c not in ('pid', 'sid', 'created_at', 'trial', 'trial_clean', 'notes_t')
    ]
    + ['notes_t']
)
df_trial_new = df_trial_new[cols]
# df_trial_new


In [17]:
# Show the merged participant table as the cell output for a visual check.
df_part_new


Unnamed: 0,pid,date,type,clinician,weight,height,age,sex,gender,data_sharing,...,brooke,time_10mwt,time_10mwrt,time_tug_line,time_tug_cone,clock_b,time_5xsts,clock_c,time_stairs_up,notes_p
0,p011,2023-05-13,DM,CM,65.8,1.68,30.0,M,M,2.0,...,6.0,8.31,3.75,5.54,6.16,15:30:00,8.96,15:45:00,2.22,tug_line repeated since first time did not do ...
1,p012,2023-05-13,DM,PSR,63.5,1.78,20.0,W,W,1.0,...,6.0,7.59,2.91,5.75,5.44,,13.75,,1.69,Repeated curls since elbows not at 90º first t...
2,p013,2023-05-13,FSHD,PA,83.9,1.88,30.0,M,M,2.0,...,4.0,8.81,4.35,5.88,5.72,15:39:00,10.22,15:51:00,4.54,Arm ROM unable to complete arms above shoulder...
3,p014,2023-05-13,FSHD,CM,108.9,1.83,27.0,M,M,2.0,...,5.0,8.59,2.75,4.81,5.28,16:30:00,7.75,16:45:00,1.85,stairs_down time is 1.75s; stairs_down subject...
4,p017,2023-05-13,DM,PSR,120,1.6,27.0,W,W,2.0,...,6.0,8.62,7.09,8.06,8.78,,12.28,,3.56,Repeated 10MWT due to stopwatch timing error; ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,p126,2023-06-11,FSHD,TTD,77.1,1.83,57.0,M,M,2.0,...,4.0,12.69,11.44,10.35,11.47,,14.81,,,TUG Cone repeated (first time 11.84) since out...
80,p096,2023-06-12,FSHD,CM,68.7,1.65,60.0,W,W,2.0,...,4.0,9.22,6.6,6.56,6.93,,12.66,,3.12,tug_line redid since ran first time (6.08s); r...
57,p073,2023-06-13,DM,CM,,,,M,M,2.0,...,6.0,9.75,9.12,9.69,9.97,12:24:00,14.19,12:35:00,,"10mwrt first trial hands in pockets (9.12 s), ..."
100,p128,2023-06-13,DM,CM,83.9,1.88,34.0,M,M,2.0,...,6.0,8.23,3.56,4.71,5.67,13:38:00,8.24,13:49:00,2.82,Stairs down time 3.42s


In [None]:
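# Uncomment to write the merged tables to Excel files under 'temp/'.
# NOTE: `dataset` is not defined in any of the cells shown here; define it
# (or remove it from the paths) before running these lines.
# df_part_new.to_excel(datadir / dataset / 'temp/participant_info.xlsx', index=False)
# df_session_new.to_excel(datadir / dataset / 'temp/session_info.xlsx', index=False)
# df_trial_new.to_excel(datadir / dataset / 'temp/trial_info.xlsx', index=False)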
