In [1]:
import pandas as pd
from pathlib import Path

import os, sys
sys.path.append(os.path.abspath('..'))
from python_scripts.activity_validation import validate_walking_records
from python_scripts.metric_computation import compute_patient_metrics

## Load Data

In [2]:
# Load the per-segment metrics table and list the columns it provides.
metrics_csv = "../data/metrics/metrics.csv"
records_df = pd.read_csv(metrics_csv)
print(records_df.columns)


Index(['patient_id', 'segment_id', 'steps', 'dur_sec', 'cadence_spm',
       'mean_contact', 'contact_var', 'step_interval_mean', 'step_interval_cv',
       'mean_pti', 'mean_load', 'peak_load', 'temp_mean', 'temp_max',
       'temp_min'],
      dtype='object')


## Records Validation

* This filters for cids with a minimum of 3 steps, a minimum duration of 5 seconds, and the device worn properly (minimum temperature of 25 degrees Celsius).

* This validates that each cid provides enough data to analyse and to report on their walking health.
* Cids that do not meet these thresholds cannot yield accurate results, because they have only a small amount of walking data.

In [3]:
# Run the validation helper over the loaded metrics; the preview shows the
# `is_walking_records` flag column next to the original metric columns.
records = validate_walking_records(records_df)
records.head(5)

Unnamed: 0,patient_id,segment_id,steps,dur_sec,cadence_spm,mean_contact,contact_var,step_interval_mean,step_interval_cv,mean_pti,mean_load,peak_load,temp_mean,temp_max,temp_min,is_walking_records
0,43,0,3,3.845,46.814044,1.281667,0.216308,1.203,0.197496,208.986322,230.920299,268.743655,29.5,31,27,False
1,47,0,3,4.272,42.134831,1.424,0.05283,1.3935,0.000507,159.814707,155.889035,181.572656,28.104167,29,26,False
2,49,1,2,2.473,48.52406,1.2365,0.228396,1.398,,115.927672,136.166254,150.514309,28.25,29,27,False
3,62,0,1,1.456,41.208791,1.456,,,,84.154577,77.010567,77.059864,8.0,9,5,False
4,73,0,1,1.425,42.105263,1.425,,,,68.853688,63.304758,63.569926,18.25,31,-18,False


In [4]:
# Keep only the rows flagged as valid walking records and report how many remain.
is_walking = records["is_walking_records"]
walking_records = records[is_walking]
print("Valid walking records:", len(walking_records))

Valid walking records: 5


In [5]:
# Output directory for the derived CSV files written by the following cells.
out = Path("../data/metrics")
# parents=True also creates any missing parent directory (e.g. ../data) instead of
# raising FileNotFoundError; exist_ok=True keeps re-running this cell idempotent.
out.mkdir(parents=True, exist_ok=True)


In [6]:
# Persist the validated walking records (index column omitted), then preview them.
walking_csv = out / "records_walking.csv"
walking_records.to_csv(walking_csv, index=False)
print("Saved files")

walking_records.head(5)

Saved files


Unnamed: 0,patient_id,segment_id,steps,dur_sec,cadence_spm,mean_contact,contact_var,step_interval_mean,step_interval_cv,mean_pti,mean_load,peak_load,temp_mean,temp_max,temp_min,is_walking_records
6,85,0,4,9.945,24.13273,1.1525,0.16326,2.962333,1.010232,139.306429,179.526757,200.163515,31.723958,34,29,True
15,296,0,7,8.482,49.516623,1.211714,0.203457,1.232833,0.173833,153.473961,185.197869,205.996461,29.5,30,29,True
16,350,0,47,138.031,20.430193,1.223787,0.18067,2.970022,1.710526,61.925104,73.462639,116.376378,31.751773,33,30,True
24,419,2,3,7.058,25.502975,1.123333,0.273709,3.101,0.907541,30.411624,41.135518,45.610173,27.583333,36,6,True
26,424,2,3,13.722,13.117621,1.065,0.372403,6.2995,1.098796,30.172307,30.005526,48.30812,25.883333,33,6,True


## Compute metrics on cid Level
* This computes overall metrics for each cid during a session.

In [7]:
# Aggregate the validated walking records with compute_patient_metrics and save
# the result. The metrics are computed from the in-memory `walking_records`
# frame, so re-reading records_walking.csv from disk is unnecessary here.
patient_df = compute_patient_metrics(walking_records)
patient_df.to_csv(out / "patient_metrics.csv", index=False)
patient_df.head()

Unnamed: 0,patient_id,total_steps,total_walk_time_sec,mean_cadence,cadence_var,step_interval_mean,step_interval_cv,mean_contact,contact_var,mean_peak_load,max_peak_load,mean_temp,max_temp
0,85,4,9.945,24.13273,,2.962333,1.010232,1.1525,,200.163515,200.163515,31.723958,34
1,296,7,8.482,49.516623,,1.232833,0.173833,1.211714,,205.996461,205.996461,29.5,30
2,350,47,138.031,20.430193,,2.970022,1.710526,1.223787,,116.376378,116.376378,31.751773,33
3,419,3,7.058,25.502975,,3.101,0.907541,1.123333,,45.610173,45.610173,27.583333,36
4,424,3,13.722,13.117621,,6.2995,1.098796,1.065,,48.30812,48.30812,25.883333,33
