In [None]:
import pandas as pd
from pathlib import Path

import os, sys
# Add the parent of the current working directory to the module search path
# before importing from python_scripts -- presumably that package lives one
# level above this notebook's folder (TODO confirm).
sys.path.append(os.path.abspath('..'))
from python_scripts.activity_validation import validate_walking_records
from python_scripts.metric_computation import compute_patient_metrics

## Load Data

In [2]:
# Read the per-segment metrics table and print its columns as a quick schema check.
metrics_csv = "../data/metrics/metrics.csv"
records_df = pd.read_csv(metrics_csv)
print(records_df.columns)


Index(['patient_id', 'segment_id', 'steps', 'dur_sec', 'cadence_spm',
       'mean_contact', 'contact_var', 'step_interval_mean', 'step_interval_cv',
       'stance_pct', 'mean_pti', 'mean_load', 'peak_load', 'temp_mean',
       'temp_max', 'temp_min'],
      dtype='object')


## Records Validation

* This filters for cids with a minimum of 3 steps, a minimum duration of 5 seconds, and the device worn properly (minimum temperature of 25 degrees Celsius).

* This validates that each cid provides enough data to analyse and to report on their walking health.
* Cids that do not meet these thresholds cannot yield accurate metrics, because they have too little walking data.

In [3]:
# Run the project's validator over every segment; the returned frame carries
# a boolean `is_walking_records` column (visible in the preview below).
records = validate_walking_records(records_df)
records.head(n=5)

Unnamed: 0,patient_id,segment_id,steps,dur_sec,cadence_spm,mean_contact,contact_var,step_interval_mean,step_interval_cv,stance_pct,mean_pti,mean_load,peak_load,temp_mean,temp_max,temp_min,is_walking_records
0,7,0,3,8.461,21.274081,2.203,1.35146,2.8345,0.703739,116.581408,0.777603,0.438898,0.686388,28.12963,30,26,True
1,10,0,2,2.836,42.313117,0.5715,0.201525,2.122,,53.864279,0.059276,0.381288,0.454997,27.3125,28,27,False
2,11,0,3,3.564,50.505051,0.457667,0.173564,1.612,0.198271,42.586849,0.009873,0.101755,0.102385,27.0,28,26,False
3,13,0,1,3.201,18.744142,3.201,,,,,0.961035,0.332905,0.426519,31.0,32,30,False
4,14,0,1,2.48,24.193548,2.48,,,,,0.796672,0.381448,0.473453,31.321429,32,30,False


In [4]:
# Keep only the segments flagged as valid walking records.
walking_mask = records["is_walking_records"]
walking_records = records[walking_mask]
print("Valid walking records:", len(walking_records))

Valid walking records: 61


In [5]:
# Output directory for the derived CSV files. parents=True also creates any
# missing intermediate folders instead of raising FileNotFoundError, and
# exist_ok=True keeps this cell safe to re-run.
out = Path("../data/metrics")
out.mkdir(parents=True, exist_ok=True)


In [6]:
# Persist the validated segments; index=False leaves the DataFrame index out of the file.
walking_csv = out / "records_walking.csv"
walking_records.to_csv(walking_csv, index=False)
print("Saved files")

# Preview the rows that were written.
walking_records.head(n=5)

Saved files


Unnamed: 0,patient_id,segment_id,steps,dur_sec,cadence_spm,mean_contact,contact_var,step_interval_mean,step_interval_cv,stance_pct,mean_pti,mean_load,peak_load,temp_mean,temp_max,temp_min,is_walking_records
0,7,0,3,8.461,21.274081,2.203,1.35146,2.8345,0.703739,116.581408,0.777603,0.438898,0.686388,28.12963,30,26,True
6,18,0,3,5.616,32.051282,1.673,1.023132,2.2915,0.557601,109.513419,0.458773,0.366986,0.498861,31.90625,33,30,True
12,28,0,8,13.036,36.821111,1.22,0.591722,1.613,0.37302,86.440528,0.174582,0.206863,0.264684,28.550521,30,27,True
17,33,0,5,21.537,13.929517,1.2836,0.728941,5.02225,1.067115,31.947832,0.290068,0.319312,0.418,25.3225,29,23,True
18,34,0,3,7.394,24.344063,2.141667,2.464328,1.22,0.650306,263.319672,0.396767,0.217401,0.289283,25.674603,28,24,True


## Compute Metrics at the cid Level
* This computes overall metrics for each cid during a session.

In [None]:
# Compute per-patient metrics from the in-memory `walking_records` frame, so
# no re-read of records_walking.csv is needed, then persist and preview them.
patient_df = compute_patient_metrics(walking_records)
patient_df.to_csv(out / "patient_metrics.csv", index=False)
patient_df.head()

Unnamed: 0,patient_id,total_steps,total_walk_time_sec,mean_cadence,cadence_var,mean_stance_pct,step_interval_mean,step_interval_cv,mean_contact,contact_var,mean_peak_load,max_peak_load,mean_temp,max_temp
0,7,3,8.461,21.274081,,116.581408,2.8345,0.703739,2.203,,0.686388,0.686388,28.12963,30
1,18,3,5.616,32.051282,,109.513419,2.2915,0.557601,1.673,,0.498861,0.498861,31.90625,33
2,28,8,13.036,36.821111,,86.440528,1.613,0.37302,1.22,,0.264684,0.264684,28.550521,30
3,33,5,21.537,13.929517,,31.947832,5.02225,1.067115,1.2836,,0.418,0.418,25.3225,29
4,34,3,7.394,24.344063,,263.319672,1.22,0.650306,2.141667,,0.289283,0.289283,25.674603,28
