# Rolling Timepoints

This notebook contains our procedure for creating a rolling analysis of our time series data, which is used to capture our sensor instability over time. A common technique to assess the constancy of a model’s parameters is to compute parameter estimates over a rolling window of a fixed size through the sample. Because the sensor parameters fluctuate due to some variability in time sampling, the rolling estimates should capture this instability.

**INPUT: CSV output of 31_outlier_removal** (plain_data.csv)

**OUTPUT: Rolled Data file** (rolled_data.csv)

## Imports

In [2]:
import pandas as pd
import numpy as np

## Read in Data

In [10]:
# Load the outlier-filtered sensor data (the INPUT file named at the top of this notebook).
df = pd.read_csv(filepath_or_buffer='plain_data.csv')

In [11]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0,Time,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Subject_ID,Activity,Round,Magnitude
0,2019-07-17 11:52:00.000,41.0,27.2,40.0,32.39,0.275354,15.25,78.98,19-001,Baseline 1,1,63.410094
1,2019-07-17 11:52:00.250,41.0,27.3,40.0,32.39,0.276634,-12.75,78.835,19-001,Baseline 1,1,63.453054
2,2019-07-17 11:52:00.500,41.0,27.4,40.0,32.39,0.270231,-42.99,78.69,19-001,Baseline 1,1,63.496142
3,2019-07-17 11:52:00.750,41.0,27.5,40.0,32.39,0.270231,18.39,78.545,19-001,Baseline 1,1,63.539358
4,2019-07-17 11:52:01.000,41.0,27.6,40.0,32.34,0.26895,13.61,78.4,19-001,Baseline 1,1,63.582702


In [12]:
# (number of rows, number of columns) of the loaded data.
df.shape

(293484, 12)

## View Data Subset (Example without Rolling)

In [10]:
# Pick one (subject, activity, round) segment to use as the un-rolled example.
# NOTE(review): the activity label must match the values actually present in
# df['Activity'] -- confirm 'Baseline' is still the label used in plain_data.csv.
df1 = df.loc[
    df['Subject_ID'].eq('19-001')
    & df['Activity'].eq('Baseline')
    & df['Round'].eq(1)
]
df1

Unnamed: 0,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Magnitude,Activity,Subject_ID,Round
0,41.000000,27.200000,40.000000,32.39,0.275354,15.25,78.9800,63.410094,Baseline,19-001,1
1,41.000000,27.300000,40.000000,32.39,0.276634,-12.75,78.8350,63.453054,Baseline,19-001,1
2,41.000000,27.400000,40.000000,32.39,0.270231,-42.99,78.6900,63.496142,Baseline,19-001,1
3,41.000000,27.500000,40.000000,32.39,0.270231,18.39,78.5450,63.539358,Baseline,19-001,1
4,41.000000,27.600000,40.000000,32.34,0.268950,13.61,78.4000,63.582702,Baseline,19-001,1
...,...,...,...,...,...,...,...,...,...,...,...
955,60.948276,16.948276,-4.258621,33.13,0.242055,-25.99,55.2700,63.404040,Baseline,19-001,1
956,60.943966,16.943966,-4.280172,33.15,0.240774,10.68,55.2500,63.400195,Baseline,19-001,1
957,60.939655,16.939655,-4.301724,33.15,0.242055,7.55,55.2325,63.396359,Baseline,19-001,1
958,60.935345,16.935345,-4.323276,33.15,0.245897,-4.25,55.2150,63.392530,Baseline,19-001,1


## View Data Subset (Example with Rolling)

In [11]:
# Demonstrate a 40-sample rolling window on the single example segment in df1.
# Only the numeric columns are rolled: string columns such as Activity and
# Subject_ID cannot be aggregated, and pandas >= 2.0 raises a DataError
# instead of silently skipping them.
# NOTE(review): this example uses the rolling *median*, while the full
# procedure below uses the rolling *mean* -- confirm which one is intended.
df1_numeric = df1.select_dtypes(include='number')
# The first 39 rows have no complete window, so they are NaN and get dropped.
df2 = df1_numeric.rolling(40).median().dropna()
# Re-attach the segment labels (constant within df1 by construction).
df2['Activity'] = 'Baseline'
df2['Round'] = 1
df2['Subject_ID'] = '19-001'
df2

Unnamed: 0,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Magnitude,Round,Activity,Subject_ID
39,40.281250,28.359375,39.281250,32.355,0.267029,5.520,75.86125,63.006754,1,Baseline,19-001
40,40.218750,28.390625,39.218750,32.340,0.266389,2.100,75.72250,62.941933,1,Baseline,19-001
41,40.156250,28.421875,39.156250,32.340,0.266389,2.100,75.60750,62.877186,1,Baseline,19-001
42,40.093750,28.453125,39.093750,32.340,0.265108,5.520,75.49250,62.812511,1,Baseline,19-001
43,40.031250,28.484375,39.031250,32.340,0.263827,4.660,75.37750,62.747910,1,Baseline,19-001
...,...,...,...,...,...,...,...,...,...,...,...
955,60.850000,16.971983,-4.196983,33.150,0.241415,0.945,55.71250,63.415300,1,Baseline,19-001
956,60.921983,16.971983,-4.207759,33.150,0.240774,4.315,55.68750,63.413367,1,Baseline,19-001
957,60.941810,16.971983,-4.226293,33.150,0.240774,6.595,55.66250,63.409821,1,Baseline,19-001
958,60.941810,16.971983,-4.247845,33.150,0.240774,4.315,55.63750,63.405966,1,Baseline,19-001


## Rolling Procedure 

In [12]:
# Rolling procedure: smooth every (Subject_ID, Activity, Round) segment
# independently with a fixed-size rolling mean, so that a window never mixes
# samples from different subjects, activities or rounds.
WINDOW = 40  # number of consecutive samples averaged per window

# Numeric signals that are averaged. String columns are excluded on purpose:
# pandas >= 2.0 raises a DataError when asked to roll over them.
SENSOR_COLUMNS = ['ACC1', 'ACC2', 'ACC3', 'TEMP', 'EDA', 'BVP', 'HR', 'Magnitude']

# Column order of the result. 'Time' (timestamp of the last sample in each
# window) is carried along because the next cell drops it from `rolled`.
ROLLED_COLUMNS = ['ACC1', 'ACC2', 'ACC3', 'TEMP', 'EDA', 'BVP', 'HR', 'Round',
                  'Magnitude', 'Activity', 'Subject_ID', 'Time']

# Loop-invariant label sets, computed once instead of on every iteration.
activities = pd.unique(df['Activity'])
rounds = pd.unique(df['Round'])

# Collect the per-segment results and concatenate once at the end;
# DataFrame.append was removed in pandas 2.0 and growing a frame inside a
# loop copies all accumulated rows on every iteration.
pieces = []
for subject in pd.unique(df['Subject_ID']):
    # Narrow the frame step by step rather than masking the full frame three
    # times per (subject, activity, round) combination.
    df_subject = df[df['Subject_ID'] == subject]
    for activity in activities:
        df_activity = df_subject[df_subject['Activity'] == activity]
        for round_id in rounds:
            df_new = df_activity[df_activity['Round'] == round_id]
            if len(df_new) < WINDOW:
                # Combination absent or too short to fill one window:
                # it contributes no rows.
                continue
            df_roll = df_new[SENSOR_COLUMNS].rolling(WINDOW).mean().dropna()
            df_roll['Round'] = round_id
            df_roll['Activity'] = activity
            df_roll['Subject_ID'] = subject
            # Index-aligned assignment: each row gets the timestamp of the
            # sample that closes its window.
            df_roll['Time'] = df_new['Time']
            #print(df_roll.head())
            pieces.append(df_roll[ROLLED_COLUMNS])

# pd.concat raises on an empty list, so fall back to an empty, correctly
# labelled frame when no segment was long enough.
rolled = pd.concat(pieces) if pieces else pd.DataFrame(columns=ROLLED_COLUMNS)

Uncomment the commented lines on first run

In [13]:
# Remove the timestamp column; it is not part of the rolled output file.
# errors='ignore' keeps this cell safe to re-run: after the first run `rolled`
# no longer has a 'Time' column, and a plain drop would raise a KeyError.
rolled = rolled.drop(columns = ['Time'], errors = 'ignore')
rolled

Unnamed: 0,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Round,Magnitude,Activity,Subject_ID
39,40.225000,28.297500,39.225000,32.35600,0.265940,-0.21875,76.105250,1,62.913102,Baseline,19-001
40,40.175000,28.342500,39.175000,32.35475,0.265556,-1.07075,75.968750,1,62.870169,Baseline,19-001
41,40.126630,28.383370,39.125543,32.35350,0.265140,-0.75950,75.833750,1,62.826763,Baseline,19-001
42,40.079891,28.420109,39.076630,32.35225,0.264852,0.59100,75.700250,1,62.782884,Baseline,19-001
43,40.034783,28.452717,39.028261,32.35100,0.264563,0.28350,75.568250,1,62.738533,Baseline,19-001
...,...,...,...,...,...,...,...,...,...,...,...
279835,13.739593,-8.671131,64.031176,32.11000,0.600538,-0.15400,92.183625,1,66.098621,Type,19-056
279836,13.976244,-8.749321,64.185294,32.10950,0.603675,-0.22400,92.209375,1,66.328237,Type,19-056
279837,14.291290,-8.854751,64.277941,32.10900,0.606237,0.02700,92.236937,1,66.536121,Type,19-056
279838,14.684729,-8.987421,64.309118,32.10850,0.608734,-0.01875,92.266312,1,66.727613,Type,19-056


In [15]:
# Write the rolled data to the OUTPUT file named at the top of this notebook;
# the row index is not written.
rolled.to_csv(
    path_or_buf='../../40_usable_data_for_models/41_Duke_Data/rolled_data.csv',
    index=False,
)