# Posture Monitor Data Analysis

Extract data from `.dat` files and display it using matplotlib.

In [1]:
%matplotlib notebook
from analyze import *
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

## Create metadata dictionary

In [2]:
# Condition codes per subject ID: a three-letter string, one letter per factor.
#   1st letter: b = buzz, s = silent
#   2nd letter: j = Justin's WG, r = Seminar room, c = Cafeteria
#   3rd letter: e = easy, h = hard
subject_meta = {
    14: 'bje',
    15: 'bre',
    16: 'sch',
    17: 'sce',
    18: 'sch',
    19: 'sce',
    20: 'bjh',
}

## Load data using functions in analyze.py

In [3]:
# Subject inspected in the single-subject plot further down
subject_id = 20

# Read every recording, then narrow down to the chosen subject
data = load_all_data()
s20 = get_subject(data, subject_id)

# Baseline value and the (timestamps, widths) series for this subject
baseline = get_baseline(s20)
times, widths = get_distances(s20)

# Seconds elapsed since the subject's first sample
time_delta = [(stamp - times[0]).total_seconds() for stamp in times]

  if 'baseline' in v:


## Display subject data before and after outlier removal

In [4]:
# Two stacked panels: raw series on top, outlier-filtered series underneath
fig, (ax1, ax2) = plt.subplots(2, 1)

# Top panel: the data as recorded
plot_it(ax1, time_delta, widths,
        conditions=subject_meta[subject_id], baseline=baseline)

# Exclude outliers.
# A z-score threshold of 3 limits errors caused by signal noise.
z_threshold = 3

# Only items within a z-score of `z_threshold` are kept
time_keep, width_keep = remove_outliers(time_delta, widths, z_threshold)

# Bottom panel: the cleaned data, with a title recording the threshold,
# the subject and the condition code
title = (
    "Head Proximity to Computer over Time\n"
    "Excluding Outliers (z = {})\n"
    "Subject ID: {} Condition: {}"
).format(z_threshold, subject_id, subject_meta[subject_id])
plot_it(ax2, time_keep, width_keep,
        conditions=subject_meta[subject_id], baseline=baseline, title=title)

plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

## Plot majority of subjects

In [5]:
# Hand the full loaded data set and the per-subject condition codes to
# plot_subjects, with outlier exclusion switched on.
# NOTE(review): plot_subjects is not defined in this notebook; presumably it
# comes from analyze.py via the star import -- see that module for which
# subjects are drawn and how the panels are laid out.
plot_subjects(data, subject_meta, exclude_outliers=True)

<IPython.core.display.Javascript object>

  if 'baseline' in v:


## Export to R for Statistical Analysis

In [6]:
import csv
import pandas as pd

df_list = []
keys = []

# Convert data into one table: build one single-column frame per subject
# (values from get_distances, indexed by seconds since that subject's first
# sample) and record the subject IDs so the frames can be labelled when they
# are concatenated.
#
# The loop uses its own names (subj_*) on purpose: reusing `subject_id`,
# `baseline`, `times`, `widths` and `time_delta` here would overwrite the
# notebook-level values that the single-subject plotting cell reads, so
# re-running that cell afterwards would silently plot a different subject.
for subj_id in subject_meta:
    print(subj_id)
    subj_data = get_subject(data, subj_id)
    # NOTE(review): the baseline is not used in this cell. The call is kept
    # only in case get_baseline has side effects -- confirm and remove.
    get_baseline(subj_data)
    subj_times, subj_widths = get_distances(subj_data)
    subj_seconds = [(t - subj_times[0]).total_seconds() for t in subj_times]
    # No trimming is applied here (an earlier draft cut each series to the
    # first 21*30 samples, "about first 20 minutes").
    df_list.append(pd.DataFrame(pd.Series(subj_widths, index=subj_seconds)))
    keys.append(subj_id)

16
17
18
19
20
14
15


  if 'baseline' in v:


In [7]:
# Stack the per-subject frames into one long table; `keys` adds the subject
# ID as the outer index level, above each subject's elapsed-seconds index.
df = pd.concat(objs=df_list, keys=keys)
df

Unnamed: 0,Unnamed: 1,0
16,0.0,207
16,2.0,226
16,4.0,226
16,6.0,232
16,8.0,229
16,10.0,233
16,12.0,228
16,14.0,236
16,16.0,231
16,18.0,235


In [8]:
# Place data into one index: a shared one-row-per-second axis (1 s .. 1239 s)
# with one column per subject. Seconds for which a subject has no sample are
# left as NaN here; the following cells fill them.
# NOTE(review): each subject's first sample has an offset of exactly 0 s,
# which this index does not contain, so that sample is dropped when the
# column is aligned -- confirm whether range(0, 1240) was intended.
index = [pd.Timedelta(np.timedelta64(x, 's')) for x in range(1,1240)]
df = pd.DataFrame(index=index)
data = load_all_data()

# Loop-local names (subj_*) keep this cell from overwriting the notebook-level
# `subject_id`, `baseline`, `times`, `widths` and `time_delta` used by the
# single-subject plotting cell.
for subj_id in subject_meta:
    subj_data = get_subject(data, subj_id)
    # NOTE(review): the baseline is not used in this cell. The call is kept
    # only in case get_baseline has side effects -- confirm and remove.
    get_baseline(subj_data)
    subj_times, subj_widths = get_distances(subj_data)
    # Offsets from the first sample, kept as timedeltas so they line up with
    # the Timedelta index built above
    subj_offsets = [(t - subj_times[0]) for t in subj_times]
    # Column assignment aligns the series on df's index: offsets that are not
    # in the index are discarded and index rows without a sample become NaN.
    df[subj_id] = pd.Series(subj_widths, index=subj_offsets)

  if 'baseline' in v:


In [9]:
# Fill missing values by interpolation. With no arguments, pandas uses its
# default linear method, so a single missing row between two samples receives
# the average of its neighbours. The result is stored under a new name, so
# `df` itself is left unchanged.
# NOTE(review): with the default settings, NaNs before a column's first valid
# value are not filled by this call -- the export cell back-fills those.
df_clean = df.interpolate()

In [10]:
# FIXME: Calling `plot` crashes the kernel for unknown reason
# fig = plt.figure()
# df_clean[16].iloc[:10].plot()
# plt.show()

In [12]:
# Close any gaps interpolation left at the edges (forward fill, then backward
# fill for leading NaNs), truncate every value to a whole number and write the
# table to CSV for the statistical analysis in R.
#
# `.bfill()` and `.astype('int64')` replace `fillna(method='backfill')` and
# `applymap(int)`, both deprecated in pandas 2.1. The values written are the
# same: both conversions truncate toward zero and both raise if a column still
# contains NaN (i.e. a subject with no samples at all on this index).
df_clean = df_clean.ffill().bfill().astype('int64')
df_clean.to_csv('posture_data.csv')