# Fenland Analysis Script

## 1. Importing dependencies

In [78]:
import os
import tempfile
from glob import glob
import pandas as pd
import numpy as np
from collections import defaultdict

from hypnospy import Wearable, Diary
from hypnospy.data import RawProcessing
from hypnospy.analysis import NonWearingDetector, SleepBoudaryDetector, Validator, Viewer, PhysicalActivity,SleepMetrics
from hypnospy import Experiment

## 2. Setting up the experiment

Using Fenland-specific data pre-processing

In [24]:
def load_experiment(data_path, start_hour):

    # Configure the Experiment
    exp = Experiment()

    # Iterates over a set of files in a directory.
    for file in glob(data_path):
        pp = RawProcessing(file,
                           # HR information
                           col_for_hr="mean_hr",
                           # Activity information
                           cols_for_activity=["stdMET_highIC_Branch"],
                           is_act_count=False,
                           device_location="dw",
                           # Datetime information
                           col_for_datetime="real_time",
                           strftime="%d-%m-%Y %H:%M:%S",#'2012-09-03 10:55:00'
                           # Participant information
                           col_for_pid="id")
        #pp.data["hyp_act_x"] = (pp.data["hyp_act_x"]/0.0060321) + 0.057 # adjust for Fenland

        w = Wearable(pp)  # Creates a wearable from a pp object
        exp.add_wearable(w)

    # Set frequency for every wearable in the collection
    exp.set_freq_in_secs(60)

    # Changing the hour the experiment starts from midnight (0) to 3pm (15)
    exp.change_start_hour_for_experiment_day(start_hour)

    return exp

## 3. Defining the data path, hyperparameters and cutoffs

In [25]:
# Path to find the wearables data
data_path = "./data/small_collection_fenland_full/*.csv"

# Parameters for the HypnosPy HR-based sleep algorithm
hr_quantile = 0.40
hr_min_window_length = 35
hr_merge_blocks = 180

hr_volarity = 6
#Time to consider as start and end of each experiment day - if equal the sleep labelling occurs
#over the entire 24 hours
start_hour = 20
end_hour = 20

#Giving the experiment a number
exp_id = 0

#Set the PA cutoffs - in METs, with names being the binary columns created to label each epoch
cutoffs=[1.5,3,6]
names=['Sed','LPA','MPA','VPA']

## 4. Running the experiment

1. Loading
2. Validating
3. Sleep Labelling
4. Physical Activity Labelling

To Do:
- get sleep metrics (SE, awakenings, SRI from SleepMetrics)

In [26]:
exp = load_experiment(data_path, start_hour)
exp.fill_no_activity(-0.0001)

va = Validator(exp)

# Flag times with less activity than set threshold, or non-wearing periods
va.flag_epoch_physical_activity_less_than(min_activity_threshold=0)
va.flag_epoch_null_cols(col_list=["hyp_act_x"])
va.flag_day_max_nonwearing(max_non_wear_minutes_per_day=60)

va.flag_day_if_invalid_epochs_larger_than(max_invalid_minutes_per_day=60)

# Accounting for removed days and subjects (referred to as wearables)
n_removed_days = va.remove_flagged_days()
print("Removed %d days (non wearing)." % n_removed_days)
n_users = va.remove_wearables_without_valid_days()
print("Removed %d wearables." % n_users)

sbd = SleepBoudaryDetector(exp)

sbd.detect_sleep_boundaries(strategy="hr", output_col="hyp_sleep_period_hr", hr_quantile=hr_quantile,
                                hr_volarity_threshold=hr_volarity, hr_rolling_win_in_minutes=5,
                                hr_sleep_search_window=(start_hour, end_hour),
                                hr_min_window_length_in_minutes=hr_min_window_length,
                                hr_volatility_window_in_minutes=10, hr_merge_blocks_gap_time_in_min=hr_merge_blocks,
                                hr_sleep_only_in_sleep_search_window=True, hr_only_largest_sleep_period=True)

cutoffs=[1.5,3,6]
names=['Sed','LPA','MPA','VPA']

pa = PhysicalActivity(exp)

pa.set_cutoffs(cutoffs=cutoffs,names=names)
pa.generate_pa_columns(based_on='hyp_act_x')

Removed 0 days (non wearing).
Removed 0 wearables.


## 5. Population Analysis

1. Creates dict with all data
2. Extracts statistics from pop dict into pop_df dataframe

### To Do:
- put sleep metrics into the population analysis
- bin subjects by TST according to analysis plan (below)
- creates tables and graphs from pop_df

### Analysis Plan:
	1. Subjects who are more physically active have higher TST, higher SE, higher SRI and lower WASO
			i. Physical activity binned into: 
				1) 0-300, 300-600, 600-900, 900+ METmins per week (multiply daily average by 7) OR
				2) 0-100, 100-200, 200-300, 300+ MVPA per week (multiply daily average by 7)
			ii. Then average all the sleep metrics over these bins and test for statistically significant differences
			iii. Would produce 2 tables: METmins vs sleep metrics & MVPA vs sleep metrics
	2. Subjects with higher sleep quality are healthier
			i. Sleep metrics:
				1) TST binned into hourly intervals (eg. those sleeping <5, 5-6,6-7,7-8,8+ hours/night on average)
				2) SRI binned into quartiles
			ii. Then average the METmins per week for these bins, BMI and also OR for having a cardiovascular disease
			iii. Would produce 2 tables: TST vs PA, BMI, disease status & SRI vs PA, BMI, disease status

In [77]:
pop = defaultdict()

for w in exp.wearables:
    pop[w] = {}
    pop[w]['tst'] = exp.wearables[w].get_total_sleep_time_per_day(sleep_col="hyp_sleep_period_hr")
    pop[w]['onset'] = exp.wearables[w].get_onset_sleep_time_per_day(sleep_col="hyp_sleep_period_hr")
    pop[w]['offset'] = exp.wearables[w].get_offset_sleep_time_per_day(sleep_col="hyp_sleep_period_hr")
    pop[w]['height'] = exp.wearables[w].data['height'][0]
    pop[w]['weight'] = exp.wearables[w].data['weight'][0]
    pop[w]['BMI'] = pop[w]['weight'] / (pop[w]['height']**2)
    pop[w]['sex'] = exp.wearables[w].data['sex'][0]
    pop[w]['age'] = exp.wearables[w].data['age'][0]
    pop[w]['Sed'] = exp.wearables[w].data.groupby(exp.wearables[w].get_experiment_day_col())['Sed'].sum()
    pop[w]['LPA'] = exp.wearables[w].data.groupby(exp.wearables[w].get_experiment_day_col())['LPA'].sum()
    pop[w]['MPA'] = exp.wearables[w].data.groupby(exp.wearables[w].get_experiment_day_col())['MPA'].sum()
    pop[w]['VPA'] = exp.wearables[w].data.groupby(exp.wearables[w].get_experiment_day_col())['VPA'].sum()
    pop[w]['METmins_MPA'] = exp.wearables[w].data[exp.wearables[w].data['MPA']]['hyp_act_x'].sum()
    pop[w]['METmins_VPA'] = exp.wearables[w].data[exp.wearables[w].data['VPA']]['hyp_act_x'].sum()
    pop[w]['METmins_total'] = pop[w]['METmins_MPA'] + pop[w]['METmins_VPA']
    
    #Exclude exp_days with <150 mins of sleep
    pop[w]['tst_mean'] = pop[w]['tst'][pop[w]['tst']['hyp_sleep_period_hr']>150].mean()[0]
    pop[w]['tst_std'] = pop[w]['tst'][pop[w]['tst']['hyp_sleep_period_hr']>150].std()[0]
    pop[w]['LPA_daily'] = pop[w]['LPA'].mean()
    pop[w]['MPA_weekly'] = pop[w]['MPA'].mean()*7
    pop[w]['VPA_weekly'] = pop[w]['VPA'].mean()*7
    pop[w]['MVPA_weekly'] = (pop[w]['MPA'].mean() + pop[w]['VPA'].mean())*7
    pop[w]['METmins_weekly'] = pop[w]['METmins_total'].mean()*7
         
#print(pop['dummy5'].items())


df_cols = ['sex','BMI','age','tst_mean','tst_std',
           'LPA_daily','MPA_weekly','VPA_weekly','MVPA_weekly','METmins_weekly']

pop_df = pd.DataFrame(columns=df_cols)
for w in exp.wearables:
    for col in df_cols:
        pop_df.loc[w,col] = pop[w][col]
print(pop_df)

       sex        BMI age    tst_mean     tst_std   LPA_daily MPA_weekly  \
dummy1   0  18.116276  48       395.5  181.116261   58.428571       34.0   
dummy2   1  26.023427  29      435.25  164.817829        74.4       30.8   
dummy3   0  25.059307  31  544.166667  112.762435        87.0       94.5   
dummy4   1  26.023427  29      435.25  164.817829        74.4       30.8   
dummy5   1   26.97404  52  508.333333  125.463408  117.285714      191.0   

       VPA_weekly MVPA_weekly METmins_weekly  
dummy1        0.0        34.0     845.068672  
dummy2        0.0        30.8     517.243873  
dummy3      2.625      97.125    3510.096365  
dummy4        0.0        30.8     517.243873  
dummy5        4.0       195.0    5363.642738  
