# Imports and setup

In [1]:
import functools
import pandas as pd
from glob import glob
from hypnospy import Wearable, Experiment, Diary
from hypnospy.data import MESAPreProcessing
from hypnospy.analysis import NonWearingDetector, SleepBoudaryDetector, Viewer, PhysicalActivity, Validator, CircadianAnalysis
from hypnospy.analysis import SleepMetrics, SleepWakeAnalysis

import seaborn as sns
import matplotlib.pyplot as plt

def setup_experiment(file_path, diary_path, start_hour, freq_in_secs=30):
    """Build an Experiment with one Wearable per MESA file matching ``file_path``.

    Parameters
    ----------
    file_path : str
        Glob pattern selecting the MESA files to load.
    diary_path : str
        Path to a diary file. Currently unused: the diary-loading code at the
        end of this function is commented out.
    start_hour : int
        Hour handed to ``change_start_hour_for_experiment_day`` for every wearable.
    freq_in_secs : int, optional
        Value handed to ``Experiment.set_freq_in_secs``. Defaults to 30, the
        value that used to be hard-coded here.

    Returns
    -------
    Experiment
        The experiment containing every wearable that was loaded.
    """
    # Configure an Experiment
    exp = Experiment()

    # Files are pre-processed one at a time (instead of being bulk-loaded)
    # because each wearable's annotations are modified before it is added.
    # sorted() makes the wearable order reproducible: glob() returns its
    # matches in arbitrary, filesystem-dependent order.
    for file in sorted(glob(file_path)):
        pp = MESAPreProcessing(file)
        w = Wearable(pp)  # Creates a wearable from a pp object

        # Boolean annotation column: True where the "interval" annotation is
        # "REST-S" or "REST", False everywhere else.
        w.data["interval_sleep"] = w.data["interval"].isin(["REST-S", "REST"])

        exp.add_wearable(w)
        # NOTE(review): this call looks loop-invariant, but it is deliberately
        # kept here, in the original call order, because its interaction with
        # change_start_hour_for_experiment_day is not visible from this file.
        exp.set_freq_in_secs(freq_in_secs)
        w.change_start_hour_for_experiment_day(start_hour)

    # diary = Diary().from_file(diary_path)
    # exp.add_diary(diary)

    return exp




# Read data

In [2]:
# The data directory depends on where the notebook is launched from. The cell
# used to assign file_path/diary_path twice, so the first pair ("../data/...")
# was dead code. Both locations are now explicit candidates: the first one that
# exists is used, and if neither exists we fall back to "HypnosPy/data", which
# is the location the cell effectively always used.
DATA_ROOT_CANDIDATES = ["HypnosPy/data", "../data"]
data_root = next((root for root in DATA_ROOT_CANDIDATES if glob(root)), DATA_ROOT_CANDIDATES[0])

file_path = data_root + "/small_collection_mesa/*.csv"
diary_path = data_root + "/diaries/mesa_diary.csv"

start_hour = 15
end_hour = 15  # NOTE(review): not referenced by any cell visible in this notebook

exp = setup_experiment(file_path, diary_path, start_hour)
# NOTE(review): -0.0001 is presumably a sentinel that keeps filled epochs
# distinguishable from a genuine activity count of 0 -- confirm against
# Experiment.fill_no_activity.
exp.fill_no_activity(-0.0001)
exp.overall_stats()



Total number of wearables: 5
Total number of days: 44
Avg. number of days: 8.80 (+-1.600). Max: 12, Min: 8.
Avg. number of epochs: 23314.00 (+-4390.404). Max: 31819, Min: 19830.


# Non wear and sleep boundary detection

In [3]:
# Detect non-wear periods with the "choi" strategy. "hyp_wearing_choi" is the
# column name passed as wearing_col; the validation cell reads it back through
# flag_epoch_nonwearing("hyp_wearing_choi").
nwd = NonWearingDetector(exp)
nwd.detect_non_wear(strategy="choi", wearing_col="hyp_wearing_choi")

# TODO: fix bug when annotation_merge_tolerance_in_minutes < 0
# Derive sleep periods from the boolean "interval_sleep" annotation column
# (created in setup_experiment); "sleep_period_annotation" is the output column
# that the validation, bouts and sleep-metric cells refer to.
sbd = SleepBoudaryDetector(exp)
sbd.detect_sleep_boundaries(strategy="annotation", output_col="sleep_period_annotation",
                            annotation_col="interval_sleep",
                            annotation_merge_tolerance_in_minutes=30, annotation_only_largest_sleep_period=True)




# Validation

In [4]:
va = Validator(exp)

# Epoch-level flags.
va.flag_epoch_physical_activity_less_than(min_activity_threshold=0)
va.flag_epoch_null_cols(col_list=["hyp_act_x"])
va.flag_epoch_nonwearing("hyp_wearing_choi")

# Day-level flags.
# NOTE(review): 3*30 is 90 minutes and 3*10 is 30 minutes, whereas the upper
# sleep bound is written as 12*60. Confirm that 3*60 was not intended for
# either of the two lower thresholds.
va.flag_day_sleep_length_less_than(sleep_period_col="sleep_period_annotation", min_sleep_in_minutes=3*30)
va.flag_day_sleep_length_more_than(sleep_period_col="sleep_period_annotation", max_sleep_in_minutes=12*60)
va.flag_day_max_nonwearing(max_non_wear_minutes_per_day=3*10)
va.flag_day_if_valid_epochs_smaller_than(valid_minutes_per_day=20*60)
va.validation_report()

n_removed_wearables = va.remove_wearables_without_valid_days()
print("Removed %d wearables." % n_removed_wearables)

va.flag_day_if_not_enough_consecutive_days(3)
# remove_flagged_days() is called without any filter, and its count is not
# limited to the non-consecutive days flagged just above: the recorded run
# removed 35 days while only 9 had been flagged as non-consecutive. The message
# therefore reports "flagged days" rather than "days that are not consecutive".
n_removed_days = va.remove_flagged_days()
print("Removed %d flagged days." % n_removed_days)
n_removed_wearables = va.remove_wearables_without_valid_days()
print("Removed %d wearables." % n_removed_wearables)

exp.overall_stats()



Number of days removed due to InvCode.FLAG_DAY_SHORT_SLEEP: 10
Number of days removed due to InvCode.FLAG_DAY_LONG_SLEEP: 0
Number of days removed due to InvCode.FLAG_DAY_WITHOUT_DIARY: 0
Number of days removed due to InvCode.FLAG_DAY_NON_WEARING: 24
Number of days removed due to InvCode.FLAG_DAY_NOT_ENOUGH_VALID_EPOCHS: 26
Number of days removed due to InvCode.FLAG_DAY_NOT_ENOUGH_CONSECUTIVE_DAYS: 0
Total number of potential days to remove (may have overlaps): 60
Removed 0 wearables.
Flagging the following days as invalid for pid 1764: 1,3,6
Flagging the following days as invalid for pid 1766: 1,2,4,5
Flagging the following days as invalid for pid 1769: 1,2
Removed 35 days that are not consecutive.
Removing wearable 1764.
Removing wearable 1766.
Removing wearable 1769.
Removed 3 wearables.
Total number of wearables: 2
Total number of days: 9
Avg. number of days: 4.50 (+-0.500). Max: 5, Min: 4.
Avg. number of epochs: 12960.00 (+-1440.000). Max: 14400, Min: 11520.


# Set Machine Learning Sequence days instead of 24h day

In [5]:
# Setting day to ml representation -> days may not be of fixed lengths.
# The name of the resulting day column is kept in exp_day_column, which later
# cells use as a merge key and as a melt id variable.
exp_day_column='ml_sequence'
exp.set_ml_representation_days_exp(sleep_col="sleep_period_annotation", ml_column=exp_day_column)

## Revalidate ML sequence days

In [6]:
# Re-apply the sleep-length day flags now that days follow the ML representation,
# using the same thresholds as the first validation pass.
# NOTE(review): 3*30 is 90 minutes -- confirm that 3*60 was not intended.
va.flag_day_sleep_length_less_than(sleep_period_col="sleep_period_annotation", min_sleep_in_minutes=3*30)
va.flag_day_sleep_length_more_than(sleep_period_col="sleep_period_annotation", max_sleep_in_minutes=12*60)
# NOTE(review): wearables are pruned before the flagged days are removed here,
# whereas the first validation pass also prunes wearables again after removing
# days -- confirm the order is intentional.
n_removed_wearables = va.remove_wearables_without_valid_days()
n_removed_days = va.remove_flagged_days()
print("Removed %d wearables." % n_removed_wearables)
print("Removed %d days." % n_removed_days)

Removed 0 wearables.
Removed 2 days.


In [7]:
# Print the per-flag validation summary again, after the revalidation pass.
va.validation_report()

Number of days removed due to InvCode.FLAG_DAY_SHORT_SLEEP: 0
Number of days removed due to InvCode.FLAG_DAY_LONG_SLEEP: 0
Number of days removed due to InvCode.FLAG_DAY_WITHOUT_DIARY: 0
Number of days removed due to InvCode.FLAG_DAY_NON_WEARING: 0
Number of days removed due to InvCode.FLAG_DAY_NOT_ENOUGH_VALID_EPOCHS: 0
Number of days removed due to InvCode.FLAG_DAY_NOT_ENOUGH_CONSECUTIVE_DAYS: 0
Total number of potential days to remove (may have overlaps): 0


# Feature extraction

## Physical activity

In [8]:
# Print wearable/day/epoch totals for the experiment before extracting features.
exp.overall_stats()

Total number of wearables: 2
Total number of days: 9
Avg. number of days: 4.50 (+-0.500). Max: 5, Min: 4.
Avg. number of epochs: 11964.00 (+-1594.000). Max: 13558, Min: 10370.


In [22]:
pa_levels = ["sedentary", "light", "medium", "vigorous"]
bout_lengths = [5, 10, 20, 30]
bout_keys = ["pid", exp_day_column, "hyp_time_col", "bout_length"]

pa = PhysicalActivity(exp)
# METS: 1.5, 3, 6
pa.set_cutoffs(cutoffs=[58, 399, 1404], names=pa_levels)
pa.generate_pa_columns(based_on="activity")

# Collect one table per intensity level, stacking the results for every bout length.
bouts_by_level = []
for act_level in pa_levels:
    level_bouts = []
    for length in bout_lengths:
        # The third positional argument is half the bout length.
        # NOTE(review): presumably a tolerance -- confirm against PhysicalActivity.get_bouts.
        pa_bout = pa.get_bouts(act_level, length, length // 2,
                               resolution="hour", sleep_col="sleep_period_annotation")

        # get_bouts may return something other than a non-empty DataFrame; skip those results.
        if isinstance(pa_bout, pd.DataFrame) and not pa_bout.empty:
            level_bouts.append(pa_bout)

    if level_bouts:
        bouts_by_level.append(pd.concat(level_bouts))

if not bouts_by_level:
    # functools.reduce() on an empty list would fail with an opaque TypeError.
    raise ValueError("No physical activity bouts were found for any intensity level.")

# Merge PA datasets
bouts = functools.reduce(
    lambda left, right: pd.merge(left, right, on=bout_keys, how='outer'),
    bouts_by_level).fillna(0.0)

# A level with no bouts at all contributes no table above, so its column would be
# missing. Later cells select every level by name, so add it as zero bouts.
for act_level in pa_levels:
    if act_level not in bouts.columns:
        bouts[act_level] = 0.0

# Every level is guaranteed to exist now, so the melt no longer needs a
# hand-maintained subset of levels.
bouts_melted = bouts.melt(id_vars=["pid", exp_day_column, "bout_length"],
                          value_vars=pa_levels)




## Sleep Analysis

In [25]:
# Run both sleep/wake scoring algorithms with identical settings. Only the
# "ScrippsClinic" column is used by the sleep metrics below.
sw = SleepWakeAnalysis(exp)
for algname in ("ScrippsClinic", "Sadeh"):
    sw.run_sleep_algorithm(algname=algname, activityIdx="hyp_act_x", rescoring=False, on_sleep_interval=False,
                           inplace=True)

sm = SleepMetrics(exp)

# One table per sleep metric, all computed inside the annotated sleep period.
sleep_metric_tables = [
    sm.get_sleep_quality(sleep_metric=sleep_metric, wake_sleep_col="ScrippsClinic",
                         sleep_period_col="sleep_period_annotation")
    for sleep_metric in ["sleepEfficiency", "awakening", "arousal"]
]
# SRI does not use a sleep_period_col
# SRI calculation will not work with set_ml_representation_days_exp because day representation will be of different lengths.
# While SRI requires the days to be of fixed lengths.
# NOTE(review): the SRI table is merged in below, but the final column selection
# does not keep any SRI column -- consider dropping this call.
sleep_metric_tables.append(sm.get_sleep_quality(sleep_metric="sri", wake_sleep_col="ScrippsClinic"))

sleep_metrics = functools.reduce(lambda left, right: pd.merge(left, right, on=["pid", exp_day_column], how='outer'),
                                 sleep_metric_tables)

# Index by the same day column that was used as the merge key, instead of a
# second hard-coded copy of its name.
sleep_metrics = sleep_metrics.set_index(['pid', exp_day_column])

# Keep only the metric columns. This explicit selection also discards the
# '*_parameters' columns, so no separate filter is needed for them.
sleep_metrics = sleep_metrics[['sleepEfficiency', 'awakening', 'arousal']]
sleep_metrics

Unable to calculate SRI for day 1 (PID = 1760).
Unable to calculate SRI for day 2 (PID = 1760).
Unable to calculate SRI for day 3 (PID = 1760).
Unable to calculate SRI for day 1 (PID = 1768).
Unable to calculate SRI for day 2 (PID = 1768).
Unable to calculate SRI for day 3 (PID = 1768).
Unable to calculate SRI for day 4 (PID = 1768).


Unnamed: 0_level_0,Unnamed: 1_level_0,sleepEfficiency,awakening,arousal
pid,ml_sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1760,0,90.594499,7,7
1760,1,95.197439,4,4
1760,2,92.987805,4,4
1760,3,92.5,10,10
1768,0,86.625332,30,30
1768,1,75.646123,40,40
1768,2,88.243065,23,23
1768,3,89.833641,15,15
1768,4,95.673077,6,6


## Circadian Analysis

In [27]:
# return cosinor_results when ca.run_cosinor()
# return ssa_results when ca.run_ssa()

In [28]:
# The below code takes time because they run linear algebra algorithms.
# (The recorded run shows roughly 2-3 minutes per wearable for the SSA step.)
ca = CircadianAnalysis(exp)
ca.run_cosinor()
ca.run_SSA()

# Inspect the results attached to one wearable. The pid "1768" is hard-coded,
# and `w` is reused by the later encode_datetime_to_ml cells.
w = exp.get_wearable("1768")
print(w.cosinor)
print(w.ssa['hyp_act_x'].keys())

2it [00:00, 27.44it/s]
0it [00:00, ?it/s]

0
1
=== Running SSA ===
0
(3746, 1440)


  0%|▎                                                                                                                                                                                               | 2/1440 [00:00<02:20, 10.21it/s]

input: (3746, 1440)
U: (3746, 3746)
s: (1440,)
V: (1440, 1440)
r: 1440
gkList: (1440, 5185)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1440/1440 [02:22<00:00, 10.14it/s]
1it [02:36, 156.54s/it]

1
(5340, 1440)


  0%|▏                                                                                                                                                                                               | 1/1440 [00:00<03:01,  7.94it/s]

input: (5340, 1440)
U: (5340, 5340)
s: (1440,)
V: (1440, 1440)
r: 1440
gkList: (1440, 6779)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1440/1440 [03:16<00:00,  7.32it/s]
2it [06:11, 185.65s/it]

                 p  p_reject       SNR           RSS    resid_SE          ME  \
test                                                                           
0     1.110223e-16       NaN  0.436968  1.535181e+07   84.996350  166.684725   
1     1.110223e-16       NaN  0.314593  1.621121e+07   77.543932  152.051577   
2     1.110223e-16       NaN  0.359359  2.879929e+07  105.084127  206.056734   
3     1.156521e-08       NaN  0.116067  3.773967e+07  106.107734  208.042459   
4     1.110223e-16       NaN  0.319735  2.340399e+07   92.302963  180.990229   

      period  amplitude  acrophase       mesor  
test                                            
0       24.0   2.340501  -0.812390  127.099835  
1       24.0   2.464298  -4.690900   91.696167  
2       24.0   4.572767  -0.759978  142.695136  
3       24.0   4.361200  -6.079826   82.982133  
4       24.0   2.874532  -0.288268  115.770324  
dict_keys(['r', 'pv', 'gk', 'wm', 'df', 'acrophase', 'gksum15T', 'trend', 'period'])





# Encode datetime series to ML object

In [32]:
# NOTE(review): this import sits mid-notebook; by convention it would live in the first (imports) cell.
from hypnospy.misc import encode_datetime_to_ml
# `w` is the wearable fetched in the circadian-analysis cell (pid "1768").
# The recorded output shows year plus sin/cos columns for month, day, hour and minute.
encode_datetime_to_ml(w.ssa['hyp_act_x']['acrophase'], 'acrophase')

Unnamed: 0,acrophase_year,acrophase_month_sin,acrophase_month_cos,acrophase_day_sin,acrophase_day_cos,acrophase_hour_sin,acrophase_hour_cos,acrophase_minute_sin,acrophase_minute_cos
2017-01-03,2017,0.5,0.866025,0.571268,0.820763,-0.7071068,-0.7071068,0.5,-0.866025
2017-01-04,2017,0.5,0.866025,0.724793,0.688967,-0.5,-0.8660254,-0.406737,0.913545
2017-01-05,2017,0.5,0.866025,0.848644,0.528964,1.224647e-16,-1.0,-0.913545,0.406737
2017-01-06,2017,0.5,0.866025,0.937752,0.347305,-1.0,-1.83697e-16,-0.743145,0.669131
2017-01-07,2017,0.5,0.866025,0.988468,0.151428,-1.0,-1.83697e-16,0.0,1.0
2017-01-08,2017,0.5,0.866025,0.998717,-0.050649,0.9659258,-0.258819,-0.207912,0.978148


# View experiment

In [33]:
# TODO: Rerun with encode_datetime_to_ml
# attach encode to ml
# fix view signal

In [80]:
# View signals
# Plots the listed signals for the experiment; "ScrippsClinic" is the sleep/wake
# column produced by the sleep-analysis cell and "sleep_period_annotation" comes
# from the sleep-boundary detection cell.
v = Viewer(exp)
v.view_signals(["activity", "pa_intensity", "sleep"], sleep_cols=["sleep_period_annotation"],
               signal_as_area=["ScrippsClinic"])

# Convert features to machine learning format

In [None]:
#  ------------------------------------------------
#
# "XY.pickle"
# X = {"demographics": [wearable list], "mvpa_bouts": [wearable list], "hour_stats": [pid1day124, ]}
# Y = {"sleep_efficiency": [0.5, 0.7...]}
#
#
# DF is dataframe
# pid, exp_day, exp_hour, mvpa_bouts, age, bmi, sleep_efficiency, ....
# -
# -
# -
#
# collist = {"demographics": ["age", "bmi"], "PA": [A, B, C]}

In [77]:
# Same call as in the "Encode datetime series to ML object" cell; it relies on
# `encode_datetime_to_ml` and `w` having been defined by earlier cells.
encode_datetime_to_ml(w.ssa['hyp_act_x']['acrophase'], 'acrophase')

Unnamed: 0,acrophase_year,acrophase_month_sin,acrophase_month_cos,acrophase_day_sin,acrophase_day_cos,acrophase_hour_sin,acrophase_hour_cos,acrophase_minute_sin,acrophase_minute_cos
2017-01-03,2017,0.5,0.866025,0.571268,0.820763,-0.7071068,-0.7071068,0.5,-0.866025
2017-01-04,2017,0.5,0.866025,0.724793,0.688967,-0.5,-0.8660254,-0.406737,0.913545
2017-01-05,2017,0.5,0.866025,0.848644,0.528964,1.224647e-16,-1.0,-0.913545,0.406737
2017-01-06,2017,0.5,0.866025,0.937752,0.347305,-1.0,-1.83697e-16,-0.743145,0.669131
2017-01-07,2017,0.5,0.866025,0.988468,0.151428,-1.0,-1.83697e-16,0.0,1.0
2017-01-08,2017,0.5,0.866025,0.998717,-0.050649,0.9659258,-0.258819,-0.207912,0.978148


## Physical activity

### bouts

In [382]:
# Drop invalid days (ml_sequence == -1) and make the hour an ordered categorical
# over 0..23. Building a new frame (instead of assigning a column in place)
# keeps the cell safe to re-run on the already-filtered frame.
bouts = bouts[bouts['ml_sequence'] != -1].assign(
    hyp_time_col=lambda frame: pd.Categorical(frame['hyp_time_col'], ordered=True, categories=range(24)))

# observed=False is spelled out because it is what produces a row for every
# hour category, including hours without bouts (those come out as NaN). The
# default for this parameter is being changed in newer pandas versions.
bouts_per_hour = bouts.groupby(['pid', 'ml_sequence', 'hyp_time_col', 'bout_length'],
                               observed=False)[['sedentary', 'light', 'medium', 'vigorous']].min()
# Hours without bouts count as zero bouts. (This line used to reference
# `bout_features_per_hour`, a name that is never defined in this notebook.)
bouts_per_hour = bouts_per_hour.fillna(0)

bouts_per_day = bouts_per_hour.groupby(['pid', 'ml_sequence', 'bout_length']).sum()
# One row per (pid, day); columns are (intensity level, bout_length) pairs.
bouts_per_day = bouts_per_day.pivot_table(index=['pid', 'ml_sequence'],
                                          columns=['bout_length'],
                                          values=['sedentary', 'light', 'medium', 'vigorous'])

# for y
# bouts_per_day['medium'][10]
# distribution of activity for medium
# bouts_per_day['medium'][10].astype(bool).value_counts()

print(bouts_per_day.shape)
bouts_per_day

(10, 16)


Unnamed: 0_level_0,Unnamed: 1_level_0,light,light,light,light,medium,medium,medium,medium,sedentary,sedentary,sedentary,sedentary,vigorous,vigorous,vigorous,vigorous
Unnamed: 0_level_1,bout_length,5,10,20,30,5,10,20,30,5,10,20,30,5,10,20,30
pid,ml_sequence,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
1760,0,12.0,5.0,4.0,3.0,2.0,1.0,0.0,2.0,14.0,6.0,3.0,1.0,0.0,0.0,0.0,0.0
1760,1,21.0,14.0,8.0,6.0,0.0,1.0,0.0,0.0,21.0,12.0,3.0,1.0,0.0,0.0,0.0,0.0
1760,2,17.0,15.0,9.0,10.0,0.0,0.0,0.0,0.0,29.0,13.0,4.0,1.0,0.0,0.0,0.0,0.0
1760,3,22.0,12.0,9.0,6.0,0.0,0.0,0.0,0.0,27.0,7.0,5.0,1.0,0.0,0.0,0.0,0.0
1760,4,13.0,8.0,3.0,5.0,0.0,1.0,0.0,0.0,13.0,6.0,1.0,1.0,0.0,0.0,0.0,0.0
1768,0,16.0,8.0,4.0,2.0,0.0,0.0,0.0,0.0,16.0,9.0,6.0,2.0,0.0,0.0,0.0,0.0
1768,1,33.0,13.0,9.0,4.0,1.0,0.0,0.0,0.0,24.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0
1768,2,34.0,13.0,3.0,2.0,1.0,1.0,0.0,0.0,34.0,17.0,6.0,3.0,0.0,0.0,0.0,0.0
1768,3,35.0,18.0,6.0,5.0,2.0,2.0,0.0,1.0,38.0,15.0,5.0,1.0,0.0,0.0,0.0,0.0
1768,4,28.0,11.0,4.0,2.0,1.0,1.0,2.0,1.0,22.0,12.0,5.0,1.0,0.0,0.0,0.0,0.0


### activity timeseries

In [228]:
def get_activity_per_day_exp(exp):
    """Stack the per-day activity lists of every wearable in ``exp``.

    Each wearable contributes the Series returned by ``get_activity_per_day``.
    The Series are concatenated with the wearable pids as the outer index
    level, so the result is indexed by ('pid', 'ml_sequence').
    """
    wearables = list(exp.get_all_wearables())

    daily_activity = [get_activity_per_day(wearable) for wearable in wearables]
    pids = [wearable.pid for wearable in wearables]

    return pd.concat(daily_activity, keys=pids, names=['pid', 'ml_sequence'])

def get_activity_per_day(w):
    """Return one list of 'hyp_act_x' values per 'ml_sequence' day of wearable ``w``.

    The result is a Series indexed by 'ml_sequence' whose values are Python lists.
    """
    activity_by_day = w.data.groupby('ml_sequence')['hyp_act_x']
    daily_lists = activity_by_day.apply(list)
    # Label-based slice starting at day 0: removes -1 days (invalid days).
    return daily_lists.loc[0:]

In [230]:
# Build one fixed-length activity row per (pid, ml_sequence) day.
# NOTE(review): the original comment here said "without sleep", but nothing in
# this cell filters out sleep epochs.

MAX_SEQUENCE_LENGTH = 2500  # longer days are truncated, shorter ones are padded
PAD_VALUE = 0


def _pad_or_truncate(sequence, maxlen, value):
    """Return ``sequence`` cut to ``maxlen`` items and right-padded with ``value``."""
    clipped = list(sequence[:maxlen])
    return clipped + [value] * (maxlen - len(clipped))


act_per_ml_day = get_activity_per_day_exp(exp)
index_name = act_per_ml_day.index
sequence_lengths = act_per_ml_day.apply(len)

print(act_per_ml_day)
print(sequence_lengths)
print('mean:', sequence_lengths.mean())

# pads each actigraphy list
# `pad_sequences` was called here but is never imported in this notebook. The
# same post-padding / post-truncation is done with plain Python and pandas.
# The int32 cast truncates fractional values toward zero.
# NOTE(review): unlike a raw numpy cast, astype("int32") raises if a sequence contains NaN.
act_per_ml_day = pd.DataFrame(
    [_pad_or_truncate(sequence, MAX_SEQUENCE_LENGTH, PAD_VALUE) for sequence in act_per_ml_day],
    index=index_name,
).astype("int32")
act_per_ml_day

pid   ml_sequence
1760  0              [24.0, 34.0, 13.0, 143.0, 3.0, 46.0, 46.0, 7.0...
      1              [46.0, 238.0, 41.0, 0.0, 73.0, 77.0, 46.0, 69....
      2              [57.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...
      3              [271.0, 80.0, 50.0, 59.0, 46.0, 30.0, 43.0, 22...
      4              [34.0, 39.0, 29.0, 41.0, 0.0, 0.0, 0.0, 0.0, 0...
1768  0              [38.0, 6.0, 9.0, 1.0, 0.0, 1.0, 9.0, 13.0, 21....
      1              [85.0, 101.0, 0.0, 0.0, 160.0, 354.0, 377.0, 2...
      2              [155.0, 189.0, 35.0, 255.0, 255.0, 216.0, 630....
      3              [21.0, 1.0, 4.0, 0.0, 7.0, 0.0, 0.0, 51.0, 7.0...
      4              [171.0, 196.0, 171.0, 98.0, 282.0, 149.0, 591....
Name: hyp_act_x, dtype: object
pid   ml_sequence
1760  0              1890
      1              2742
      2              3133
      3              2605
      4              1150
1768  0              2131
      1              2702
      2              2614
      3        

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499
pid,ml_sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1760,0,24,34,13,143,3,46,46,7,14,37,...,0,0,0,0,0,0,0,0,0,0
1760,1,46,238,41,0,73,77,46,69,53,57,...,0,0,0,0,0,0,0,0,0,0
1760,2,57,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1760,3,271,80,50,59,46,30,43,22,0,1,...,0,0,0,0,0,0,0,0,0,0
1760,4,34,39,29,41,0,0,0,0,0,11,...,0,0,0,0,0,0,0,0,0,0
1768,0,38,6,9,1,0,1,9,13,21,13,...,0,0,0,0,0,0,0,0,0,0
1768,1,85,101,0,0,160,354,377,282,13,60,...,0,0,0,0,0,0,0,0,0,0
1768,2,155,189,35,255,255,216,630,67,36,19,...,0,0,0,0,0,0,0,2,0,0
1768,3,21,1,4,0,7,0,0,51,7,3,...,0,69,45,4,19,28,0,32,8,1
1768,4,171,196,171,98,282,149,591,65,13,255,...,0,0,149,6,0,13,1,0,26,0


In [431]:
# act2seq
# Display the padded activity matrix built in the previous cell
# (one row per (pid, ml_sequence) day).
act_per_ml_day

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499
pid,ml_sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1760,0,24,34,13,143,3,46,46,7,14,37,...,0,0,0,0,0,0,0,0,0,0
1760,1,46,238,41,0,73,77,46,69,53,57,...,0,0,0,0,0,0,0,0,0,0
1760,2,57,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1760,3,271,80,50,59,46,30,43,22,0,1,...,0,0,0,0,0,0,0,0,0,0
1760,4,34,39,29,41,0,0,0,0,0,11,...,0,0,0,0,0,0,0,0,0,0
1768,0,38,6,9,1,0,1,9,13,21,13,...,0,0,0,0,0,0,0,0,0,0
1768,1,85,101,0,0,160,354,377,282,13,60,...,0,0,0,0,0,0,0,0,0,0
1768,2,155,189,35,255,255,216,630,67,36,19,...,0,0,0,0,0,0,0,2,0,0
1768,3,21,1,4,0,7,0,0,51,7,3,...,0,69,45,4,19,28,0,32,8,1
1768,4,171,196,171,98,282,149,591,65,13,255,...,0,0,149,6,0,13,1,0,26,0


## Circadian Analysis

In [302]:
def get_cosinor_exp(exp):
    """Stack the ``cosinor`` table of every wearable in ``exp``.

    The tables are concatenated with the wearable pids as the outer index
    level; the two index levels are named 'pid' and 'ml_sequence'.
    """
    wearables = list(exp.get_all_wearables())

    cosinor_tables = [wearable.cosinor for wearable in wearables]
    pids = [wearable.pid for wearable in wearables]

    return pd.concat(cosinor_tables, keys=pids, names=['pid', 'ml_sequence'])

cosinor = get_cosinor_exp(exp)

# Make the day level an integer and drop invalid days (ml_sequence == -1) for
# every wearable. This replaces a "temporary" filter that was hard-coded to the
# single index entry ('1768', '-1'), and matches how the bouts and activity
# cells discard day -1.
cosinor = cosinor.reset_index()
cosinor['ml_sequence'] = cosinor['ml_sequence'].astype(int)
cosinor = cosinor[cosinor['ml_sequence'] != -1]
cosinor = cosinor.set_index(['pid', 'ml_sequence'])

# Merge ML formats

In [340]:

# Join every feature table on its (pid, ml_sequence) index, one merge per step,
# then drop any column that still contains a missing value.
df = (
    bouts_per_day
    .merge(act_per_ml_day, left_index=True, right_index=True)  # bouts + activity
    .merge(sleep_metrics, left_index=True, right_index=True)   # ... + sleep_metrics
    .merge(cosinor, left_index=True, right_index=True)         # ... + cosinor
    .dropna(axis=1)                                            # drop na columns
)



In [433]:
# Display the merged feature table (one row per (pid, ml_sequence) day).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,"(light, 5)","(light, 10)","(light, 20)","(light, 30)","(medium, 5)","(medium, 10)","(medium, 20)","(medium, 30)","(sedentary, 5)","(sedentary, 10)",...,arousal,p,SNR,RSS,resid_SE,ME,period,amplitude,acrophase,mesor
pid,ml_sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1760,0,0.5,0.208333,0.166667,0.125,0.083333,0.041667,0.0,0.083333,0.583333,0.25,...,7,1.110223e-16,0.286758,16760530.0,94.319927,184.9825,24.0,3.238946,-2.673027,90.401996
1760,1,0.875,0.583333,0.333333,0.25,0.0,0.041667,0.0,0.0,0.875,0.5,...,4,1.110223e-16,0.25482,16460370.0,77.564284,152.090485,24.0,1.674686,-3.773685,72.784141
1760,2,0.708333,0.625,0.375,0.416667,0.0,0.0,0.0,0.0,1.208333,0.541667,...,4,1.110223e-16,0.243315,18925450.0,77.796334,152.537055,24.0,2.460543,-5.162611,71.332025
1760,3,0.916667,0.5,0.375,0.25,0.0,0.0,0.0,0.0,1.125,0.291667,...,10,1.110223e-16,0.311605,14823780.0,75.522506,148.090357,24.0,0.767738,-3.354387,81.145354
1760,4,0.541667,0.333333,0.125,0.208333,0.0,0.041667,0.0,0.0,0.541667,0.25,...,0,0.9482836,0.031869,10575770.0,96.148613,188.647407,24.0,1.942738,-6.106032,65.645501
1768,0,0.666667,0.333333,0.166667,0.083333,0.0,0.0,0.0,0.0,0.666667,0.375,...,30,1.110223e-16,0.436968,15351810.0,84.99635,166.684725,24.0,2.340501,-0.81239,127.099835
1768,1,1.375,0.541667,0.375,0.166667,0.041667,0.0,0.0,0.0,1.0,0.333333,...,40,1.110223e-16,0.314593,16211210.0,77.543932,152.051577,24.0,2.464298,-4.6909,91.696167
1768,2,1.416667,0.541667,0.125,0.083333,0.041667,0.041667,0.0,0.0,1.416667,0.708333,...,23,1.110223e-16,0.359359,28799290.0,105.084127,206.056734,24.0,4.572767,-0.759978,142.695136
1768,3,1.458333,0.75,0.25,0.208333,0.083333,0.083333,0.0,0.041667,1.583333,0.625,...,15,1.156521e-08,0.116067,37739670.0,106.107734,208.042459,24.0,4.3612,-6.079826,82.982133
1768,4,1.166667,0.458333,0.166667,0.083333,0.041667,0.041667,0.083333,0.041667,0.916667,0.5,...,6,1.110223e-16,0.319735,23403990.0,92.302963,180.990229,24.0,2.874532,-0.288268,115.770324


In [436]:
# List the merged column labels. The recorded output shows a mix of tuple
# labels (bouts), integer labels (activity) and string labels (sleep/cosinor).
df.columns

Index([     ('light', 5),     ('light', 10),     ('light', 20),
           ('light', 30),     ('medium', 5),    ('medium', 10),
          ('medium', 20),    ('medium', 30),  ('sedentary', 5),
       ('sedentary', 10),
       ...
               'arousal',               'p',             'SNR',
                   'RSS',        'resid_SE',              'ME',
                'period',       'amplitude',       'acrophase',
                 'mesor'],
      dtype='object', length=2528)

# ML model

In [439]:
# example of y
# Two candidate targets side by side: the sleep-efficiency metric and the
# (medium intensity, bout length 10) bouts column, addressed by its tuple label.
df[['sleepEfficiency', ('medium', 10)]]

Unnamed: 0_level_0,Unnamed: 1_level_0,sleepEfficiency,"(medium, 10)"
pid,ml_sequence,Unnamed: 2_level_1,Unnamed: 3_level_1
1760,0,90.594499,0.041667
1760,1,95.197439,0.041667
1760,2,92.987805,0.0
1760,3,92.5,0.0
1760,4,0.0,0.041667
1768,0,86.625332,0.0
1768,1,75.646123,0.0
1768,2,88.243065,0.041667
1768,3,89.833641,0.083333
1768,4,95.673077,0.041667


In [354]:
from sklearn.linear_model import LinearRegression

X = df.drop('sleepEfficiency', axis=1)
# df mixes tuple, integer and string column labels (see the df.columns output).
# Recent scikit-learn versions reject DataFrames whose feature names are not
# all strings, so cast the labels. X is a new frame, so df itself is unchanged.
X.columns = [str(column) for column in X.columns]
y = df['sleepEfficiency']

reg = LinearRegression().fit(X, y)

# NOTE: this is the R^2 on the training data itself. With far more feature
# columns than rows (df.columns reports length=2528 for 10 rows) a score of 1.0
# is expected and says nothing about how the model generalises.
print(reg.score(X, y))
print('------------------')
print(reg.coef_)
print(reg.intercept_)

1.0
------------------
[ 1.71079712e-07  3.42381289e-07  1.13316230e-06 ... -5.00090635e-06
  1.79973186e-05  7.93960294e-05]
30.63043318684577
