# Reproduce Table 1

- Python kernel
- Verify the results by comparing them with Table 1 in the Original Analysis paper and with the output of `python3 demographics_day_night.py` (TGN10+ version or Original version). Be sure to configure your file paths first.

In [1]:
import pandas as pd
import numpy as np
# import pingouin as pg
from statsmodels.formula.api import ols

from scipy import stats

# Load Data

In [2]:
# Root of the local checkout and the dataset folder inside it.
# Both values keep their trailing slash because the paths below are built by plain concatenation.
base = "/Users/brinkley97/Documents/development/lab-kcad/"
path_to_file = "datasets/tiles_dataset/table_3/"

# One CSV path per input table, in the same order as the names on the left.
(
    nurse_file,
    rest_off_file,
    step_ratio_off_file,
    duration_off_file,
    efficiency_off_file,
) = (
    f"{base}{path_to_file}{csv_name}"
    for csv_name in (
        "nurse_step_sleep.csv",
        "rest_off.csv",
        "step_ratio_off.csv",
        "duration_off.csv",
        "efficiency_off.csv",
    )
)

In [3]:
def load_data(file):
    """Read a CSV file into a DataFrame, using its first column as the index.

    file -- str path or file-like object accepted by pd.read_csv

    Returns a copy of the parsed frame.
    """
    return pd.read_csv(file, index_col=0).copy()

In [4]:
# Table read from nurse_step_sleep.csv; this is the frame the analysis cells pass to table_three_and_five.
nurse_df = load_data(nurse_file)
# nurse_df 

In [5]:
# Table read from rest_off.csv.
# NOTE(review): not referenced by any later cell visible here (the analysis passes nurse_df) - confirm it is still needed.
rest_off_df = load_data(rest_off_file)
# rest_off_df

In [6]:
# Table read from step_ratio_off.csv.
# NOTE(review): not referenced by any later cell visible here (the analysis passes nurse_df) - confirm it is still needed.
step_ratio_off_df = load_data(step_ratio_off_file)
# step_ratio_off_df

In [7]:
# Table read from duration_off.csv.
# NOTE(review): not referenced by any later cell visible here (the analysis passes nurse_df) - confirm it is still needed.
duration_off_df = load_data(duration_off_file)
# duration_off_df

In [8]:
# Table read from efficiency_off.csv.
# NOTE(review): not referenced by any later cell visible here (the analysis passes nurse_df) - confirm it is still needed.
efficiency_off_df = load_data(efficiency_off_file)
# efficiency_off_df

# Load Generated Specific Questions

In [9]:
# Absolute path of the notebook that generates the specific questions, built from `base`.
# NOTE(review): the %run cell that follows uses a bare relative filename rather than this variable -
# confirm which location is intended.
generated_sq_file = "TGN10Plus/generateSpecificQuestions.ipynb"
tables_3_and_5_specific_questions_path = base + generated_sq_file

In [10]:
# %load tables_3_and_5_specific_questions_path
/Users/brinkley97/Documents/development/lab-kcad/TGN10Plus/generateSpecificQuestions.ipynb

In [11]:
# Execute the question-generation notebook inside this kernel. Later cells read ontology_mappings,
# table_3_sqs and table_5_sqs, none of which are defined in this notebook - presumably they come from this run.
# The filename is relative, so it is resolved against the current working directory.
%run "generateSpecificQuestions.ipynb"

In [12]:
# table_3_sqs

In [13]:
# Outcome column names to analyse (the 'behavioral' entry of ontology_mappings); shown as the cell output.
behav_col = ontology_mappings['behavioral']
behav_col

['stai',
 'pan_PosAffect',
 'pan_NegAffect',
 'swls',
 'bfi_Neuroticism',
 'bfi_Conscientiousness',
 'bfi_Extraversion',
 'bfi_Agreeableness',
 'bfi_Openness',
 'psqi']

In [14]:
# Row labels shared by Tables 3 and 5.
# Note: this binds index_col to the very list object stored in ontology_mappings (no copy is made).
index_col = ontology_mappings['tables_3_and_5']
index_col

['intercept', 'age', 'gender', 'day', 'observation', 'r2']

In [15]:
# Activity-feature row labels used for Table 3 (main effect and day-shift interaction for each feature).
t3_activity = ontology_mappings['table_3_activity']
t3_activity

['Rest-activity ratio (off-day)',
 'Shift [day shift] x rest-activity ratio (off-day)',
 'Walk-activity ratio (off-day)',
 'Shift [day shift] x walk-activity ratio (off-day)']

In [16]:
# Sleep-feature row labels used for Table 5 (main effect and day-shift interaction for each feature).
# Note the last label uses the Unicode multiplication sign rather than the letter "x".
t5_sleep = ontology_mappings['table_5_sleep']
t5_sleep

['Sleep duration (off-day)',
 'Shift [day shift] x sleep duration (off-day)',
 'Sleep efficiency (off-day)',
 'Shift [day shift] × sleep efficiency (off-day)']

In [17]:
# Build the complete row-label list as a NEW list instead of extending in place.
# index_col was bound directly to ontology_mappings['tables_3_and_5'], so list.extend() would
# also modify that mapping entry and would append the feature labels again on every re-run
# of this cell. Rebuilding from the mapping keeps the cell idempotent.
index_col = list(ontology_mappings['tables_3_and_5']) + list(t3_activity) + list(t5_sleep)

In [18]:
# Display the combined row labels (shared rows followed by the Table 3 and Table 5 feature rows).
index_col

['intercept',
 'age',
 'gender',
 'day',
 'observation',
 'r2',
 'Rest-activity ratio (off-day)',
 'Shift [day shift] x rest-activity ratio (off-day)',
 'Walk-activity ratio (off-day)',
 'Shift [day shift] x walk-activity ratio (off-day)',
 'Sleep duration (off-day)',
 'Shift [day shift] x sleep duration (off-day)',
 'Sleep efficiency (off-day)',
 'Shift [day shift] × sleep efficiency (off-day)']

# Expand SPs

In [26]:
# Display the generated Table 3 questions. table_3_sqs is not defined in this notebook -
# presumably it is created by the %run of generateSpecificQuestions.ipynb.
table_3_sqs

['what is the affect on *psqi* with respect to *intercept* ?',
 'what is the affect on *psqi* with respect to *age* ?',
 'what is the affect on *psqi* with respect to *gender* ?',
 'what is the affect on *psqi* with respect to *day* ?',
 'what is the affect on *psqi* with respect to *observation* ?',
 'what is the affect on *psqi* with respect to *r2* ?',
 'what is the affect on *psqi* with respect to *Rest-activity ratio (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] x rest-activity ratio (off-day)* ?',
 'what is the affect on *psqi* with respect to *Walk-activity ratio (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] x walk-activity ratio (off-day)* ?',
 'what is the affect on *stai* with respect to *intercept* ?',
 'what is the affect on *stai* with respect to *age* ?',
 'what is the affect on *stai* with respect to *gender* ?',
 'what is the affect on *stai* with respect to *day* ?',
 'what is the affect on *stai* wi

In [27]:
def split_t_sps_by_behav(t_sqs, group_size=10):
    """Cut a flat list of questions into five consecutive per-outcome groups.

    t_sqs -- list (of either table_3_sqs or table_5_sqs), with the questions of one
             outcome stored contiguously
    group_size -- int (number of questions per outcome; defaults to 10, the size this
                  notebook has always used)

    Returns a 5-tuple of lists that callers unpack as
    (psqi, stai, pa, na, swls). The function itself only slices by position, so those
    names are correct only if the questions are generated in that order
    (TODO confirm against the question generator). Groups that fall past the end of
    t_sqs come back empty, exactly like an out-of-range slice.

    Raises ValueError if group_size is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be a positive integer")

    # Group k covers positions [k * group_size, (k + 1) * group_size).
    return tuple(
        t_sqs[group_no * group_size:(group_no + 1) * group_size]
        for group_no in range(5)
    )

In [28]:
# Break each table's flat question list into its five per-outcome groups (see split_t_sps_by_behav).
psqi_table_3_sqs, stai_table_3_sqs, pa_table_3_sqs, na_table_3_sqs, swls_table_3_sqs = split_t_sps_by_behav(table_3_sqs)
psqi_table_5_sqs, stai_table_5_sqs, pa_table_5_sqs, na_table_5_sqs, swls_table_5_sqs = split_t_sps_by_behav(table_5_sqs)

In [29]:
def reformat_sqs(table_3_sqs):
    """Turn one outcome's 10 questions into two 8-question lists, one per feature.

    table_3_sqs -- list (one outcome's questions: 6 shared rows, then 2 rows for the
                   first feature, then 2 rows for the second feature)

    Returns (shared rows + first-feature rows, shared rows + second-feature rows).
    Despite the parameter name, the notebook also calls this with Table 5 groups.
    """
    shared_rows = table_3_sqs[:6]
    first_feature_rows, second_feature_rows = table_3_sqs[6:8], table_3_sqs[8:10]

    return shared_rows + first_feature_rows, shared_rows + second_feature_rows

In [30]:
# psqi, Table 3: shared rows + rest-activity rows, and shared rows + walk-activity rows.
psqi_rest, psqi_walk = reformat_sqs(psqi_table_3_sqs)

In [31]:
# stai, Table 3: shared rows + rest-activity rows, and shared rows + walk-activity rows.
stai_rest, stai_walk = reformat_sqs(stai_table_3_sqs)

In [32]:
# Same two-way split for the pa_table_3_sqs group (first list named rest, second named walk).
pa_rest, pa_walk = reformat_sqs(pa_table_3_sqs)

In [33]:
# Same two-way split for the na_table_3_sqs group (first list named rest, second named walk).
na_rest, na_walk = reformat_sqs(na_table_3_sqs)

In [34]:
# Same two-way split for the swls_table_3_sqs group (first list named rest, second named walk).
swls_rest, swls_walk = reformat_sqs(swls_table_3_sqs)

In [35]:
# psqi, Table 5: shared rows + sleep-duration rows, and shared rows + sleep-efficiency rows.
psqi_duration, psqi_efficiency = reformat_sqs(psqi_table_5_sqs)

In [36]:
# stai, Table 5: shared rows + sleep-duration rows, and shared rows + sleep-efficiency rows.
stai_duration, stai_efficiency = reformat_sqs(stai_table_5_sqs)

In [37]:
# Same two-way split for the pa_table_5_sqs group (first list named duration, second named efficiency).
pa_duration, pa_efficiency = reformat_sqs(pa_table_5_sqs)

In [38]:
# Same two-way split for the na_table_5_sqs group (first list named duration, second named efficiency).
na_duration, na_efficiency = reformat_sqs(na_table_5_sqs)

In [39]:
# Same two-way split for the swls_table_5_sqs group (first list named duration, second named efficiency).
swls_duration, swls_efficiency = reformat_sqs(swls_table_5_sqs)

# Run Analysis

In [40]:
def get_ols(nurse_df, feat_col, igtb_col, index_col, split_sq, index):
    """Fit the shift-interaction OLS model and print the table entry named by `index`.

    The outcome and the feature are z-scored over the rows without missing values, then
    ``igtb_col ~ less_than_40 + female + day + feat_col + day : feat_col`` is fitted.

    nurse_df -- pd df (must contain feat_col, igtb_col, less_than_40, female, day, night)
    feat_col -- str (feature column, e.g. rest_off, step_ratio_off, duration_off, efficiency_off)
    igtb_col -- str (outcome column, e.g. swls, stai, psqi, pan_PosAffect, pan_NegAffect)
    index_col -- list (of the row labels: intercept, age, gender, etc)
    split_sq -- list (the fragments of the question being answered; not used here,
                kept so existing callers keep working)
    index -- str (one fragment of the question; only fragments that are row labels
             in index_col produce output)

    Prints "<igtb_col>  x  <index>" followed by one value and returns None:
      * 'observation' -> number of rows used in the fit
      * 'r2'          -> adjusted R-squared, rounded to 2 decimals
      * anything else -> the matching coefficient, rounded to 2 decimals
    The value line is prefixed with "p: " for historical reasons even though it is a
    coefficient (or adjusted R-squared), not a p-value.

    Raises ValueError if `index` is in index_col but has no known coefficient position.
    """
    # Question fragments that are not row labels never print anything, so skip the
    # (comparatively expensive) model fit for them.
    if index not in index_col:
        return

    # 'night' is not a model term, but rows where it is missing are still dropped.
    data_df = nurse_df[[feat_col, 'less_than_40', 'female', 'day', 'night', igtb_col]].dropna()

    # z-score outcome and feature; assign() builds a new frame rather than writing into data_df.
    data_df = data_df.assign(**{
        col: (data_df[col] - data_df[col].mean()) / data_df[col].std()
        for col in (igtb_col, feat_col)
    })

    model = ols(igtb_col + ' ~ less_than_40 + female + day + ' + feat_col + ' + day : ' + feat_col, data=data_df).fit()

    print(igtb_col, " x ", index)

    if index == 'observation':
        print(len(data_df))
        return

    if index == 'r2':
        value = model.rsquared_adj
    elif index == 'intercept':
        # .iloc: the parameters are looked up by position, not by label.
        value = model.params.iloc[0]
    else:
        # Position of each term among the non-intercept coefficients, in formula order:
        # less_than_40, female, day, feat_col, day:feat_col.
        # A paper label is mapped by position only; it is not checked against feat_col.
        term_position = {
            'Rest-activity ratio (off-day)': 3,
            'Shift [day shift] x rest-activity ratio (off-day)': 4,
            'Walk-activity ratio (off-day)': 3,
            'Shift [day shift] x walk-activity ratio (off-day)': 4,
            'Sleep duration (off-day)': 3,
            'Shift [day shift] x sleep duration (off-day)': 4,
            'Sleep efficiency (off-day)': 3,
            'Shift [day shift] × sleep efficiency (off-day)': 4,
        }
        # Later keys win on a clash, so the fixed names take precedence over the
        # feat_col-derived ones, which in turn take precedence over the paper labels.
        term_position.update({
            'night*' + feat_col: 5,
            'day*' + feat_col: 4,
            feat_col: 3,
            'day': 2,
            'gender': 1,
            'age': 0,
        })

        if index not in term_position:
            raise ValueError("no coefficient is mapped to row label {!r}".format(index))

        # +1 skips the intercept, which is the first fitted parameter.
        value = model.params.iloc[term_position[index] + 1]

    # The same rounded value is printed whatever the significance level, so p-values are not consulted.
    print("p: ", round(value, 2))
                        

In [41]:
def table_three_and_five(df, feat_col, table_sqs, index_col, behav_col):
    """Print every question in `table_sqs` followed by its regression entry.

    df -- pd df (the table holding the feature, outcome and covariate columns)
    feat_col -- str (of corresponding rest_off, step_ratio_off, duration_off, efficiency_off)
    table_sqs -- list (of questions; the outcome and the row label are wrapped in '*')
    index_col -- list (of the row with intercept, age, gender, etc)
    behav_col -- list (of outcome names such as swls, stai, psqi, pan_PosAffect, pan_NegAffect)

    Each question is printed, cut at every '*', and - for each outcome name that appears
    among the pieces - every piece is handed to get_ols, which prints only for pieces
    that are row labels. A blank line closes each question. Returns None.
    """
    # Work on a frame whose former index is an ordinary column; the caller's frame is untouched.
    indexed_df = df.reset_index()

    for question in table_sqs:
        print(question)
        pieces = question.split('*')

        # Outcomes named in this question, kept in behav_col order.
        named_outcomes = [outcome for outcome in behav_col if outcome in pieces]
        for outcome in named_outcomes:
            for piece in pieces:
                get_ols(indexed_df, feat_col, outcome, index_col, pieces, piece)
        print()

In [42]:
# All rest-activity questions for Table 3, concatenated outcome by outcome; displayed as the cell output.
base_rest_3_sqs = psqi_rest + stai_rest + pa_rest + na_rest + swls_rest
base_rest_3_sqs

['what is the affect on *psqi* with respect to *intercept* ?',
 'what is the affect on *psqi* with respect to *age* ?',
 'what is the affect on *psqi* with respect to *gender* ?',
 'what is the affect on *psqi* with respect to *day* ?',
 'what is the affect on *psqi* with respect to *observation* ?',
 'what is the affect on *psqi* with respect to *r2* ?',
 'what is the affect on *psqi* with respect to *Rest-activity ratio (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] x rest-activity ratio (off-day)* ?',
 'what is the affect on *stai* with respect to *intercept* ?',
 'what is the affect on *stai* with respect to *age* ?',
 'what is the affect on *stai* with respect to *gender* ?',
 'what is the affect on *stai* with respect to *day* ?',
 'what is the affect on *stai* with respect to *observation* ?',
 'what is the affect on *stai* with respect to *r2* ?',
 'what is the affect on *stai* with respect to *Rest-activity ratio (off-day)* ?',
 'what is the a

In [43]:
# Table 3, rest-activity feature: print one entry per question.
# NOTE(review): rest_off_df, step_ratio_off_df, duration_off_df and efficiency_off_df are loaded earlier,
# but this call passes nurse_df and selects the feature by column name - confirm that is intended.

feat_col = 'rest_off'
table_three_and_five(nurse_df, feat_col, base_rest_3_sqs, index_col, behav_col)

what is the affect on *psqi* with respect to *intercept* ?
psqi  x  intercept
p:  0.38

what is the affect on *psqi* with respect to *age* ?
psqi  x  age
p:  -0.4

what is the affect on *psqi* with respect to *gender* ?
psqi  x  gender
p:  0.25

what is the affect on *psqi* with respect to *day* ?
psqi  x  day
p:  -0.69

what is the affect on *psqi* with respect to *observation* ?
psqi  x  observation
94

what is the affect on *psqi* with respect to *r2* ?
psqi  x  r2
p:  0.16

what is the affect on *psqi* with respect to *Rest-activity ratio (off-day)* ?
psqi  x  Rest-activity ratio (off-day)
p:  0.05

what is the affect on *psqi* with respect to *Shift [day shift] x rest-activity ratio (off-day)* ?
psqi  x  Shift [day shift] x rest-activity ratio (off-day)
p:  0.04

what is the affect on *stai* with respect to *intercept* ?
stai  x  intercept
p:  0.2

what is the affect on *stai* with respect to *age* ?
stai  x  age
p:  -0.23

what is the affect on *stai* with respect to *gender* ?
s

In [44]:
# All walk-activity questions for Table 3, concatenated outcome by outcome; displayed as the cell output.
base_walk_3_sqs = psqi_walk + stai_walk + pa_walk + na_walk + swls_walk
base_walk_3_sqs

['what is the affect on *psqi* with respect to *intercept* ?',
 'what is the affect on *psqi* with respect to *age* ?',
 'what is the affect on *psqi* with respect to *gender* ?',
 'what is the affect on *psqi* with respect to *day* ?',
 'what is the affect on *psqi* with respect to *observation* ?',
 'what is the affect on *psqi* with respect to *r2* ?',
 'what is the affect on *psqi* with respect to *Walk-activity ratio (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] x walk-activity ratio (off-day)* ?',
 'what is the affect on *stai* with respect to *intercept* ?',
 'what is the affect on *stai* with respect to *age* ?',
 'what is the affect on *stai* with respect to *gender* ?',
 'what is the affect on *stai* with respect to *day* ?',
 'what is the affect on *stai* with respect to *observation* ?',
 'what is the affect on *stai* with respect to *r2* ?',
 'what is the affect on *stai* with respect to *Walk-activity ratio (off-day)* ?',
 'what is the a

In [45]:
# Table 3, walk-activity feature (column step_ratio_off): print one entry per question.
# NOTE(review): the per-feature frames loaded earlier are not used; nurse_df is passed and the
# feature is selected by column name - confirm that is intended.

feat_col = 'step_ratio_off'
table_three_and_five(nurse_df, feat_col, base_walk_3_sqs, index_col, behav_col)

what is the affect on *psqi* with respect to *intercept* ?
psqi  x  intercept
p:  0.42

what is the affect on *psqi* with respect to *age* ?
psqi  x  age
p:  -0.44

what is the affect on *psqi* with respect to *gender* ?
psqi  x  gender
p:  0.22

what is the affect on *psqi* with respect to *day* ?
psqi  x  day
p:  -0.7

what is the affect on *psqi* with respect to *observation* ?
psqi  x  observation
94

what is the affect on *psqi* with respect to *r2* ?
psqi  x  r2
p:  0.15

what is the affect on *psqi* with respect to *Walk-activity ratio (off-day)* ?
psqi  x  Walk-activity ratio (off-day)
p:  -0.05

what is the affect on *psqi* with respect to *Shift [day shift] x walk-activity ratio (off-day)* ?
psqi  x  Shift [day shift] x walk-activity ratio (off-day)
p:  0.02

what is the affect on *stai* with respect to *intercept* ?
stai  x  intercept
p:  0.32

what is the affect on *stai* with respect to *age* ?
stai  x  age
p:  -0.35

what is the affect on *stai* with respect to *gender* ?

In [46]:
# All sleep-duration questions for Table 5, concatenated outcome by outcome; displayed as the cell output.
base_duration_5_sqs = psqi_duration + stai_duration + pa_duration + na_duration + swls_duration
base_duration_5_sqs

['what is the affect on *psqi* with respect to *intercept* ?',
 'what is the affect on *psqi* with respect to *age* ?',
 'what is the affect on *psqi* with respect to *gender* ?',
 'what is the affect on *psqi* with respect to *day* ?',
 'what is the affect on *psqi* with respect to *observation* ?',
 'what is the affect on *psqi* with respect to *r2* ?',
 'what is the affect on *psqi* with respect to *Sleep duration (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] x sleep duration (off-day)* ?',
 'what is the affect on *stai* with respect to *intercept* ?',
 'what is the affect on *stai* with respect to *age* ?',
 'what is the affect on *stai* with respect to *gender* ?',
 'what is the affect on *stai* with respect to *day* ?',
 'what is the affect on *stai* with respect to *observation* ?',
 'what is the affect on *stai* with respect to *r2* ?',
 'what is the affect on *stai* with respect to *Sleep duration (off-day)* ?',
 'what is the affect on *stai*

In [47]:
# Table 5, sleep-duration feature (column duration_off): print one entry per question.
# NOTE(review): the per-feature frames loaded earlier are not used; nurse_df is passed and the
# feature is selected by column name - confirm that is intended.

feat_col = 'duration_off'
table_three_and_five(nurse_df, feat_col, base_duration_5_sqs, index_col, behav_col)

what is the affect on *psqi* with respect to *intercept* ?
psqi  x  intercept
p:  0.41

what is the affect on *psqi* with respect to *age* ?
psqi  x  age
p:  -0.48

what is the affect on *psqi* with respect to *gender* ?
psqi  x  gender
p:  0.19

what is the affect on *psqi* with respect to *day* ?
psqi  x  day
p:  -0.6

what is the affect on *psqi* with respect to *observation* ?
psqi  x  observation
93

what is the affect on *psqi* with respect to *r2* ?
psqi  x  r2
p:  0.14

what is the affect on *psqi* with respect to *Sleep duration (off-day)* ?
psqi  x  Sleep duration (off-day)
p:  0.15

what is the affect on *psqi* with respect to *Shift [day shift] x sleep duration (off-day)* ?
psqi  x  Shift [day shift] x sleep duration (off-day)
p:  -0.25

what is the affect on *stai* with respect to *intercept* ?
stai  x  intercept
p:  0.31

what is the affect on *stai* with respect to *age* ?
stai  x  age
p:  -0.35

what is the affect on *stai* with respect to *gender* ?
stai  x  gender
p: 

In [48]:
# All sleep-efficiency questions for Table 5, concatenated outcome by outcome; displayed as the cell output.
base_efficiency_5_sqs = psqi_efficiency + stai_efficiency + pa_efficiency + na_efficiency + swls_efficiency
base_efficiency_5_sqs

['what is the affect on *psqi* with respect to *intercept* ?',
 'what is the affect on *psqi* with respect to *age* ?',
 'what is the affect on *psqi* with respect to *gender* ?',
 'what is the affect on *psqi* with respect to *day* ?',
 'what is the affect on *psqi* with respect to *observation* ?',
 'what is the affect on *psqi* with respect to *r2* ?',
 'what is the affect on *psqi* with respect to *Sleep efficiency (off-day)* ?',
 'what is the affect on *psqi* with respect to *Shift [day shift] × sleep efficiency (off-day)* ?',
 'what is the affect on *stai* with respect to *intercept* ?',
 'what is the affect on *stai* with respect to *age* ?',
 'what is the affect on *stai* with respect to *gender* ?',
 'what is the affect on *stai* with respect to *day* ?',
 'what is the affect on *stai* with respect to *observation* ?',
 'what is the affect on *stai* with respect to *r2* ?',
 'what is the affect on *stai* with respect to *Sleep efficiency (off-day)* ?',
 'what is the affect on 

In [49]:
# Table 5, sleep-efficiency feature (column efficiency_off): print one entry per question.
# NOTE(review): the per-feature frames loaded earlier are not used; nurse_df is passed and the
# feature is selected by column name - confirm that is intended.

feat_col = 'efficiency_off'
table_three_and_five(nurse_df, feat_col, base_efficiency_5_sqs, index_col, behav_col)

what is the affect on *psqi* with respect to *intercept* ?
psqi  x  intercept
p:  0.68

what is the affect on *psqi* with respect to *age* ?
psqi  x  age
p:  -0.46

what is the affect on *psqi* with respect to *gender* ?
psqi  x  gender
p:  0.19

what is the affect on *psqi* with respect to *day* ?
psqi  x  day
p:  -0.9

what is the affect on *psqi* with respect to *observation* ?
psqi  x  observation
93

what is the affect on *psqi* with respect to *r2* ?
psqi  x  r2
p:  0.2

what is the affect on *psqi* with respect to *Sleep efficiency (off-day)* ?
psqi  x  Sleep efficiency (off-day)
p:  -1.45

what is the affect on *psqi* with respect to *Shift [day shift] × sleep efficiency (off-day)* ?
psqi  x  Shift [day shift] × sleep efficiency (off-day)
p:  1.41

what is the affect on *stai* with respect to *intercept* ?
stai  x  intercept
p:  0.28

what is the affect on *stai* with respect to *age* ?
stai  x  age
p:  -0.38

what is the affect on *stai* with respect to *gender* ?
stai  x  gen