# Figure 2
- Py Kernel

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import sys
sys.path.insert(1, '/Users/brinkley97/Documents/development/')
import utils

# Load Data

In [2]:
# Dataset root handed to the loader, followed by the work and off
# gzip-compressed CSV files used for Figure 2 (paths relative to that root).
path_to_data = "lab-kcad/datasets/tiles_dataset/"
work_data, off_data = (
    "figure_2/physical/diurnal_work_lm_6.csv.gz",
    "figure_2/physical/diurnal_off_lm_6.csv.gz",
)

In [3]:
# Read both files through the project loader: work first, then off.
work_df, off_df = (
    utils.load_gzip_csv_data(path_to_data, work_data),
    utils.load_gzip_csv_data(path_to_data, off_data),
)

# Modify Dataframes

In [4]:
def modify_df(work_df, off_df):
    '''Relabel the "time" column of both frames and stack them.

    The identifying columns "id", "age" and "gender" are removed, and the
    ordinal codes in "time" ('1st' ... '6th') are replaced by the clock-time
    labels that appear in the generated specific questions.

    Arguments:
    work_df -- Dataframe containing "id", "age", "gender" and "time" columns
    off_df -- Dataframe containing the same columns

    Return:
    (work, off, combined) -- the relabelled work Dataframe, the relabelled off
    Dataframe, and their row-wise concatenation with a fresh 0..n-1 index
    '''

    # Ordinal window code -> clock-time label; the same labelling is applied
    # to the work and the off frame.
    window_labels = {
        '1st': "23:00–3:00 h",
        '2nd': "3:00–7:00 h",
        '3rd': "7:00–11:00 h",
        '4th': "11:00–15:00 h",
        '5th': "15:00–19:00 h",
        '6th': "19:00–23:00 h",
    }

    def relabel(frame):
        # drop() hands back a new frame, so the caller's data is never mutated.
        trimmed = frame.drop(columns=["id", "age", "gender"])
        return trimmed.replace({"time": window_labels})

    update_work_df = relabel(work_df)
    update_off_df = relabel(off_df)

    stacked = pd.concat([update_work_df, update_off_df])
    return update_work_df, update_off_df, stacked.reset_index(drop=True)

In [5]:
# Relabel both frames; the third result is the two frames stacked together.
modified_work_df, modified_off_df, combine_dfs = modify_df(
    work_df=work_df,
    off_df=off_df,
)

# Load Generated Specific Questions

In [6]:
# Location of the notebook that generates the figure-specific questions.
base = "/Users/brinkley97/Documents/development/lab-kcad/"
path_to_questions = "tiles-day-night/my_code"
name_of_questions = "generateSpecificQuestions.ipynb"
# `path_to_questions` carries no trailing slash, so the separator is added
# here; without it the directory and file name fuse into
# ".../my_codegenerateSpecificQuestions.ipynb".
questions = base + path_to_questions + "/" + name_of_questions

In [7]:
# Show the notebook path held in `questions` for a quick visual check.
# (`%load questions` had pasted the raw path string into this cell; a bare
# path is not valid Python and stops a Restart & Run All.)
questions

In [8]:
# Execute the question-generation notebook from this kernel.
# NOTE(review): presumably this is what defines `fig_2` and
# `key_words_in_study`, which later cells use without defining — confirm.
# NOTE(review): the file name is resolved against the current working
# directory and the `questions` path built earlier is not used — confirm intended.
%run "generateSpecificQuestions.ipynb"

In [9]:
# Alias `fig_2` under a more descriptive name for the analysis below.
# NOTE(review): `fig_2` is not defined in this notebook; presumably it is
# created by the %run of generateSpecificQuestions.ipynb — confirm.
figure_2_specific_questions = fig_2

In [10]:
def _shift_p_value(df, time, word_to_find):
    '''Return the day-vs-night independent t-test p-value for one time window.'''
    # A boolean mask always yields a DataFrame, even when a single row matches
    # (label lookup on a "time" index would return a Series in that case).
    in_window = df.loc[df["time"] == time]
    day_values = np.array(in_window.loc[in_window["shift"] == "day", word_to_find])
    night_values = np.array(in_window.loc[in_window["shift"] == "night", word_to_find])
    _, p_value = stats.ttest_ind(day_values, night_values)
    return p_value


def specific_questions_with_df(specific_question, specific_question_by_words, work_df, off_df, word_to_find):
    '''Compare day-shift and night-shift values with an independent t-test.

    For every time window named in the question, the values of `word_to_find`
    (e.g. "rest" or "step_ratio") are split by the "shift" column into "day"
    and "night" groups and tested against each other, once on the work frame
    and once on the off frame. The question, each p-value and a separator
    line are printed as the tests run.

    Run "code/physical/plot_diurnal_data.py" to check results

    Arguments:
    specific_question -- string, printed as a heading
    specific_question_by_words -- list of the question's tokens; tokens that
        are known time windows select the rows to test
    work_df -- Dataframe with "time", "shift" and `word_to_find` columns
    off_df -- Dataframe with "time", "shift" and `word_to_find` columns
    word_to_find -- string, name of the column to test

    Return:
    p_val_df -- Dataframe indexed by the known time windows with columns
        "p-val" (off p-value, kept for backward compatibility),
        "work p-val" and "off p-val". Windows not named in the question stay
        NaN; no columns are created when no window matches.
    '''

    print(specific_question)

    # NOTE(review): `key_words_in_study` is not defined in this notebook
    # (presumably it comes from the %run of generateSpecificQuestions.ipynb),
    # and position 7 is assumed to hold the time-window labels — confirm.
    filter_times = list(key_words_in_study.values())[7]
    p_val_df = pd.DataFrame(index=filter_times)

    for time in specific_question_by_words:
        if time not in filter_times:
            continue

        work_p = _shift_p_value(work_df, time, word_to_find)
        print("work", time, work_p)

        off_p = _shift_p_value(off_df, time, word_to_find)
        print("off", time, off_p)
        print("------------------------------------------")

        # "p-val" used to be written twice (work, then off), so only the off
        # value survived. It keeps that final value for existing readers,
        # while both results are now stored under their own column.
        p_val_df.loc[time, "p-val"] = off_p
        p_val_df.loc[time, "work p-val"] = work_p
        p_val_df.loc[time, "off p-val"] = off_p

    return p_val_df

In [11]:
def figure_2(generated_specific_questions, work_df, off_df):
    '''Run the day-vs-night comparison for every generated question.

    Each question is split on "*". When one of the physiological keywords
    ("rest", "step_ratio") is among the resulting tokens, the work frame is
    narrowed to the "work", "time", "shift" and keyword columns and passed,
    together with the unmodified off frame, to specific_questions_with_df,
    which prints the p-values.

    Arguments:
    generated_specific_questions -- list of question strings with key words
        wrapped in "*"
    work_df -- Dataframe with "work", "time", "shift" and keyword columns
    off_df -- Dataframe passed through unchanged

    Return:
    p_values -- list with one result of specific_questions_with_df per
        (question, keyword) match, in the order the questions were given
    '''

    filter_physiological = ["rest", "step_ratio"]

    # The index reset does not depend on the question, so it is done once.
    work_df = work_df.reset_index(drop=True)

    p_values = []
    for specific_question in generated_specific_questions:
        specific_question_by_words = specific_question.split("*")

        for word_to_find in filter_physiological:
            if word_to_find not in specific_question_by_words:
                continue

            modified_work_df = work_df.loc[:, ["work", "time", word_to_find, "shift"]]
            p_val = specific_questions_with_df(
                specific_question,
                specific_question_by_words,
                modified_work_df,
                off_df,
                word_to_find,
            )
            # Keep each result so callers can use it, not only read the prints.
            p_values.append(p_val)

    return p_values

In [12]:
# Run the Figure 2 comparison on the relabelled frames; the p-values for each
# question and time window are printed while it runs.
figure_2(
    generated_specific_questions=figure_2_specific_questions,
    work_df=modified_work_df,
    off_df=modified_off_df,
)

on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *23:00–3:00 h* ?
work 23:00–3:00 h 0.0015494236117052733
off 23:00–3:00 h 0.8501148450822772
------------------------------------------
on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *3:00–7:00 h* ?
work 3:00–7:00 h 0.034467656157217745
off 3:00–7:00 h 0.3176447299854959
------------------------------------------
on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *7:00–11:00 h* ?
work 7:00–11:00 h 8.619878000514936e-08
off 7:00–11:00 h 4.227081558573983e-05
------------------------------------------
on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between th