# Figure 2
- Py Kernel with rpy2 integration for R and Python

In [1]:
import pandas as pd
import sys
sys.path.insert(1, '/Users/brinkley97/Documents/development/')
import utils
import re

# Load Data

In [2]:
path_to_data =  "lab-kcad/datasets/tiles_dataset/" 
work_data = "figure_2/physical/diurnal_work_lm_6.csv.gz"
off_data = "figure_2/physical/diurnal_off_lm_6.csv.gz"

In [3]:
work_df = utils.load_gzip_csv_data(path_to_data, work_data)
off_df = utils.load_gzip_csv_data(path_to_data, off_data)

In [4]:
# work_df.keys(), work_df

In [5]:
# off_df

# Modify Dataframes

In [6]:
def modify_df(work_df, off_df):
    '''Modify Dataframes columns and values to match/flow with specific questions
    
    Arguments:
    work_df -- Dataframe
    off_df -- Dataframe
    
    Return: combined -- Dataframe
    '''
    
    
    # 0 is actually 11pm - 3am, ..., 5 is 7pm - 11pm
    # day starts with 2, which is 7am -11am, night starts with 5, which is 7pm - 11pm
    day_map = {'3rd': "11:00–15:00 h", '4th': "15:00–19:00 h", '5th': "19:00–23:00 h", '6th': "23:00–3:00 h", '1st': "3:00–7:00 h", '2nd': "7:00–11:00 h"}
    night_map = {'6th': '23:00–3:00 h', '1st': '3:00–7:00 h', '2nd': '7:00–11:00 h', '3rd': '11:00–15:00 h', '4th': '15:00–19:00 h', '5th': '19:00–23:00 h'}
    shift_map = {'day': 'day-shift', 'night': 'night-shift'}
    
    work_df_copy = work_df.copy()
    # work_df_copy = work_df_copy.drop(columns=["age",  "gender"])
    update_work_df = work_df_copy.replace({"time": day_map})
    update_work_df = update_work_df.replace({"shift": shift_map})
    
    off_df_copy = off_df.copy()
    update_off_df = off_df_copy.replace({"time": night_map})
    update_off_df = update_off_df.replace({"shift": shift_map})
    
    # combine_dfs = pd.concat([update_work_df, update_off_df])
    return update_work_df, update_off_df

In [7]:
# modified_dfs = modify_df(work_df, off_df)
# modified_dfs_copy = modified_dfs.copy()

In [8]:
# modified_dfs_copy

In [9]:
modified_work_df, modified_off_df = modify_df(work_df, off_df)

# Load Generated Specific Questions

In [10]:
base = "/Users/brinkley97/Documents/development/lab-kcad/"
path_to_questions = "tiles-day-night/my_code"
name_of_questions = "generateSpecificQuestions.ipynb"
questions = base + path_to_questions + name_of_questions

In [11]:
# %load questions
/Users/brinkley97/Documents/development/lab-kcad/tiles-day-night/my_codegenerateSpecificQuestions.ipynb

In [12]:
%run "generateSpecificQuestions.ipynb"

In [13]:
figure_2_specific_questions = fig_2

# Py + R Code Integration

In [14]:
%load_ext rpy2.ipython

In [15]:
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages

# load and activate() bc if NOT error (Conversion 'py2rpy' not defined for objects of type '<class 'pandas.core.frame.DataFrame'>') will appear
from rpy2.robjects import pandas2ri
pandas2ri.activate()

In [16]:
report = rpackages.importr('report')

In [17]:
rpackages.importr('devtools')
rpackages.importr('ez')

rpy2.robjects.packages.Package as a <module 'ez'>

In [18]:
# def specific_questions_with_df(specific_question, work_df, off_df, word_to_find):
#     '''
    
#     Arguments:
#     specific_question -- string
#     df -- Dataframe
#     word_to_find -- string
#     '''
#     # print(specific_question)
    
#     '''
#     Start of integration
#     '''
#     r_objects = robjects.r
#     r_objects.source("figure2.R")
    
#     if word_to_find == "rest":
#         # print("word_to_find is ", word_to_find)
#         if "work" in specific_question.split("*"):
#             print("work", "\nspecific_question is ", specific_question)
#             rest_work = r_objects.diurnal_rest_work(work_df)
#             # print(rest_work, "\n")
#         if "off" in specific_question.split("*"):
#             print("off", "\nspecific_question is ", specific_question)
#             rest_off = r_objects.diurnal_rest_off(off_df)
#             # print(rest_off, "\n")
            
#     elif word_to_find == "step_ratio":
#         # print("word_to_find is ", word_to_find)
#         pass
#     else:
#         print("word not found")
#     return 

In [19]:
def figure_2(generated_specific_questions, original_work_df, original_off_df, work_df, off_df):
    '''Reproduce Table 2
    
    Arguments:
    generated_specific_questions -- list
    work_df -- Dataframe
    off_df -- Dataframe
    
    Return: specific_data -- Dataframe in list
    
    '''
    # print(type(generated_specific_questions), generated_specific_questions)
    # print(type(work_df), work_df)
    # print(type(off_df), off_df)
    
    combined_dfs = [work_df, off_df]
    for combined_df in range(len(combined_dfs)):
        specific_df = combined_dfs[combined_df]
        specific_df = specific_df.reset_index(drop=True)
        # print(specific_df)
        
        data_columns = list(specific_df.keys())
    #     print(data_columns)

        times = ['23:00–3:00 h', '3:00–7:00 h', '7:00–11:00 h', '11:00–15:00 h', '15:00–19:00 h', '19:00–23:00 h']

        for time in times:
            data_columns.append(time)

        for generated_specific_question in range(len(generated_specific_questions)):
            specific_question = generated_specific_questions[generated_specific_question]
            # print("\nspecific_question : ", specific_question, "*****")

            data_cols = ["time", "id", "shift"]
            time = []

            for data_column in data_columns: 

                if data_column in specific_question.split("*"):
                    # print(data_column, True)
                    data_cols.append(data_column)
                    time.append(data_column)

                else:
                    # print(data_column, False)
                    continue


            data_cols = data_cols[0:-1]
            set_time = time[-1]
            form_df = specific_df.loc[0:, data_cols]

            form_specific_df = form_df.set_index("time")
            # print(form_specific_df, "\n-----")

    #         form_specific_df = specific_work_df.loc[set_time]
            # print(form_specific_df, "\n-----")

            if len(form_specific_df.keys()) == 4:
    #             print(form_specific_df)

                update_specific_df = form_specific_df.reset_index()
                off_df = update_specific_df["work"] == "off"
                # off_df = update_specific_df["work"].nunique()
                # print(off_df[0])
            
                '''
                Start of integration
                '''

                r_objects = robjects.r
                r_objects.source("anova-figure2.R")
    #             print("\n=========================\n")
    #             work_time_shift_id = r_objects.work_model(update_work_df)
                print("\nspecific_question : ", specific_question, "*****")
                if list(update_specific_df.keys())[-1] == "rest" and list(update_specific_df.keys())[-2] == "work":
                    rest_work = r_objects.diurnal_rest_work(original_work_df)
                    # print("\n===========rest_work==============\n")
                    # print(rest_work)


    #             print("\nspecific_question : ", specific_question, "*****")

                if list(update_specific_df.keys())[-1] == "rest" and off_df[0] == True:
                    rest_off = r_objects.diurnal_rest_off(original_off_df)
                    # print("\n===========rest_off==============\n")
                    # print(rest_off)


    #             print("\nspecific_question : ", specific_question, "*****")

                if list(update_specific_df.keys())[-1] == "step_ratio" and list(update_specific_df.keys())[-2] == "work":
                    step_ratio_work = r_objects.diurnal_step_ratio_work(original_work_df)
                    # print("\n===========step_ratio_work==============\n")
                    # print(step_ratio_work)


    #             print("\nspecific_question : ", specific_question, "*****")

                if list(update_specific_df.keys())[-1] == "step_ratio" and off_df[0] == True:
                    step_ratio_off = r_objects.diurnal_step_ratio_off(original_off_df)
                    # print("\n===========step_ratio_off==============\n")
                    print(step_ratio_off)
                
                    

In [20]:
figure_2(fig_2, work_df, off_df, modified_work_df, modified_off_df)

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *23:00–3:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *3:00–7:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *7:00–11:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *11:00–15:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *15:00–19:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *19:00–23:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *23:00–3:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *3:00–7:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *7:00–11:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *11:00–15:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *15:00–19:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *19:00–23:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *run_ratio* between the hours of *23:00–3:00 h* ? *****

specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *run_ratio* between the hours of *3:00–7:00 h* ? *****

specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *run_ratio* between the hours of *7:00–11:00 h* ? *****

specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *run_ratio* between the hours of *11:00–15:00 h* ? *****

specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *run_ratio* between the hours of *15:00–19:00 h* ? 

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *3:00–7:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *7:00–11:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *11:00–15:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *15:00–19:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *19:00–23:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *step_ratio* between the hours of *23:00–3:00 h* ? *****


R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



[1] "********* START : Step/Walk Off, Bottom Right *********"
$ANOVA
      Effect DFn DFd         F             p p<.05         ges
2      shift   1 102   1.17411  2.811132e-01       0.006437382
3       time   5 510 178.45349 1.530516e-109     * 0.433358363
4 shift:time   5 510  24.64066  2.953608e-22     * 0.095514170

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2434794 3.878865e-23     *
4 shift:time 0.2434794 3.878865e-23     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.6246972 1.608110e-69         * 0.6467037 7.216755e-72         *
4 shift:time 0.6246972 8.435836e-15         * 0.6467037 3.074784e-15         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01657207 -0.03291582 -0.000228315 0.0469 *  

$time
                diff       lwr.