# Reproduce Figure 3
- Py kernel
- Verify by looking at [Figure 3 in the Original Analysis paper](https://www.nature.com/articles/s41598-021-87029-w?proof=t%25C2%25A0) and by running `python3 plot_sleep_data.py` [TGN10+ version](https://github.com/Brinkley97/TGN10/blob/main/code/sleep/plot_sleep_data.py) or [Original version](https://github.com/usc-sail/tiles-day-night/blob/main/code/sleep/plot_sleep_data.py)

In [1]:
import pandas as pd
import numpy as np

# Load Data

In [2]:
path_to_data = "/Users/brinkley97/Documents/development/lab-kcad/datasets/tiles_dataset/figure_3/sleep/sleep.csv.gz"
sleep_stats_df = pd.read_csv(path_to_data)
# sleep_stats_df.keys()

# Modify Pandas DataFrames

In [3]:
def modify_df(sleep_stats_df):
    '''Modify DataFrame columns by splitting into
        day-work
        day-off
        night-work
        night-off
    
    Arguments:
    sleep_stats_df -- Pandas DataFrame

    Return: 
    sleep_dfs -- list of day-work, day-off, night-work, night-off
    
    '''
    sleep_dfs = []
    
    day_sleep_df = sleep_stats_df.loc[sleep_stats_df['shift'] == 'day']
    day_sleep_df = day_sleep_df.rename(columns={"shift": "day-shift"})
    
    day_sleep_work_df = day_sleep_df.loc[day_sleep_df['work'] == 'workday']
    day_sleep_off_df = day_sleep_df.loc[day_sleep_df['work'] == 'offday']
    
    # print(day_sleep_work_df.keys(), "\n")
    # print(day_sleep_off_df.keys(), "\n")
    
    night_sleep_df = sleep_stats_df.loc[sleep_stats_df['shift'] == 'night']
    night_sleep_df = night_sleep_df.rename(columns={"shift": "night-shift"})
    
    night_sleep_work_df = night_sleep_df.loc[night_sleep_df['work'] == 'workday']
    night_sleep_off_df = night_sleep_df.loc[night_sleep_df['work'] == 'offday']
    
    # print(night_sleep_work_df.keys(), "\n")
    # print(night_sleep_off_df.keys(), "\n")
    
    # print()
    
    return day_sleep_work_df, day_sleep_off_df, night_sleep_work_df, night_sleep_off_df

In [4]:
day_sleep_work_df, day_sleep_off_df, night_sleep_work_df, night_sleep_off_df = modify_df(sleep_stats_df)
# day_sleep_work_df

# Load Generated Specific Questions

In [5]:
base = "/Users/brinkley97/Documents/development/lab-kcad/"
generated_sq_file = "TGN10Plus/generateSpecificQuestions.ipynb"
figure_3_specific_questions_path = base + generated_sq_file

In [6]:
# %load figure_3_specific_questions_path
/Users/brinkley97/Documents/development/lab-kcad/TGN10Plus/generateSpecificQuestions.ipynb

In [7]:
%run "../generateSpecificQuestions.ipynb"

In [8]:
# figure_3_sqs, ontology_mappings

# Run Analysis

## Median Sleep

In [9]:
def calculate_median_sleep(specific_sleep_df, specific_f3_ontology_values):
    '''Calculate the average, minimum, and maximum sleep and wake hours
    
    Arguments:
    specific_sleep_df -- pd DataFrame
    
    Return:
    median sleep -- 2D py list
    '''
    
    median_sleep = []
    store_sleep_values = []
    for specific_awake_hour in np.array(specific_sleep_df['start']):
        # print(specific_awake_hour)
        if specific_awake_hour >= 12:
            # print("specific_awake_hour >= 12 : ", specific_awake_hour)
            store_sleep_values.append(specific_awake_hour - 24)
            # print("specific_awake_hour - 24 : ", specific_awake_hour - 24)
        else:
            # print("DATA", specific_awake_hour)
            store_sleep_values.append(specific_awake_hour)
            
    mean_sleep_time = np.round(np.nanmean(specific_sleep_df['start']), 2)
    min_sleep_time = np.round(np.nanmin(specific_sleep_df['start']), 2)
    max_sleep_time = np.round(np.nanmax(specific_sleep_df['start']), 2)
    
    sleep_time = mean_sleep_time, min_sleep_time, max_sleep_time
    median_sleep.append(sleep_time)
    
    mean_wake_time = np.round(np.nanmean(store_sleep_values), 2)
    min_wake_time = np.round(np.nanmin(store_sleep_values) + 24, 2)
    max_wake_time = np.round(np.nanmax(store_sleep_values), 2)
    
    wake_time = mean_wake_time, min_wake_time, max_wake_time
    median_sleep.append(wake_time)
    
    return median_sleep

In [10]:
def match_ontologies_with_sq_words(ontology_mappings, f3_specific_question):
    '''Find onotology mappings within figure 3's specific questions
    
    Arguments:
    ontology_mappings -- py dict
    f3_specific_question -- py string
    
    Return:
    ontology mappings that are in the specific questions -- list
    '''

    ontology_mappings_values = list(ontology_mappings.values())
    # print(ontology_mappings_values)
    
    specific_question_by_words = f3_specific_question.split("*")
    om_in_specific_questions = []
    
    
    for ontology_mappings_values_idx in range(len(ontology_mappings_values)):
        matching_ontology = ontology_mappings_values[ontology_mappings_values_idx]
        
        if matching_ontology in specific_question_by_words:
            
            # print(key_word, True)
            om_in_specific_questions.append(matching_ontology)
            
        else:
            continue       
    return om_in_specific_questions

In [11]:
def figure_three(figure_3_specific_questions, day_sleep_work_df, day_sleep_off_df, night_sleep_work_df, night_sleep_off_df):
    '''Reproduce Figure 3
    
    Arguments:
    figure_3_specific_questions -- py list
    day_sleep_work_df -- pd Dataframe
    day_sleep_off_df -- pd Dataframe
    night_sleep_work_df -- pd Dataframe
    night_sleep_off_df -- pd Dataframe
    
    Functions:
    match_ontologies_with_sq_words()
    calculate_median_sleep
    '''
    
    for figure_3_specific_question_idx in range(len(figure_3_specific_questions)):
        f3_specific_question = figure_3_specific_questions[figure_3_specific_question_idx]
        print(figure_3_specific_question_idx, "f3_specific_question : ", f3_specific_question)
            
        specific_f3_ontology_values = match_ontologies_with_sq_words(ontology_mappings, f3_specific_question)
        print("matching vars: ", specific_f3_ontology_values)
        
        if "day-shift" in specific_f3_ontology_values: 
            if "work" in specific_f3_ontology_values:
                day_shift_at_work = calculate_median_sleep(day_sleep_work_df, specific_f3_ontology_values)
                print(day_shift_at_work)
            else:
                day_shift_off_work = calculate_median_sleep(day_sleep_off_df, specific_f3_ontology_values)
                print(day_shift_off_work)
                
        elif "night-shift" in specific_f3_ontology_values:
            if "work" in specific_f3_ontology_values:
                night_shift_at_work = calculate_median_sleep(night_sleep_work_df, specific_f3_ontology_values)
                print(night_shift_at_work)
            else: 
                night_shift_off_work = calculate_median_sleep(night_sleep_off_df, specific_f3_ontology_values)
                print(night_shift_off_work)

In [12]:
figure_three(figure_3_sqs, day_sleep_work_df, day_sleep_off_df, night_sleep_work_df, night_sleep_off_df)

0 f3_specific_question :  what are differences between sleep *start* and sleep *end* on *work* days for primarily *day-shift* nurses ?
matching vars:  ['day-shift', 'work', 'start', 'end']
[(20.45, 0.07, 23.95), (-1.02, 21.53, 1.43)]
1 f3_specific_question :  what are differences between sleep *start* and sleep *end* on *off* days for primarily *day-shift* nurses ?
matching vars:  ['day-shift', 'off', 'start', 'end']
[(18.42, 0.05, 23.99), (-0.69, 21.57, 1.83)]
2 f3_specific_question :  what are differences between sleep *start* and sleep *end* on *work* days for primarily *night-shift* nurses ?
matching vars:  ['night-shift', 'work', 'start', 'end']
[(9.88, 8.62, 11.83), (9.88, 32.62, 11.83)]
3 f3_specific_question :  what are differences between sleep *start* and sleep *end* on *off* days for primarily *night-shift* nurses ?
matching vars:  ['night-shift', 'off', 'start', 'end']
[(7.54, 0.07, 23.9), (1.54, 21.58, 11.43)]
