# Storing data
- t_test_questions (previously questions) -- A dictionary whose keys are question templates and whose values are lists of the specific questions generated from each template
- key_words_in_study -- A dictionary of the variables used in the study. Keys are the main variables and values are the sub-variables.
    - keys :: values
        - group_1 and group_2 values are strings; behavioral and physiological values are lists
        - demographic and xxx values are dicts whose values are lists (see reformat_key_words_in_study, which converts the demographic value to a single flat list)
- def reformat_key_words_in_study -- Returns a restructured copy of key_words_in_study; the result is stored in updated_key_words_in_study

In [1]:
# Question templates for the t-test analysis (previously `questions`).
# Every template starts out with its own empty list of specific questions.
t_test_questions = {
    template: []
    for template in (
        "what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *demographic* ?",
        "what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *behavioral* ?",
    )
}

# three_way_anova_questions = {
#             'what are some differences between primarily *group_1* nurses and primarily *group_2* nurses on daily *physiological* with the covariate *demographic* and the covariate *demographic*?': []
# }

In [2]:
# Display the templates; every value is still an empty list at this point.
t_test_questions

{'what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *demographic* ?': [],
 'what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *behavioral* ?': []}

In [3]:
# types_of_analysis = {
#     't_test': [],
#     'two_way_anova': [],
#     'three_way_anova': [],
# }

In [4]:
# types_of_analysis

In [5]:
# Study variables that can fill the placeholders of the question templates.
# group_1 / group_2 hold one string each, demographic holds a dict of lists,
# behavioral and physiological hold lists.
key_words_in_study = {
    "group_1": "day-shift",
    "group_2": "night-shift",
    "demographic": {
        "Gender": ["Female", "Male"],
        "Age": ["< 40 Years", ">= 40 Years"],
        "Educ": ["Some college or College", "Graduate"],
        "native_lang": ["English", "non-english"],
    },
    "behavioral": [
        "sleep quality (PSQI)",
        "affect (PANAS)",
        "anxiety (STAI)",
        "life satisfaction (SWLS)",
        "personality (BFI-2)",
    ],
    "physiological": [
        "step count",
        "heart rate",
        "sleep",
        "average rest-activity ratio",
        "average walk activity ratio",
        "average vigorous activity duration",
    ],
}

In [6]:
# Print one "key  ::  value" line per study variable.
for key, value in key_words_in_study.items():
    print(f"{key}  ::  {value}")

group_1  ::  day-shift
group_2  ::  night-shift
demographic  ::  {'Gender': ['Female', 'Male'], 'Age': ['< 40 Years', '>= 40 Years'], 'Educ': ['Some college or College', 'Graduate'], 'native_lang': ['English', 'non-english']}
behavioral  ::  ['sleep quality (PSQI)', 'affect (PANAS)', 'anxiety (STAI)', 'life satisfaction (SWLS)', 'personality (BFI-2)']
physiological  ::  ['step count', 'heart rate', 'sleep', 'average rest-activity ratio', 'average walk activity ratio', 'average vigorous activity duration']


In [7]:
# def convert_demo_values(key_words_in_study):
#     demo_values = list(key_words_in_study.values())[2]
#     print("demo_values : ", demo_values)
#     # demo_values = sum(demo_values, [])
#     # print("demo_values: ", demo_values)
    
#     # for demo_value in demo_values:
#     #     demo_var = demo_values[demo_value]
        
#     return demo_values["gender"]

In [8]:
# convert_demo_values(key_words_in_study)

## Reformatting key words in study

In [9]:
def reformat_key_words_in_study(key_words_in_study):
    '''Return a copy of key_words_in_study with every dict value flattened to a list.

    Each dict-valued entry (e.g. "demographic") becomes one list that holds
    the dict's keys first, followed by the items of each of its value lists
    in order. Entries whose values are not dicts are kept as they are.

    Argument:
    key_words_in_study -- dict
        keys -- str
        values --
            str : group_1, group_2 (remain)
            dict of lists : demographic (convert to list)
            list : behavioral, physiological (remain)

    Return:
    A new dict; key_words_in_study itself is not modified. The copy is
    shallow, so the unchanged list values are shared with the input.
    '''
    updated_key_words_in_study = key_words_in_study.copy()

    for study_variable, sub_variables in key_words_in_study.items():
        # Select entries by type rather than by position, so the result does
        # not depend on the order in which the dict was written.
        if not isinstance(sub_variables, dict):
            continue

        flattened = list(sub_variables)
        for levels in sub_variables.values():
            flattened.extend(levels)
        updated_key_words_in_study[study_variable] = flattened

    return updated_key_words_in_study

In [10]:
# Build the flattened copy of the study variables; the bare name on the
# last line makes the notebook display it.
updated_key_words_in_study = reformat_key_words_in_study(key_words_in_study)
updated_key_words_in_study

demo_vars :  [['Gender', 'Age', 'Educ', 'native_lang'], ['Female', 'Male', '< 40 Years', '>= 40 Years', 'Some college or College', 'Graduate', 'English', 'non-english']]


{'group_1': 'day-shift',
 'group_2': 'night-shift',
 'demographic': ['Gender',
  'Age',
  'Educ',
  'native_lang',
  'Female',
  'Male',
  '< 40 Years',
  '>= 40 Years',
  'Some college or College',
  'Graduate',
  'English',
  'non-english'],
 'behavioral': ['sleep quality (PSQI)',
  'affect (PANAS)',
  'anxiety (STAI)',
  'life satisfaction (SWLS)',
  'personality (BFI-2)'],
 'physiological': ['step count',
  'heart rate',
  'sleep',
  'average rest-activity ratio',
  'average walk activity ratio',
  'average vigorous activity duration']}

In [11]:
# Display the original dict; its demographic entry is still a dict after the reformat call above.
key_words_in_study

{'group_1': 'day-shift',
 'group_2': 'night-shift',
 'demographic': {'Gender': ['Female', 'Male'],
  'Age': ['< 40 Years', '>= 40 Years'],
  'Educ': ['Some college or College', 'Graduate'],
  'native_lang': ['English', 'non-english']},
 'behavioral': ['sleep quality (PSQI)',
  'affect (PANAS)',
  'anxiety (STAI)',
  'life satisfaction (SWLS)',
  'personality (BFI-2)'],
 'physiological': ['step count',
  'heart rate',
  'sleep',
  'average rest-activity ratio',
  'average walk activity ratio',
  'average vigorous activity duration']}

In [12]:
def convert_question_templates(t_test_questions):
    '''Wrap every question template in its own single-item list.

    Arguments:
    t_test_questions -- dict whose keys are the question templates

    Return:
    2D list with one inner list per template, in the dict's key order,
    each holding just that template
    '''
    return [[template] for template in t_test_questions.keys()]

In [13]:
# Turn the template dict into a 2D list (one single-item list per template)
# and display it.
converted_question_templates = convert_question_templates(t_test_questions)
converted_question_templates

[['what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *demographic* ?'],
 ['what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *behavioral* ?']]

In [14]:
# def replacement(specific_question_template, updated_key_words_in_study, key, find_matching_study_variable, matching_study_variables_start_index):
#     '''Find matching study variables and replace with matching study values
    
#     Arguments:
#     specific_question_template -- string
#     updated_key_words_in_study_keys -- dict
#     find_matching_study_variable -- string
#     matching_study_variables_start_index -- int
    
#     '''
   
        
#     return specific_question_template

In [15]:
def generate_specific_questions(converted_question_templates, updated_key_words_in_study):
    '''Expand each question template into its specific questions.

    A study key counts as a placeholder when its text occurs anywhere in the
    template; the asterisks around it are not part of the match and stay in
    the output. A string value replaces the key in every question built so
    far, a list value produces one question per item. A template without
    any placeholder is returned unchanged as its only question.

    Arguments:
    converted_question_templates -- 2D list of question templates
        (as returned by convert_question_templates)
    updated_key_words_in_study -- dict with str keys and str or list-of-str
        values (as returned by reformat_key_words_in_study)

    Return:
    2D list; entry i holds the specific questions for template i
    '''
    specific_questions = []

    for template_parts in converted_question_templates:
        template = ''.join(template_parts)

        # Start from the template itself for every new template, so nothing
        # is carried over from the previous one.
        questions = [template]

        for key, value in updated_key_words_in_study.items():
            if key not in template:
                continue

            substitutes = value if isinstance(value, list) else [value]
            # Expand every question built so far; this keeps the result
            # independent of the order of the keys in the dict.
            questions = [
                question.replace(key, substitute)
                for question in questions
                for substitute in substitutes
            ]

        specific_questions.append(questions)

    return specific_questions

In [16]:
# Expand every template with the flattened study variables and display the
# result; update_questions below reads this notebook-level variable.
specific_questions = generate_specific_questions(converted_question_templates, updated_key_words_in_study)
specific_questions

[['what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Gender* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Age* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Educ* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *native_lang* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Female* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Male* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *< 40 Years* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *>= 40 Years* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Some college or College* ?',
  'w

# Update original dictionary with specific questions

In [17]:
def update_questions(t_test_questions, generated_questions=None):
    '''Store each template's specific questions as its value in t_test_questions.

    Arguments:
    t_test_questions -- dict keyed by question template; updated in place
    generated_questions -- 2D list with one list of specific questions per
        template, in the same order as the keys of t_test_questions.
        When omitted, the notebook-level variable specific_questions is
        used, as before.

    Return:
    The same t_test_questions dict. Templates without a matching entry in
    generated_questions keep their current value.

    Raises:
    IndexError -- if there are more lists of questions than templates
    '''
    if generated_questions is None:
        # Keeps update_questions(t_test_questions) working as it did.
        generated_questions = specific_questions

    question_keys = list(t_test_questions.keys())
    if len(generated_questions) > len(question_keys):
        # Fail before touching the dict rather than half-way through it.
        raise IndexError('more lists of specific questions than question templates')

    for question_key, questions in zip(question_keys, generated_questions):
        t_test_questions[question_key] = questions

    return t_test_questions

In [18]:
# Fill t_test_questions in place; the function reads the notebook-level
# `specific_questions` built in the previous cell.
update_questions(t_test_questions)

{'what are differences in primarily *group_1* nurses and primarily *group_2* nurses for *demographic* ?': ['what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Gender* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Age* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Educ* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *native_lang* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Female* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *Male* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *< 40 Years* ?',
  'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *>= 40 Years* ?',
  'what are differences 

In [19]:
# Show the questions stored for the second template (the *behavioral* one).
list(t_test_questions.values())[1]

['what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *sleep quality (PSQI)* ?',
 'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *affect (PANAS)* ?',
 'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *anxiety (STAI)* ?',
 'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *life satisfaction (SWLS)* ?',
 'what are differences in primarily *day-shift* nurses and primarily *night-shift* nurses for *personality (BFI-2)* ?']