In [1]:
import pandas as pd
import numpy as np

See the [PSQI scoring instructions](https://www.goodmedicine.org.uk/files/assessment,%20pittsburgh%20psqi.pdf) for scoring details.

## Response Scoring Example

### Form A

Example data:

In [13]:
# Read the example Form A responses and, in the same expression, replace the
# CSV's own column headers with short identifiers. The labels are applied
# positionally, so the list must follow the file's column order.
example_response_data = pd.read_csv(
    "./responses/form_a_example_responses.csv"
).set_axis(
    [
        # submission metadata and participant details
        "timestamp",
        "participant_ID",
        "participant_gender",
        "participant_age",
        "participant_occupation",
        # questions 1-4
        "q1", "q2", "q3", "q4",
        # question 5 sub-items (5j has a free-text part and a frequency part)
        "q5a", "q5b", "q5c", "q5d", "q5e", "q5f", "q5g", "q5h", "q5i",
        "q5j_text", "q5j_freq",
        # questions 6-9
        "q6", "q7", "q8", "q9",
    ],
    axis=1,
)
example_response_data.head(1)

Unnamed: 0,timestamp,participant_ID,participant_gender,participant_age,participant_occupation,q1,q2,q3,q4,q5a,...,q5f,q5g,q5h,q5i,q5j,q5_freq,q6,q7,q8,q9
0,10/19/2023 16:21:11,22-T-09-JKT,Female,20,Undergraduate Student,1:00:00 AM,20,9:00:00 AM,6,Once or twice a week,...,Not during the past month,Not during the past month,Less than once a week,Once or twice a week,,,Fairly good,Not during the past month,Less than once a week,Only a very slight problem


#### Component Score Computation

In [9]:
def get_component_1(response_df, categorical_map):
    """Return component 1: the ``q6`` answers translated to numeric scores.

    Args:
        response_df: DataFrame of responses containing a ``q6`` column.
        categorical_map: Dict whose ``"q6"`` entry maps each answer label to
            its score.

    Returns:
        A Series with every ``q6`` value found in the mapping replaced by its
        score; values not present in the mapping are left unchanged.
    """
    q6_answers = response_df["q6"]
    q6_scores_by_label = categorical_map["q6"]
    return q6_answers.replace(q6_scores_by_label)

In [10]:
def get_component_2(response_df, categorical_map):
    """Score PSQI component 2 (sleep latency) from ``q2`` and ``q5a``.

    ``q2`` (minutes needed to fall asleep) is banded into a 0-3 sub-score,
    added to the numeric ``q5a`` score, and the 0-6 sum is folded back to 0-3
    (0 -> 0, 1-2 -> 1, 3-4 -> 2, 5-6 -> 3).

    Args:
        response_df: DataFrame with a numeric ``q2`` column (minutes) and a
            categorical ``q5a`` column.
        categorical_map: Dict whose ``"q5"`` entry maps the frequency labels
            used by the question-5 items to their scores.

    Returns:
        A Series of component scores, one per row of ``response_df``.

    Raises:
        AssertionError: If a ``q2`` value is negative.
    """
    def process_q2(num):
        # PSQI latency bands; each band includes its upper bound:
        # <=15 min -> 0, 16-30 -> 1, 31-60 -> 2, >60 -> 3.
        assert num >= 0, "q2 response must be nonnegative"
        if num <= 15:
            return 0
        elif num <= 30:
            return 1
        elif num <= 60:
            return 2
        else:
            return 3

    q5a_numerical = response_df["q5a"].replace(categorical_map["q5"])
    sum_of_parts = q5a_numerical + response_df["q2"].apply(process_q2)
    # Ceil-halve the 0-6 sum to get the 0-3 component score.
    return sum_of_parts.apply(lambda x: min(3, (x + 1) // 2))

In [11]:
def get_component_3(response_df, categorical_map):
    """Score PSQI component 3 (sleep duration) from ``q4``.

    Sleep duration is the number of hours of actual sleep, which this form
    records in ``q4``; ``q3`` holds the get-up clock time and cannot be
    compared against hour thresholds.

    Args:
        response_df: DataFrame with a numeric ``q4`` column (hours slept).
        categorical_map: Unused; accepted so every component scorer shares
            the same signature.

    Returns:
        A Series of scores: more than 7 h -> 0, 6-7 h -> 1, 5 to under 6 h
        -> 2, anything else -> 3.
    """
    def process_q4(hours_slept):
        if hours_slept > 7:
            return 0
        elif hours_slept >= 6:
            return 1
        elif hours_slept >= 5:
            return 2
        else:
            return 3

    return response_df["q4"].apply(process_q4)

In [21]:
def get_component_4(response_df, categorical_map):
    """Score PSQI component 4 (habitual sleep efficiency).

    Efficiency is hours actually slept (``q4``) divided by hours spent in bed
    (from bed time ``q1`` to get-up time ``q3``, wrapping past midnight when
    needed).

    Args:
        response_df: DataFrame with ``q1`` and ``q3`` as clock strings such
            as ``"11:00:00 PM"`` and ``q4`` as a number of hours.
            NOTE(review): ``q4`` is assumed numeric, as in the example
            response row; confirm against the cleaned data.
        categorical_map: Unused; accepted so every component scorer shares
            the same signature.

    Returns:
        A Series indexed like ``response_df``: efficiency above 0.85 -> 0,
        0.75-0.85 -> 1, 0.65 to under 0.75 -> 2, anything else -> 3.
    """
    seconds_per_hour = 60 ** 2
    seconds_per_day = 24 * seconds_per_hour

    def intify_time_str(time_str):
        # Convert "H:MM[:SS] [AM|PM]" to seconds after midnight.
        clock, _, meridiem = str(time_str).strip().partition(" ")
        fields = [int(field) for field in clock.split(":")]
        fields += [0] * (3 - len(fields))  # tolerate a missing seconds field
        hours, minutes, seconds = fields[:3]
        meridiem = meridiem.strip().upper()
        if meridiem in ("AM", "PM"):
            # 12-hour clock: 12 AM is hour 0 and 12 PM is hour 12.
            hours = hours % 12 + (12 if meridiem == "PM" else 0)
        return hours * seconds_per_hour + minutes * 60 + seconds

    def get_time_difference(period_start, period_end):
        # The modulo makes an interval that crosses midnight come out positive.
        elapsed = intify_time_str(period_end) - intify_time_str(period_start)
        return elapsed % seconds_per_day

    def efficiency_to_score(eff):
        if eff > 0.85: return 0
        elif 0.75 <= eff <= 0.85: return 1
        elif 0.65 <= eff < 0.75: return 2
        else: return 3

    # q4 is a duration in hours, not a clock time; convert it to seconds.
    time_slept = response_df["q4"].astype(float) * seconds_per_hour
    time_on_bed = pd.Series(
        [
            get_time_difference(interval_start, interval_end)
            for interval_start, interval_end in zip(response_df["q1"], response_df["q3"])
        ],
        # Reuse the frame's index so the division below aligns row by row.
        index=response_df.index,
    )
    habitual_sleep_efficiency = time_slept / time_on_bed
    return habitual_sleep_efficiency.apply(efficiency_to_score)

In [22]:
def get_component_5(response_df, categorical_map):
    """Score PSQI component 5 (sleep disturbances) from items 5b-5j.

    Each item is converted to its numeric score, the nine scores are added
    row by row, and the 0-27 sum is banded: 0 -> 0, 1-9 -> 1, 10-18 -> 2,
    19-27 -> 3.

    Args:
        response_df: DataFrame with the categorical columns ``q5b`` to
            ``q5i`` and ``q5j_freq``.
        categorical_map: Dict whose ``"q5"`` entry maps the frequency labels
            to their scores.

    Returns:
        A Series of component scores, one per row of ``response_df``. A row
        with a missing answer in ``q5b``-``q5i`` yields a missing score.
    """
    def sum_component_score_map(score):
        if pd.isna(score):
            # A missing answer makes the sum unscoreable; don't invent a score.
            return score
        # Ceiling division by 9 gives the bands 0 / 1-9 / 10-18 / 19-27.
        return min(3, int(score + 8) // 9)

    numerical_responses_q5 = [
        response_df[q5_subpart].replace(categorical_map["q5"])
        for q5_subpart in ["q5b", "q5c", "q5d", "q5e", "q5f", "q5g", "q5h", "q5i"]
    ]
    # Item 5j ("other reason") is optional; a blank answer counts as 0.
    numerical_responses_q5.append(
        response_df["q5j_freq"].replace(categorical_map["q5"]).fillna(0)
    )
    # Built-in sum adds the Series element-wise. np.sum on a list of Series
    # would collapse everything into a single scalar instead.
    sum_of_q5_subparts = sum(numerical_responses_q5)
    return sum_of_q5_subparts.apply(sum_component_score_map)

In [23]:
def get_component_6(response_df, categorical_map):
    """Return component 6: the ``q7`` answers translated to numeric scores.

    Args:
        response_df: DataFrame of responses containing a ``q7`` column.
        categorical_map: Dict whose ``"q7"`` entry maps each answer label to
            its score.

    Returns:
        A Series with every ``q7`` value found in the mapping replaced by its
        score; values not present in the mapping are left unchanged.
    """
    q7_answers = response_df["q7"]
    q7_scores_by_label = categorical_map["q7"]
    return q7_answers.replace(q7_scores_by_label)

In [24]:
def get_component_7(response_df, categorical_map):
    """Score PSQI component 7 (daytime dysfunction) from ``q8`` and ``q9``.

    Both answers are converted to numeric scores with their own mappings,
    added, and the 0-6 sum is folded to 0-3 (0 -> 0, 1-2 -> 1, 3-4 -> 2,
    5-6 -> 3).

    Args:
        response_df: DataFrame with categorical ``q8`` and ``q9`` columns.
        categorical_map: Dict whose ``"q8"`` and ``"q9"`` entries map each
            question's answer labels to their scores.

    Returns:
        A Series of component scores, one per row of ``response_df``.
    """
    q8_numerical = response_df["q8"].replace(categorical_map["q8"])
    # q9 has its own answer labels, so it must be read from the q9 column.
    q9_numerical = response_df["q9"].replace(categorical_map["q9"])
    sum_of_parts = q8_numerical + q9_numerical
    # Ceil-halve the 0-6 sum to get the 0-3 component score.
    return sum_of_parts.apply(lambda x: min(3, (x + 1) // 2))

#### Data Cleaning