In [10]:
import pandas as pd
# Variable lookup table; get_var_details() reads its `new_var`, `text` and
# `dataset` columns.
cols = pd.read_csv("var_names.csv")

# Survey responses. The strings listed in `na_values` are parsed as missing.
data = pd.read_csv(
    'CSCS_data_anon.csv',
    low_memory=False,
    na_values=["9999", "", " ", "Presented but no response", "NA"],
)

# Boolean mask (indexed by column name): True where every value is missing.
empty = data.isna().all()
# Keep only the columns that hold at least one value.
data = data.loc[:, ~empty]

# Keep rows whose REMOVE_case is 'No'; copy so the result is an independent frame.
data = data[data.REMOVE_case=='No'].copy()


In [None]:
def return_matching_keywords(keywords: list[str]) -> list[str]:
    """
    Returns the names of the columns whose name contains any of the keywords.

    Matching is a case-insensitive substring test against the column names of
    the notebook-level ``data`` frame.

    Args:
        keywords: Substrings to look for. A single bare string is treated as
            one keyword rather than being iterated character by character.

    Returns:
        Matching column names, in the order they appear in ``data.columns``.
        An empty ``keywords`` list yields an empty result.
    """
    # Guard against the easy mistake of passing "social" instead of ["social"]:
    # iterating a str would test every single character as its own keyword.
    if isinstance(keywords, str):
        keywords = [keywords]

    # Lower-case the keywords once, not once per column.
    lowered_keywords = [keyword.lower() for keyword in keywords]

    matches = []
    for col in data.columns:
        col_lower = col.lower()
        if any(keyword in col_lower for keyword in lowered_keywords):
            matches.append(col)
    return matches


In [42]:
def get_var_details(var_name: str) -> list[tuple[str, str]]:
    """
    Returns a list of pairs detailing the question and dataset
    corresponding to the given variable name.

    Looks up rows of the notebook-level ``cols`` table whose ``new_var``
    equals ``var_name``.

    Args:
        var_name: Variable name to look up in the ``new_var`` column.

    Returns:
        One ``(text, dataset)`` tuple per matching row, in table order.
        The list is empty when no row matches.
    """
    # Select only the two columns that are actually returned.
    matching_rows = cols.loc[cols['new_var'] == var_name, ['text', 'dataset']]

    # Pair the columns directly instead of building a Series per row with iterrows().
    return list(zip(matching_rows['text'], matching_rows['dataset']))



In [None]:
def get_column_info(column_name: str) -> None:
    """
    Prints information about a given column of the notebook-level ``data``.

    The printed report contains, in order:
      * a banner with the column name,
      * one Dataset/Question banner pair per entry returned by
        ``get_var_details(column_name)`` (or a "No associated question found"
        banner when there is none),
      * the number of missing values in the column,
      * the column's ``value_counts()``.

    Args:
        column_name: Name of a column in ``data``.

    Returns:
        None. Everything is written to standard output.

    Raises:
        KeyError: If ``column_name`` is not a column of ``data``.
    """
    # Selecting a single column label yields a Series, not a DataFrame.
    column_values: pd.Series = data[column_name]

    title = f" Information about: {column_name} "
    print(f"{title:-^70}")

    details = get_var_details(column_name)
    if not details:
        print(f"{' No associated question found ':-^70}")

    for question, dataset in details:
        # Format via f-strings rather than `+`: a missing (NaN) cell in the
        # lookup table is a float and would make string concatenation raise.
        dataset_banner = f" Dataset: {dataset} "
        question_banner = f" Question: {question} "
        print(f"{dataset_banner:-^70}")
        print(f"{question_banner:-^70}")
        print(f"{'':-^70}\n")

    print(f"Number of NA: {column_values.isna().sum()}")
    print(column_values.value_counts())
    print("")

In [57]:
# Example: print the report for the "I am too busy" social-barrier column.
get_column_info("CONNECTION_social_barriers_too_busy")

------- Information about: CONNECTION_social_barriers_too_busy -------
------------------------ Dataset: 2022_cohort ------------------------
 Question: Which of the barriers listed below prevent you from socializing with others as much as you would like to? - I am too busy 
----------------------------------------------------------------------

------------------------ Dataset: 2022_cross -------------------------
 Question: Which of the barriers below make it difficult for you to socialize with others as much as you would like to? (Check all that apply) - Selected Choice - I am too busy  
----------------------------------------------------------------------

Amount of NA: 9075
CONNECTION_social_barriers_too_busy
Not Selected     756
I am too busy    187
Name: count, dtype: int64



In [15]:
# Display the cleaned frame as the cell output (the repr is abbreviated for large frames).
data

Unnamed: 0,UNIQUE_id,UNIQUE_num_records,ELIGIBLE_consent,GEO_residence_canada,GEO_province,DEMO_age,DEMO_gender,DEMO_identity_vetrans,DEMO_identity_indigenous,DEMO_identity_lgbtq,...,PSYCH_body_self_image_questionnaire_height_dissatisfaction_score,PSYCH_body_self_image_questionnaire_fatness_evaluation_score,PSYCH_body_self_image_questionnaire_negative_affect_score,PSYCH_body_self_image_questionnaire_social_dependence_score,PSYCH_big_five_inventory_agreeable_score,PSYCH_big_five_inventory_conscientious_score,PSYCH_big_five_inventory_extraverted_score,PSYCH_big_five_inventory_neurotic_score,PSYCH_big_five_inventory_open_score,REMOVE_case
0,cscs_00001,1,Yes,Yes,British Columbia,71.0,Non-binary,,,"Sexual or gender minorities (e.g., LGBTQ2+)",...,,,,,,,,,,No
1,cscs_00002,1,Yes,Yes,Ontario,69.0,Woman,,,Not Selected,...,3.0,8.0,3.0,3.0,,,,,,No
2,cscs_00003,1,Yes,Yes,Quebec,56.0,Woman,,,Not Selected,...,,,,,,,,,,No
3,cscs_00005,1,Yes,Yes,,54.0,Woman,,,Not Selected,...,,,,,28.0,34.0,30.0,32.0,37.0,No
4,cscs_00006,1,Yes,Yes,Ontario,30.0,Man,Not Selected,"Indigenous peoples (e.g., First Nations, Métis...","Sexual or gender minorities (e.g., LGBTQ2+)",...,,,,,,,,,,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11425,cscs_11808,1,Yes,Yes,,45.0,Woman,,,Not Selected,...,,,,,26.0,23.0,18.0,31.0,37.0,No
11426,cscs_11809,1,Yes,Yes,,45.0,Woman,,,"Sexual or gender minorities (e.g., LGBTQ2+)",...,,,,,31.0,33.0,33.0,13.0,39.0,No
11427,cscs_11810,1,Yes,Yes,British Columbia,36.0,Man,,,Not Selected,...,,,,,32.0,37.0,31.0,,38.0,No
11428,cscs_11812,3,Yes,,,,,,,,...,,,,,,,,,,No
