# Caseness cohort breakdown

The purpose of this notebook is to provide a breakdown of the count of patients that were identified as demonstrating the caseness of complex mental health difficulties.

## Imports

In [17]:
%run 'UNSEEN_helper_functions.ipynb'
%store -r

## Load requisites

In [18]:
# Index date for the cohort. This would usually be CURRENT_DATE(), but
# December 2021 is used until cB has fixed the missing prescriptions.
myIndexDate = '2021-12-31'

# Minimum duration, in years, for which a person must have been registered
# with their current general practice.
min_GP_registeration_duration = 1

# Capture windows, in years, for criteria diagnoses and prescriptions.
Dx_window = 10
Rx_window = 10
Rx_window_caseness = 10 # Must be <= Rx_window because Rx_window is applied first.

# Enforce the constraint documented above so that a mis-set window fails here,
# loudly, instead of silently changing the counts further down the notebook.
if Rx_window_caseness > Rx_window:
    raise ValueError(
        "Rx_window_caseness (" + str(Rx_window_caseness) + ") must be <= Rx_window (" + str(Rx_window) + ")."
    )

# Parameters for disclosivity adjustments.
redaction_threshold = 7
target_round = 10

# Database attributes.
# (The `global` statements that used to precede these assignments were removed:
# at the top level of a cell every assignment is already global, so they had no effect.)
server_id = 'yhcr-prd-phm-bia-core'
database_id = 'CB_FDM_PrimaryCare_V7'

# Folder location of the codelists.
# `abspath` resolves the notebook name against the current working directory, so
# `folder_loc` is the working directory; the named notebook does not need to exist.
folder_loc = os.path.dirname(os.path.abspath("UNSEEN create caseness array.ipynb"))
folder = folder_loc + '/codelists/'

%store server_id database_id myIndexDate redaction_threshold target_round

Stored 'server_id' (str)
Stored 'database_id' (str)
Stored 'myIndexDate' (str)
Stored 'redaction_threshold' (int)
Stored 'target_round' (int)


## Load codelist CSV files.
We used opencodelists.org to build the codelists, each of which defines the set of SNOMED-CT codes used to identify patients with a given attribute.

In [19]:
def _read_codelist(filename, **read_csv_kwargs):
    """Read one codelist file from the codelists `folder` into a DataFrame.

    Any keyword arguments are passed straight through to `pandas.read_csv`.
    """
    return pandas.read_csv(folder + filename, **read_csv_kwargs)


# Clinical codes of interest.
codes_to_query_mentalIllHealth = _read_codelist("mental_ill_health_codelist.txt", sep = '\t')
codes_to_query_bipolar = _read_codelist("ciaranmci-bipolar-disorder-6a0308d7.csv")
codes_to_query_schizophrenia = _read_codelist("ciaranmci-schizophrenia-05c53c03.csv")

# Remove the bipolar-disorder and schizophrenia codes from the general
# mental-ill-health codelist, leaving a single-column DataFrame of unique codes.
_codes_bipolar_or_schizophrenia = \
    set(codes_to_query_bipolar["code"]) | set(codes_to_query_schizophrenia["code"])
_codes_mentalIllHealth_remaining = \
    set(codes_to_query_mentalIllHealth["Id"]) - _codes_bipolar_or_schizophrenia
codes_to_query_mentalIllHealth = pandas.DataFrame(
    list(_codes_mentalIllHealth_remaining)
    ,columns = ["Id"]
)

#codes_to_query_caseness = pandas.read_csv(folder + "ciaranmci-unseen-snomed-codes-to-identify-cmhd-5ac8d4fa.csv")

# Codelists for each component diagnosis of caseness.
codes_to_query_borderline = _read_codelist("ciaranmci-borderline-personality-disorder-1ed4af38.csv")
codes_to_query_chronicDepression = _read_codelist("ciaranmci-chronic-depression-53a65598.csv")
codes_to_query_chronicPTSD = _read_codelist("ciaranmci-chronic-post-traumatic-stress-disorder-3a96e263.csv")
codes_to_query_complexPTSD = _read_codelist("ciaranmci-complex-post-traumatic-stress-disorder-21876f2e.csv")
codes_to_query_devAcademicDisorder = _read_codelist("ciaranmci-developmental-academic-disorder-50f395a2.csv")
codes_to_query_dysthymia = _read_codelist("ciaranmci-dysthymia-6f6888c3.csv")
codes_to_query_personalityDisorder = _read_codelist("ciaranmci-personality-disorder-243a2f24.csv")

# Medications of interest.
medications_to_query_psychosisAndRelated = _read_codelist("UNSEEN_medications_psychosisAndRelated.csv")
medications_to_query_hypnoticsAndAnxiolytics = _read_codelist("UNSEEN_medications_hypnoticsAndAnxiolytics.csv")
medications_to_query_antidepressants = _read_codelist("UNSEEN_medications_antidepressants.csv")
#medications_to_query_all = pandas.read_csv(folder + "UNSEEN_medications_list.csv")

The script below is an edited version of the main script in `UNSEEN_create_caseness_variables.ipynb`. The main edit is that the `tbl_persons_with_caseness_codes` CTE is replaced by one similar CTE for each of the component diagnoses. I also replace `tbl_persons_with_medications` with one similar CTE for each of the component medications.

The component diagnoses are:
1. Borderline personality disorder
2. Chronic depression
3. Chronic posttraumatic stress disorder
4. Complex posttraumatic stress disorder
5. Developmental academic disorder
6. Dysthymia
7. Personality disorder

The component medications are:
1. Medications associated with psychosis and related disorders
2. Hypnotics and anxiolytics
3. Antidepressants

## Additional subqueries.

In [20]:
def _sql_unnest_CTE(cte_name, column_name, values):
    """Build the SQL text of one CTE that exposes `values` as a one-column table.

    Parameters
    ----------
    cte_name : str
        Name given to the CTE, e.g. 'tbl_codes_dysthymia'.
    column_name : str
        Name given to the single column of the CTE.
    values : list
        Values to list. Each one is converted with `str` and written as a
        single-quoted SQL string literal.

    Returns
    -------
    str
        SQL text starting with a newline and a comma, so that it can be appended
        directly after a preceding CTE.

    Notes
    -----
    An empty `values` produces `UNNEST([''])`, i.e. a table holding one empty string.
    """
    # Function-scope import so that this cell does not rely on `re` having been
    # imported elsewhere in the notebook.
    import re
    # Escape single quotes that are not already escaped, so that a value such as
    # a medication name containing an apostrophe cannot terminate the SQL string
    # literal early. Values without a single quote are left unchanged.
    escaped_values = [re.sub(r"(?<!\\)'", r"\\'", str(value)) for value in values]
    return """
,{cte_name} AS (
    SELECT
        {column_name}
    FROM
        UNNEST([
                '{values}'
                ]) AS {column_name}
)""".format(
        cte_name = cte_name
        ,column_name = column_name
        ,values = "', '".join(escaped_values)
    )


# One entry per codelist CTE: (CTE name, column name, values to list).
_codelist_CTE_inputs = [
    # Component diagnoses (SNOMED-CT codes).
    ('tbl_codes_borderlinePD', 'my_snomedcode', codes_to_query_borderline["code"].tolist())
    ,('tbl_codes_chronicDepression', 'my_snomedcode', codes_to_query_chronicDepression["code"].tolist())
    ,('tbl_codes_chronicPTSD', 'my_snomedcode', codes_to_query_chronicPTSD["code"].tolist())
    ,('tbl_codes_complexPTSD', 'my_snomedcode', codes_to_query_complexPTSD["code"].tolist())
    ,('tbl_codes_devAcademicDisorder', 'my_snomedcode', codes_to_query_devAcademicDisorder["code"].tolist())
    ,('tbl_codes_dysthymia', 'my_snomedcode', codes_to_query_dysthymia["code"].tolist())
    ,('tbl_codes_personalityDisorder', 'my_snomedcode', codes_to_query_personalityDisorder["code"].tolist())
    # Component medications (names that are later used as regular expressions).
    ,('tbl_medications_psychosisAndRelated', 'my_nameofmedication', medications_to_query_psychosisAndRelated["Medication"].tolist())
    ,('tbl_medications_hypnoticsAndAnxiolytics', 'my_nameofmedication', medications_to_query_hypnoticsAndAnxiolytics["Medication"].tolist())
    ,('tbl_medications_antidepressants', 'my_nameofmedication', medications_to_query_antidepressants["Medication"].tolist())
]

# Concatenate the CTEs in the order listed above. The trailing newline keeps the
# text separated from whatever SQL is appended next.
sql_caseness_components_codelist_CTEs = \
    ''.join(_sql_unnest_CTE(*cte_inputs) for cte_inputs in _codelist_CTE_inputs) + "\n"
# Component diagnoses of caseness. Each name is used for the indicator column,
# for the codelist CTE `tbl_codes_<name>` and for the resulting CTE
# `tbl_persons_with_<name>_codes`.
_caseness_diagnosis_components = [
    'borderlinePD'
    ,'chronicDepression'
    ,'chronicPTSD'
    ,'complexPTSD'
    ,'devAcademicDisorder'
    ,'dysthymia'
    ,'personalityDisorder'
]

# Component medications of caseness, paired with the prescription capture window
# (in years) applied to each. Each name is used for the indicator column, for the
# medication-list CTE `tbl_medications_<name>` and for the resulting CTE
# `tbl_persons_with_<name>_meds`.
# NOTE(review): antidepressants use `Rx_window_caseness` while the other two use
# `Rx_window`; this is kept exactly as before - confirm that it is intended.
_caseness_medication_components = [
    ('psychosisAndRelated', Rx_window)
    ,('hypnoticsAndAnxiolytics', Rx_window)
    ,('antidepressants', Rx_window_caseness)
]


def _sql_persons_with_codes_CTE(component):
    """Build the CTE that flags persons holding any code from one diagnosis codelist.

    Parameters
    ----------
    component : str
        Component name; the CTE reads `tbl_codes_<component>` and is named
        `tbl_persons_with_<component>_codes`, with an indicator column `<component>`.

    Returns
    -------
    str
        SQL text starting with a newline and a comma.

    Notes
    -----
    The only date filter is `dateevent < myIndexDate`; no lower bound on the
    diagnosis date is applied in this CTE.
    """
    return """
,tbl_persons_with_{component}_codes AS (
    SELECT
        DISTINCT tbl_persons_firstFilters.person_id
        ,1 AS {component}
    FROM
        tbl_persons_firstFilters
    # This join gets the diagnostic SNOMED-CT codes, and filters for
    # the patients for which we have diagnostic codes because it is an
    # INNER JOIN.
    JOIN
        {server_id}.{database_id}.tbl_srcode
        ON tbl_persons_firstFilters.person_id = tbl_srcode.person_id
    # This join is filtering for patients with the diagnostic SNOMED-CT codes
    # of interest by using an INNER JOIN, which acts like an intersection in
    # set operations.
    JOIN
        tbl_codes_{component}
        ON tbl_srcode.snomedcode = tbl_codes_{component}.my_snomedcode
    WHERE
        # This filters for diagnoses prior to the index date.
        tbl_srcode.dateevent < myIndexDate
)""".format(component = component, server_id = server_id, database_id = database_id)


def _sql_persons_with_meds_CTE(component, window_years):
    """Build the CTE that flags persons prescribed any medication from one list.

    Parameters
    ----------
    component : str
        Component name; the CTE reads `tbl_medications_<component>` and is named
        `tbl_persons_with_<component>_meds`, with an indicator column `<component>`.
    window_years : int
        Upper bound of the `DATE_DIFF(..., YEAR) BETWEEN 0 AND ...` filter on the
        medication start date.

    Returns
    -------
    str
        SQL text starting with a newline and a comma.

    Notes
    -----
    NOTE(review): BigQuery's DATE_DIFF with the YEAR part is documented as counting
    year boundaries rather than whole elapsed years, so the window may reach
    further back than `window_years` full years - confirm this is acceptable.
    """
    return """
,tbl_persons_with_{component}_meds AS (
    SELECT
        DISTINCT tbl_persons_firstFilters.person_id
        ,1 AS {component}
    FROM
        tbl_persons_firstFilters
    # This join is adding the medication table so that I can query medications.
    # It also, effectively, removes any patients without a prescription because
    # it is an INNER JOIN.
    JOIN
        {server_id}.{database_id}.tbl_srprimarycaremedication
        ON tbl_persons_firstFilters.person_id = tbl_srprimarycaremedication.person_id
    # This cross join conveniently creates all possible combinations of values of the
    # previous join result and `tbl_medications`. This sets up my interim result to
    # easily do a row-wise comparison of the medications of interest with the variously-
    # worded `nameofmedication` values in the database.
    CROSS JOIN
        tbl_medications_{component}
    WHERE
        # This filters for the medications of interest.
        REGEXP_CONTAINS(nameofmedication, tbl_medications_{component}.my_nameofmedication) = True
        AND
        DATE_DIFF(myIndexDate, CAST(tbl_srprimarycaremedication.datemedicationstart AS DATE), YEAR) BETWEEN 0 AND {window_years}
)""".format(
        component = component
        ,server_id = server_id
        ,database_id = database_id
        ,window_years = str(window_years)
    )


def _sql_casenessBreakdown_CTE(component_tables):
    """Build the CTE that gives every study-population person a 0/1 flag per component.

    Parameters
    ----------
    component_tables : list of (str, str)
        Pairs of (indicator column name, name of the CTE that provides it), in the
        order in which the columns should appear.

    Returns
    -------
    str
        SQL text starting with a newline and a comma.
    """
    # A person missing from a component CTE gets NULL from the LEFT JOIN,
    # which is turned into 0 here.
    indicator_columns = ''.join(
        "\n        ,CASE WHEN {0} IS NULL THEN 0 ELSE 1 END AS {0}".format(component)
        for component, table in component_tables
    )
    left_joins = ''.join(
        "\n    LEFT JOIN {0} ON tbl_studyPopulation_no_caseness.person_id = {0}.person_id".format(table)
        for component, table in component_tables
    )
    return (
        "\n,tbl_studyPopulation_casenessBreakdown AS ("
        + "\n    SELECT"
        + "\n        DISTINCT tbl_studyPopulation_no_caseness.person_id"
        + indicator_columns
        + "\n    FROM"
        + "\n        tbl_studyPopulation_no_caseness"
        + left_joins
        + "\n)"
    )


# (indicator column, providing CTE) for every component: diagnoses first, then medications.
_caseness_component_tables = \
    [(component, 'tbl_persons_with_' + component + '_codes')
     for component in _caseness_diagnosis_components] + \
    [(component, 'tbl_persons_with_' + component + '_meds')
     for component, window_years in _caseness_medication_components]

# All component CTEs followed by the CTE that combines them. The trailing
# newline keeps the text separated from the final SELECT appended later.
sql_caseness_components_CTEs = \
    ''.join(_sql_persons_with_codes_CTE(component)
            for component in _caseness_diagnosis_components) + \
    ''.join(_sql_persons_with_meds_CTE(component, window_years)
            for component, window_years in _caseness_medication_components) + \
    _sql_casenessBreakdown_CTE(_caseness_component_tables) + \
    "\n"

## Final select.

In [21]:
# Final SELECT: one row per person in the study population, ordered by person_id.
sql_final_select =\
"""
SELECT * FROM tbl_studyPopulation_casenessBreakdown ORDER BY person_id
"""

# Assemble the full query from its fragments, in order.
# NOTE(review): `sql_declarations` and `sql_studyPopulation` are not defined in the
# cells of this notebook; presumably they come from the helper notebook or from
# `%store -r` - confirm.
_sql_caseness_breakdown_query = ''.join([
    sql_declarations
    ,sql_studyPopulation
    ,sql_caseness_components_codelist_CTEs
    ,sql_caseness_components_CTEs
    ,sql_final_select
])

# Run the query and keep the result as a DataFrame.
caseness_breakdown_array = pandas.read_gbq(_sql_caseness_breakdown_query)

## Calculate prevalence of components.

In [53]:
# Count the persons flagged for each component (every column after the first,
# which is person_id), rounded to the nearest multiple of `target_round`.
# NOTE(review): `redaction_threshold` is not applied to these counts - confirm
# that rounding alone is the intended disclosure control here.
_raw_counts_components = caseness_breakdown_array.iloc[:, 1:].sum()
counts_components = (_raw_counts_components / target_round).round() * target_round

# Express the rounded counts relative to the size of the study population.
# NOTE(review): `count_studyPopulation` is not defined in the cells of this
# notebook; presumably it is restored by `%store -r` - confirm.
_fraction_of_studyPopulation = counts_components / count_studyPopulation
proportion = (_fraction_of_studyPopulation * 100).round(2)
prevalence_per_thousand = (_fraction_of_studyPopulation * 1000).round(2)

display(
    pandas.DataFrame(
        data = {'proportion' : proportion, 'prevalence_per_thousand' : prevalence_per_thousand}
    )
)

Unnamed: 0,proportion,prevalence_per_thousand
borderlinePD,0.24,2.36
chronicDepression,0.57,5.67
chronicPTSD,0.06,0.58
complexPTSD,0.0,0.0
devAcademicDisorder,0.86,8.61
dysthymia,0.25,2.5
personalityDisorder,1.75,17.55
psychosisAndRelated,4.68,46.82
hypnoticsAndAnxiolytics,26.78,267.78
antidepressants,62.75,627.47
