In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import duckdb
import os

from dotenv import load_dotenv
from pathlib import Path
from duckdb.typing import *

from utilities.utils import get_flat_table_rows


%load_ext autoreload
%autoreload 2

# Remote connection

In [2]:
# Load environment variables from the `.env` file that sits in the current
# working directory (resolved to an absolute path first).
# use this only in development
print("loading env variables...")
env_dir = Path('./').resolve()
load_dotenv(str(env_dir / '.env'))
print("env variables loaded.\n")

loading env variables...
env variables loaded.



In [3]:
# Equivalent connection strings for other clients, kept for reference:
# jdbc:duckdb:md:chronic_disease_analyses_db
# duckdb:///md:chronic_disease_analyses_db
print("connecting to duckdb...")
# The token is read from the environment (loaded from .env earlier) and is
# appended to the database URI as a query parameter.
conn = duckdb.connect(
    "md:chronic_disease_analyses_db"
    f"?motherduck_token={os.environ['MOTHERDUCK_TOKEN']}"
)
print("connected to duckdb.\n")

connecting to duckdb...
connected to duckdb.



In [4]:
# Collect the name of every table in the connected database.
tables = get_flat_table_rows(conn.sql("SHOW TABLES").fetchall())
tables

['CDI',
 'CDILocation',
 'CDIStratification',
 'DataValueType',
 'Population',
 'PopulationState',
 'PopulationStratification',
 'Question',
 'Stratification',
 'Topic']

In [5]:
# Report the row count of every table.
# The table name is emitted as a double-quoted identifier (embedded quotes doubled)
# so names containing spaces, punctuation or reserved words still form valid SQL.
# COUNT(*) yields exactly one row, so fetchone() is enough to read the scalar.
for table in tables:
    quoted_table = '"' + str(table).replace('"', '""') + '"'
    count = conn.sql(f"SELECT COUNT(*) FROM {quoted_table}").fetchone()[0]
    print(f"table {table} count: {count}")

table CDI count: 678471
table CDILocation count: 51
table CDIStratification count: 11
table DataValueType count: 15
table Population count: 2947392
table PopulationState count: 51
table PopulationStratification count: 28
table Question count: 192
table Stratification count: 39
table Topic count: 17


In [6]:
# Preview the Question table; leaving the relation as the last expression lets
# the notebook render it as a text table.
conn.sql("""
    SELECT * FROM Question
""")

┌────────────┬─────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────┬────────┐
│ QuestionID │ TopicID │                                                   Question                                                   │ AgeStart │ AgeEnd │
│  varchar   │ varchar │                                                   varchar                                                    │  double  │ double │
├────────────┼─────────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────┤
│ ART1_1     │ ART     │ Arthritis among adults aged >= 18 years                                                                      │     18.0 │    inf │
│ ALC4_0     │ ALC     │ Binge drinking intensity among adults aged >= 18 years who binge drink                                       │     18.0 │    inf │
│ NPAW2_3    │ NPAW    │ Overweight or obesity among women aged 

# Unique topics

In [7]:
# Distinct (TopicID, Topic) pairs for the topics referenced by the Question table.
# DISTINCT is applied to a parenthesised pair, and the displayed result is a list of
# (id, topic name) tuples -- presumably get_flat_table_rows unwraps the single
# result column; verify against utilities.utils.
# NOTE(review): there is no ORDER BY, so the order of the pairs is not guaranteed.
# NOTE(review): despite its name, `topic_id` holds (id, name) pairs, not just ids.
topic_id = get_flat_table_rows(conn.sql("""
    SELECT DISTINCT(Question.TopicID, Topic)
    FROM Question
    JOIN Topic
    ON Question.TopicID = Topic.TopicID
""").fetchall())
topic_id

[('TOB', 'Tobacco'),
 ('CAN', 'Cancer'),
 ('CKD', 'Chronic Kidney Disease'),
 ('MTH', 'Mental Health'),
 ('ALC', 'Alcohol'),
 ('IMM', 'Immunization'),
 ('RPH', 'Reproductive Health'),
 ('ART', 'Arthritis'),
 ('OLD', 'Older Adults'),
 ('COPD', 'Chronic Obstructive Pulmonary Disease'),
 ('NPAW', 'Nutrition, Physical Activity, and Weight Status'),
 ('DIA', 'Diabetes'),
 ('ORH', 'Oral Health'),
 ('AST', 'Asthma'),
 ('CVD', 'Cardiovascular Disease'),
 ('DIS', 'Disability'),
 ('OVC', 'Overarching Conditions')]

In [8]:
# Distinct (DataValueType, DataValueTypeID) pairs, i.e. the human-readable label of
# each data value type next to its short code.
# NOTE(review): there is no ORDER BY, so the order of the pairs is not guaranteed.
dvt = get_flat_table_rows(conn.sql("""
    SELECT DISTINCT(DataValueType, DataValueTypeID)
    FROM DataValueType
""").fetchall())
dvt

[('US Dollars', 'USD'),
 ('Mean', 'MEAN'),
 ('Crude Rate', 'CRDRATE'),
 ('Adjusted by age, sex, race and ethnicity', 'AGESEXRACEADJRATE'),
 ('Per capita alcohol consumption', 'PERCAPALC'),
 ('Age-adjusted Mean', 'AGEADJMEAN'),
 ('Percent', 'PRCT'),
 ('Median', 'MEDIAN'),
 ('Crude Prevalence', 'CRDPREV'),
 ('Number', 'NMBR'),
 ('Prevalence', 'PREV'),
 ('Average Annual Crude Rate', 'AVGANNCRDRATE'),
 ('Age-adjusted Prevalence', 'AGEADJPREV'),
 ('Average Annual Age-adjusted Rate', 'AVGANNAGEADJRATE'),
 ('Age-adjusted Rate', 'AGEADJRATE')]

# unique questions under each unique topic

In [9]:
# Print the questions that belong to each topic.
# The loop variable is called `topic_code` rather than `id` so that the builtin
# `id()` is not shadowed for the rest of the kernel session, and the code is bound
# as a prepared-statement parameter instead of being pasted into the SQL text
# (a value containing a quote would otherwise break the query).
for topic_code, topic in topic_id:
    topic_id_questions = get_flat_table_rows(conn.execute("""
        SELECT Question
        FROM Question
        WHERE TopicID = ?
    """, [topic_code]).fetchall())
    print(f"topic {topic}, questions: {topic_id_questions}\n")

topic Tobacco, questions: ['Current cigarette smoking among youth', 'Current cigarette smoking among women aged 18-44 years', 'Current smoking among adults aged >= 18 years', 'Pneumococcal vaccination among noninstitutionalized adults aged >= 65 years who smoke', 'Current smokeless tobacco use among adults aged >= 18 years', 'Current smokeless tobacco use among youth', 'Pneumococcal vaccination among noninstitutionalized adults aged 18-64 years who smoke', 'Proportion of the population protected by a comprehensive smoke-free policy prohibiting smoking in all indoor areas of workplaces and public places, including restaurants and bars', 'Quit attempts in the past year among current smokers', 'Secondary schools that have a comprehensive tobacco-free school policy in place', 'Sale of cigarette packs', 'Cigarette smoking before pregnancy', 'Percent tobacco revenue to fund at CDC recommended level']

topic Cancer, questions: ['Papanicolaou smear use among adult women aged 21-65 years', 'Rec

# Unique combinations of datavaluetype and datavalueunit

In [10]:
# One row per distinct (DataValueTypeID, DataValueUnit) combination found in CDI,
# returned as (unit, data value type label). Both selected columns are wrapped in
# MAX() because the query groups by the type ID and unit rather than by the label.
# A plain string is used: the query has no placeholders, so no f-string is needed.
dvt_dvu_pairs = conn.sql("""
    SELECT max(cdi.DataValueUnit), MAX(dvt.DataValueType)
    FROM CDI cdi
    LEFT JOIN DataValueType dvt
    ON cdi.DataValueTypeID = dvt.DataValueTypeID
    GROUP BY cdi.DataValueTypeID, cdi.DataValueUnit
""")

In [11]:
# Order the (unit, type) pairs by unit so that types sharing a unit are listed together.
sorted(dvt_dvu_pairs.fetchall(), key=lambda pair: pair[0])

[('$', 'US Dollars'),
 ('%', 'Prevalence'),
 ('%', 'Age-adjusted Prevalence'),
 ('%', 'Crude Prevalence'),
 ('%', 'Percent'),
 ('Number', 'Median'),
 ('Number', 'Age-adjusted Mean'),
 ('Number', 'Number'),
 ('Number', 'Mean'),
 ('Years', 'Number'),
 ('cases per 1,000', 'Crude Rate'),
 ('cases per 1,000', 'Age-adjusted Rate'),
 ('cases per 1,000,000', 'Number'),
 ('cases per 1,000,000', 'Adjusted by age, sex, race and ethnicity'),
 ('cases per 1,000,000', 'Crude Rate'),
 ('cases per 1,000,000', 'Age-adjusted Rate'),
 ('cases per 10,000', 'Crude Rate'),
 ('cases per 10,000', 'Age-adjusted Rate'),
 ('cases per 100,000', 'Crude Rate'),
 ('cases per 100,000', 'Average Annual Age-adjusted Rate'),
 ('cases per 100,000', 'Number'),
 ('cases per 100,000', 'Average Annual Crude Rate'),
 ('cases per 100,000', 'Age-adjusted Rate'),
 ('gallons', 'Per capita alcohol consumption'),
 ('pack sales per capita', 'Number')]

# What we have to do now is identify which questions can make use of the population values from the `Population` table we've just calculated, and from there work out how each `DataValue` can be converted into a tangible number

## Alcohol use among youth

![intial powerbi analyses (1).png](./figures%20&%20images/intial%20powerbi%20analyses%20(1).png)

it seems that `alcohol use among youth` question is something that can't make use of the `Population` table at first glance as what is measured here is alcohol use. But if we look closely the data value can't exactly say for sure whether alcohol amount is used e.g. 3.6% isn't exactly a measurement of alcohol amount but more likely a percentage of population namely the youth using alcohol

![intial powerbi analyses (2).png](./figures%20&%20images/intial%20powerbi%20analyses%20(2).png)

We also have other information like the datavaluetype used for the question. This is basically how PowerBI works when we put a slicer in our workspace we essentially get the unique values a unique value can take in, in this case a topic of `alcohol` has its unique questions like `alcohol use among youth` and then under it it has the unique `datavaluetype's` it uses like `crude prevalence`, and we know `crude prevalence` has exactly one corresponding `datavalueunit` which is the `%` symbol

because this question can make use of the `Population` table we can calculate the population of youth using alcohol. $\frac{datavalue}{100} \cdot youth\_population$

## amount of alcohol excise tax by beverage type (beer)

![](./figures%20&%20images/intial%20powerbi%20analyses%20(4).png)

in this case `amount of alcohol excise tax by beverage type (beer)` can't make use of the Population table as what is measured here is `amount of alcohol excise tax`

![](./figures%20&%20images/intial%20powerbi%20analyses%20(3).png)

No calculation will be made here; the `datavalue` alone will be used.

## binge drinking frequency among adults aged >= 18 years who binge drink

![](./figures%20&%20images/intial%20powerbi%20analyses%20(5).png)

it seems that `binge drinking frequency among adults aged >= 18 years who binge drink` question is something that can't make use of the `Population` table as is only specific to the population of those 18+ **who binge drink**. To get a tangible number namely the drinking frequency of these people aged 18+ who binge drink, we need to get the prevalence of these people aged 18+ who indeed binge drink, which we know can be derived from other questions namely `binge drinking prevalence among adults aged >= 18 years` and then from there multiply it by the binge drinking frequency, since it is assumed that each person that binge drinks has this frequency.

![](./figures%20&%20images/intial%20powerbi%20analyses%20(6).png)

calculation would be $prevalence\_of\_binge\_drinkers\_aged\_18+ \cdot datavalue$ but since we don't have prevalence of binge drinkers aged 18+ until we further calculate it the alternative calculation would be $mean\_binge\_drinking\_frequency\_among\_adults\_aged\_18+\_who\_binge\_drink$, since its datavalueunit is just Number meaning this number as it is will be the representation of this indicator

## binge drinking prevalence among adults aged >=  18 years

![](./figures%20&%20images/intial%20powerbi%20analyses%20(7).png)

here `binge drinking prevalence among adults aged 18+` can make use of the `Population` table since the only demographic being focused on are adults aged 18+

![](./figures%20&%20images/intial%20powerbi%20analyses%20(8).png)

we can calculate the number of cases, or the so-called prevalence, of adults aged 18+ who binge drink by $\frac{datavalue}{100} \cdot population\_of\_adults\_aged\_18+$

## chronic liver disease mortality

![](./figures%20&%20images/intial%20powerbi%20analyses%20(9).png)

There is no explicit age group listed in the question `chronic liver disease mortality`, so it is assumed that persons of all age groups are in this pool, and we look to the stratification details of this data point. Because a secondary SQL transformation has already created the `Population` table with values broken down by age, sex, race, origin, state, and year, we can again make use of the `Population` table here and calculate a tangible number pertaining not to the prevalence of a chronic disease indicator but to the mortality rate of those having chronic disease.

Again like previous questions each data point has its stratification which we need to take into account and we had taken into account during the creation of the `Population` table, all we have to do now is do calculations to this specific `Population` value coupled with the given `datavalue`

![](./figures%20&%20images/intial%20powerbi%20analyses%20(10).png)

however we are looking now at the rate and we know for `datavaluetype`s of Age-Adjusted Rate and Crude Rate, these don't use `datavalueunit`s of % or percentages, where we can use our `datavalue` and divide it by mere 100. Since age-adjusted rate and crude rate vary in `datavalueunit`s namely cases per 1000, cases per 10000, cases per 100000, and cases per 1000000, we will have to conditionally change our denominator that our datavalue will divide to based on these `datavalueunit`s 

in this question the datavalueunit is not % but cases per 100000, so we should use 100000 as the denominator. Final calculation would be $\frac{datavalue}{100000} \cdot population\_value(considering age, state, year, sex, race, ethnicity)$ 

## Per capita alcohol consumption among persons aged >= 14 years 

![](./figures%20&%20images/intial%20powerbi%20analyses%20(11).png)

Again this wouldn't make sense for us to use the population value of those persons aged 14+ to calculate the number of cases of something as its not really the cases being measured here but the per capita alcohol consumption, but we can still make use of the `Population` table value for this data point in order to get the total amount of alcohol consumed by the whole population for this demographic of 14+ 

![](./figures%20&%20images/intial%20powerbi%20analyses%20(12).png)

To get the total gallons of alcohol consumed by this population, you would multiply the per capita consumption by the total population: $\text{Per capita alcohol consumption (DataValue)} \times \text{Total population} \ (\geq 14 \text{ years})$

## We can actually do this in SQL instead of doing it in powerbi

* '$', 'US Dollars', 'USD': $datavalue$
* '%', 'Age-adjusted Prevalence', 'AGEADJPREV': $\frac{datavalue}{100} \cdot populationvalue$
* '%', 'Crude Prevalence', 'CRDPREV': $\frac{datavalue}{100} \cdot populationvalue$
* '%', 'Percent', 'PRCT': $\frac{datavalue}{100} \cdot populationvalue$
* '%', 'Prevalence', 'PREV': $\frac{datavalue}{100} \cdot populationvalue$
-----
* 'Number', 'Median', 'MEDIAN': $datavalue$
* 'Number', 'Age-adjusted Mean', 'AGEADJMEAN': $datavalue$ 
* 'Number', 'Mean', 'MEAN': $datavalue$
* 'Number', 'Number', 'NMBR': $datavalue$
* 'Years', 'Number', 'NMBR': $datavalue$
-----
* 'cases per 1,000', 'Crude Rate', 'CRDRATE': $\frac{datavalue}{1000} \cdot populationvalue$
* 'cases per 1,000', 'Age-adjusted Rate', 'AGEADJRATE': $\frac{datavalue}{1000} \cdot populationvalue$
-----
* 'cases per 1,000,000', 'Number', 'NMBR': 
* 'cases per 1,000,000', 'Age-adjusted Rate', 'AGEADJRATE': $\frac{datavalue}{1000000} \cdot populationvalue$
* 'cases per 1,000,000', 'Adjusted by age, sex, race and ethnicity', 'AGESEXRACEADJRATE': $\frac{datavalue}{1000000} \cdot populationvalue$
* 'cases per 1,000,000', 'Crude Rate', 'CRDRATE': $\frac{datavalue}{1000000} \cdot populationvalue$
-----
* 'cases per 10,000', 'Crude Rate', 'CRDRATE': $\frac{datavalue}{10000} \cdot populationvalue$
* 'cases per 10,000', 'Age-adjusted Rate', 'AGEADJRATE': $\frac{datavalue}{10000} \cdot populationvalue$
-----
* 'cases per 100,000', 'Average Annual Crude Rate', 'AVGANNCRDRATE': $\frac{datavalue}{100000} \cdot populationvalue$
* 'cases per 100,000', 'Crude Rate', 'CRDRATE': $\frac{datavalue}{100000} \cdot populationvalue$
* 'cases per 100,000', 'Age-adjusted Rate', 'AGEADJRATE': $\frac{datavalue}{100000} \cdot populationvalue$
* 'cases per 100,000', 'Average Annual Age-adjusted Rate', 'AVGANNAGEADJRATE': $\frac{datavalue}{100000} \cdot populationvalue$
* 'cases per 100,000', 'Number', 'NMBR':
-----
* 'gallons', 'Per capita alcohol consumption', 'PERCAPALC': $datavalue \cdot populationvalue$
* 'pack sales per capita', 'Number', 'NMBR': depends on question if what is measured can make use of the number of cases of people having the CDI

# Asking questions

## 1. What is the prevalence of alcohol use among youth (male and female) in listed year ranges?

![](./figures%20&%20images/initial%20analyses%20powerbi%20(2).png)

![](./figures%20&%20images/initial%20analyses%20powerbi.png)

what we want to do now is to filter the chronic disease table into the `alcohol use among youth` question, with the stratification across hispanic and non hispanic, female and male, and all races

We have this slicer here, and it filters the CDI table down to the rows for the `alcohol use among youth` question. We want to calculate some sort of average of alcohol use among youth, or any other aggregation, on the table that results from this slicer. The question is: how can we aggregate over this calculated table visual, and how can we use DAX/SQL to query from it?

**DAX query** to create the column `TotalEvents` representing the tangible number of a chronic disease indicator:
```
TotalEvents = SWITCH(
    // SWITCH(TRUE(), cond1, result1, cond2, result2, ...) returns the result paired
    // with the first condition that evaluates to TRUE.
    // NOTE(review): there is no trailing default argument, so a row matching none of
    // the conditions below evaluates to BLANK.
    TRUE(),
    // Dollar amounts, medians, means and plain numbers are used as-is.
    // NOTE(review): this branch does not check DataValueUnit, so every "NMBR" row is
    // passed through unchanged whatever its unit.
    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"USD", "MEDIAN", "AGEADJMEAN", "NMBR", "MEAN"},
    'chronic_disease_analyses_db   main   CDI'[DataValue],

    // Percentages: DataValue / 100 of the related population.
    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"AGEADJPREV", "CRDPREV", "PRCT", "PREV"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "%",
    ('chronic_disease_analyses_db   main   CDI'[DataValue] / 100) * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population]),

    // Rates: the divisor follows the "cases per N" unit of the row.
    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"CRDRATE", "AGEADJRATE"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "cases per 1,000",
    ('chronic_disease_analyses_db   main   CDI'[DataValue] / 1000) * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population]),

    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"CRDRATE", "AGEADJRATE"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "cases per 10,000",
    ('chronic_disease_analyses_db   main   CDI'[DataValue] / 10000) * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population]),

    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"CRDRATE", "AGEADJRATE", "AVGANNCRDRATE", "AVGANNAGEADJRATE"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "cases per 100,000",
    ('chronic_disease_analyses_db   main   CDI'[DataValue] / 100000) * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population]),

    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"CRDRATE", "AGEADJRATE", "AGESEXRACEADJRATE"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "cases per 1,000,000",
    ('chronic_disease_analyses_db   main   CDI'[DataValue] / 1000000) * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population]),

    // Per capita consumption in gallons: multiplied by the related population.
    'chronic_disease_analyses_db   main   CDI'[DataValueTypeID] IN {"PERCAPALC"} && 'chronic_disease_analyses_db   main   CDI'[DataValueUnit] = "gallons",
    'chronic_disease_analyses_db   main   CDI'[DataValue] * RELATED('chronic_disease_analyses_db   main   CalculatedPopulation'[Population])
)
```

Now that we have the `TotalEvents` column we can use it in a visual that shows the top states in a specific year range that has the most youth population estimates using alcohol

![](./figures%20&%20images/initial%20analyses%20powerbi%20(4).png)

this answers our question above in multiple ways
- the states with the most estimated youth populations recorded in 2013 using alcohol were in Texas, New York, Florida, Illinois, and Ohio
- the states with the most estimated youth populations recorded in 2015 using alcohol were in California, Florida, New York, Illinois, and Pennsylvania
- the states with the most estimated youth populations recorded in 2017 using alcohol were in California, Texas, New York, Florida, and Pennsylvania
- the states with the most estimated youth populations recorded in 2019 using alcohol were in California, Texas, New York, Florida, and Illinois

## 2. what is average binge drinking prevalence among adults aged >= 18 from 2001 to 2021

![](./figures%20&%20images/initial%20analyses%20powerbi%20(10).png)

Our question about the prevalence of adults aged >= 18 years who binge drink is answered in multiple ways:
1. The 1st graph which is the `estimated population of adults aged >= 18 years who binge drink across all states and all years per gender and datavaluetype` combines population in all states and all years of those who binge drink by using their average. 
- **We see that the average age adjusted prevalence of males who binge drink is 554k its crude prevalence slightly lower 540k, and average age adjusted prevalence of females who binge drink is 327k its crude prevalence**
2. the 2nd graph which is the `estimated population of adults aged >= 18 years who binge drink across all years per gender and per state` combines population in all years and datavaluetypes who binge drink by using their average. 
- **We see that the top 5 highest populations who binge drink per state are from california, texas, florida, new york, and illinois (a trend relatively consistent to the prevalence of alcohol use in youth) with ranges from 2.05M to 5.23M**
- **top 5 lowest populations who binge drink per state are from delaware, north dakota, alaska, vermont, and wyoming with ranges from 80k to 140k** 
3. the 3rd graph is the `population percentages of male and female adults aged >= 18 years who binge drink across all years per state and per gender` which combines population in all years and datavaluetypes who binge drink by using their average
- it is relatively consistent that the difference between male populations and female populations who binge drink per state **is 30%**

As there are many years recorded for the `binge drinking prevalence among adults aged >= 18 years`, we can create a line graph instead that better visualizes these populations per year per state. These graphs we see **are the states with the highest prevalence of binge drinking**.

#### California
![](./figures%20&%20images/initial%20analyses%20powerbi%20(11).png)

#### Texas
![](./figures%20&%20images/initial%20analyses%20powerbi%20(13).png)

#### Florida
![](./figures%20&%20images/initial%20analyses%20powerbi%20(14).png)

#### New York
![](./figures%20&%20images/initial%20analyses%20powerbi%20(15).png)

#### Illinois
![](./figures%20&%20images/initial%20analyses%20powerbi%20(12).png)

The 4th graph shows the `average population size of the top 5 states associated the most with having high prevalence of individuals who binge drink across all years ranging from 2011 to 2021`, which combines population in all datavaluetypes by using the average

Since the male population across all states and all years tend to have on average the highest prevalence of binge drinking the trends in each state convey the ff.
- the state of *california* seems to have its **male population show a seasonal trend of rising and falling prevalence of binge drinking males**. Its highest was in 2011, then after 3 years it was at its lowest, and after another 3 years, in 2017, the prevalence shot up, almost matching the previous high of 2011, then came down again after 4 years in 2021
- **the trend of the female population of binge drinkers seem to be steady with little spikes**
- In the state of *texas* both male and female populations of binge drinkers **seem to have a gradual uptrend starting from 2011 up to 2021** but no seasonal pattern involved.
- in the state of *florida* there seems to be a seasonal pattern involved like in california, as **for both male and female populations of binge drinkers, the number starting from 2011 decreases after 1 to 2 years, then increases again after 1 to 2 years to a new and slightly higher population of binge drinkers in each gender than before**
- *Illinois* seems interesting as its pattern shows a downtrend starting from 2011 all the way to 2020 but rose in 2021 slightly. The **recorded number of binge drinkers of each gender was at an all time high in 2011 at 1.49M and 880K for male and female respectively and then came down at its lowest in 2020 at 860K and 530K respectively.**
- *New York* seems to have a down trend over the years as both male and female populations of binge drinkers **started out high in 2011 with a recorded number of 1.91m and 1.24m respectively and then came down in 2014 and 2012 respectively then started to rise again slightly thereafter and then had continually gradual decline**


![](./figures%20&%20images/initial%20analyses%20powerbi%20(17).png)

![](./figures%20&%20images/initial%20analyses%20powerbi%20(18).png)

![](./figures%20&%20images/initial%20analyses%20powebi%20(20).png)

![](./figures%20&%20images/initial%20analyses%20powerbi%20(15).png)

![](./figures%20&%20images/initial%20analyses%20powerbi%20(19).png)

![](./figures%20&%20images/initial%20analyses%20powerbi%20(16).png)

## 3. what is the combined drinking intensity and frequency among adults aged >= 18 years who binge drink

question of `binge drinking intensity among adults aged >= 18 years who binge drink` has id `ALC4_0`, `Binge drinking frequency among adults aged >= 18 years who binge drink` has id `ALC3_0`, and the question of `Binge drinking prevalence among adults aged >= 18 years` has id `ALC2_2`. What we want to do here is use a filtered version of the CDI table with the `question_id` as `ALC2_2` which would be the ff.

![](./figures%20&%20images/initial%20analyses%20powerbi%20(7).png)

and then join it to another filtered version of the CDI table with the `question_id` as `ALC4_0` or `ALC3_0`, which are the `binge drinking intensity/frequency among adults aged >= 18 years who binge drink` questions. We know these pertain only to the population of those who binge drink, which we have already calculated in the `TotalEvents` column representing the total prevalence of those who do binge drink in a specific state, year, sex, ethnicity, and origin

since these two tables already have their `ageend`s and `agestart`s to be the same we don't need to join these tables on specific join conditions that factors in the age group. We only need to join them based on their `DataValueType`, `LocationDesc`, `YearStart`, `YearEnd`, and `StratificationID`. E.g. `(binge drinking prevalence among adults aged >= 18 years, age-adjusted prevalence, tennessee, 2012, 2012, b_f_all, ...)` must be joined with `(binge drinking frequency among adults aged >= 18 years who binge drink, age-adjusted mean, tennessee, 2012, 2012, b_f_all, ...)`

then the `TotalEvents` in the `ALC2_2` filtered table must be multiplied with the `DataValue` of the `ALC3_0` or `ALC4_0` filtered table

In [12]:
# CDI rows for QuestionID 'ALC2_2' (the binge drinking prevalence question),
# restricted to the 'B_F_ALL' and 'B_M_ALL' stratifications
# (presumably female / male across all races -- confirm against the Stratification table).
alc2_2 = conn.sql("""
    SELECT *
    FROM CDI
    WHERE QuestionID = 'ALC2_2' AND (StratificationID = 'B_F_ALL' OR StratificationID = 'B_M_ALL')
""")

In [13]:
# Display the filtered prevalence relation.
alc2_2

┌───────────┬─────────┬────────────┬───────────────┬───────────┬────────────────────┬─────────────────────┬────────────┬─────────────────┬──────────────────┬────────┬────────────┐
│ YearStart │ YearEnd │ LocationID │ DataValueUnit │ DataValue │ LowConfidenceLimit │ HighConfidenceLimit │ QuestionID │ DataValueTypeID │ StratificationID │ LogID  │ Population │
│   int32   │  int32  │  varchar   │    varchar    │  double   │       double       │       double        │  varchar   │     varchar     │     varchar      │ int32  │   int64    │
├───────────┼─────────┼────────────┼───────────────┼───────────┼────────────────────┼─────────────────────┼────────────┼─────────────────┼──────────────────┼────────┼────────────┤
│      2015 │    2015 │ WV         │ %             │       5.8 │                4.8 │                 7.0 │ ALC2_2     │ AGEADJPREV      │ B_F_ALL          │    242 │     783908 │
│      2011 │    2011 │ WV         │ %             │       5.9 │                4.8 │               

In [14]:
# Materialise the filtered rows as a list of tuples and show how many there are.
alc2_2s = alc2_2.fetchall()
len(alc2_2s)

2236

In [15]:
# CDI rows for QuestionID 'ALC4_0' (the binge drinking intensity question),
# restricted to the same 'B_F_ALL' and 'B_M_ALL' stratifications as the prevalence query.
alc4_0 = conn.sql("""
    SELECT *
    FROM CDI
    WHERE QuestionID = 'ALC4_0' AND (StratificationID = 'B_F_ALL' OR StratificationID = 'B_M_ALL')
""")

In [16]:
# Display the filtered intensity relation.
alc4_0

┌───────────┬─────────┬────────────┬───────────────┬───────────┬────────────────────┬─────────────────────┬────────────┬─────────────────┬──────────────────┬────────┬────────────┐
│ YearStart │ YearEnd │ LocationID │ DataValueUnit │ DataValue │ LowConfidenceLimit │ HighConfidenceLimit │ QuestionID │ DataValueTypeID │ StratificationID │ LogID  │ Population │
│   int32   │  int32  │  varchar   │    varchar    │  double   │       double       │       double        │  varchar   │     varchar     │     varchar      │ int32  │   int64    │
├───────────┼─────────┼────────────┼───────────────┼───────────┼────────────────────┼─────────────────────┼────────────┼─────────────────┼──────────────────┼────────┼────────────┤
│      2019 │    2019 │ MA         │ Number        │       4.5 │                4.3 │                 4.8 │ ALC4_0     │ AGEADJMEAN      │ B_F_ALL          │   1467 │    3028874 │
│      2021 │    2021 │ NV         │ Number        │       4.8 │                4.3 │               

In [17]:
# Row count of the intensity rows, for comparison with the prevalence row count.
len(alc4_0.fetchall())

2236

It seems that the average binge drinking prevalence among those aged >= 18 in 2011 is highest in California. What I want to be able to do is use the average binge drinking intensity/frequency of those who already binge drink.

the question specifically targets the subpopulation of "adults aged >= 18 years who binge drink," the total adult population of Arizona is not directly applicable for converting this average frequency into a tangible number of individuals or binge drinking episodes.

Here's why:

The Average is Conditional: The average binge drinking frequency of 3.6 is conditional on the individual being a binge drinker. It's calculated only among those who engage in this behavior.
We Don't Know the Number of Binge Drinkers: The total adult population includes both binge drinkers and non-binge drinkers. Without knowing the prevalence (i.e., the proportion or number) of binge drinkers within that 150,000 adult population, we can't determine how many individuals this average applies to.

What the 3.6 Tells Us:

On average, an adult in Arizona aged 18 and above who binge drinks does so approximately 3.6 times within the specified time period (e.g., per month, per year - this timeframe isn't provided).


To get a tangible number, you would need additional information:

Prevalence of Binge Drinking: You would need to know what percentage or the actual number of adults aged 18 and above in Arizona in 2015 were classified as binge drinkers.

Example: If a study showed that 20% of the 150,000 adults in Arizona were binge drinkers, then the number of binge drinkers would be 0.20 * 150,000 = 30,000.



Calculating Tangible Numbers with Prevalence:

Number of Binge Drinkers: Once you have the prevalence, you can calculate the estimated number of individuals in the target demographic who engage in binge drinking (as shown in the example above).
Total Binge Drinking Episodes: You could then estimate the total number of binge drinking episodes in that population by multiplying the number of binge drinkers by their average frequency: 30,000 binge drinkers * 3.6 episodes/period = 108,000 binge drinking episodes per period.


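That means I have to do a self join: take the filtered table containing the "binge drinking prevalence among adults aged >= 18" question from 2001 to 2021 in all states, and join it to the filtered tables for the binge drinking intensity (`ALC4_0`) and frequency (`ALC3_0`) questions.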

In [18]:
# Self-join of CDI that lines up three alcohol questions for the B_F_ALL and
# B_M_ALL stratifications:
#   * ALC2_2 - binge drinking prevalence; this CTE also derives TotalEvents,
#              turning DataValue into a count via Population according to
#              DataValueTypeID / DataValueUnit
#   * ALC4_0 - binge drinking intensity
#   * ALC3_0 - binge drinking frequency
# Rows are matched on StratificationID, LocationID, YearStart and YearEnd, and
# the data value types are paired AGEADJPREV <-> AGEADJMEAN and CRDPREV <-> MEAN.
# Because both inner joins only accept AGEADJPREV / CRDPREV prevalence rows,
# the only CASE arms that can affect the final output are the '%' arm
# ((DataValue / 100) * Population) and the ELSE fallback, which passes DataValue
# through unchanged when the unit is not '%'.
# BingeDrinkingPopInt / BingeDrinkingPopFreq multiply TotalEvents by the
# intensity / frequency DataValue respectively.
# The result is not assigned; it is shown as the cell output.
conn.sql("""
    WITH ALC2_2 AS (
        SELECT 
            *,
            CASE
                WHEN DataValueTypeID IN ('USD', 'MEDIAN', 'AGEADJMEAN', 'NMBR', 'MEAN') THEN DataValue
                WHEN DataValueTypeID IN ('AGEADJPREV', 'CRDPREV', 'PRCT', 'PREV') AND DataValueUnit = '%' THEN ((DataValue / 100) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE') AND DataValueUnit = 'cases per 1,000' THEN ((DataValue / 1000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE') AND DataValueUnit = 'cases per 10,000' THEN ((DataValue / 10000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE', 'AVGANNCRDRATE', 'AVGANNAGEADJRATE') AND DataValueUnit = 'cases per 100,000' THEN ((DataValue / 100000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE', 'AGESEXRACEADJRATE') AND DataValueUnit = 'cases per 1,000,000' THEN ((DataValue / 1000000) * Population)
                WHEN DataValueTypeID IN ('PERCAPALC') AND DataValueUnit = 'gallons' THEN DataValue * Population
                ELSE DataValue
            END AS TotalEvents
        FROM CDI
        WHERE QuestionID = 'ALC2_2' AND (StratificationID = 'B_F_ALL' OR StratificationID = 'B_M_ALL')
    ),

    -- this table contains the questions related to binge drinking intensity
    ALC4_0 AS (
        SELECT *
        FROM CDI
        WHERE QuestionID = 'ALC4_0' AND (StratificationID = 'B_F_ALL' OR StratificationID = 'B_M_ALL')
    ),
         
    -- this table contains the questions related to binge drinking frequency
    ALC3_0 AS (
        SELECT *
        FROM CDI
        WHERE QuestionID = 'ALC3_0' AND (StratificationID = 'B_F_ALL' OR StratificationID = 'B_M_ALL')
    ),
    
    -- we self join all the tables on the condition that their 
    -- LocationID, StratificationID, YearStart, YearEnd are the same
    BingeDrinkingIntFreq AS (
        SELECT 
            ALC2_2.LogID,
            ALC2_2.QuestionID AS AlcPrevID,
            ALC4_0.QuestionID AS AlcIntID,
            ALC3_0.QuestionID AS AlcFreqID,
            ALC2_2.DataValue AS AlcPrevDataValue,
            ALC4_0.DataValue AS AlcIntDataValue,
            ALC3_0.DataValue AS AlcFreqDataValue,
            ALC2_2.DataValueUnit AS AlcPrevDataValueUnit,
            ALC4_0.DataValueUnit AS AlcIntDataValueUnit,
            ALC3_0.DataValueUnit AS AlcFreqDataValueUnit,
            ALC2_2.DataValueTypeID AS AlcPrevDataValueTypeID,
            ALC4_0.DataValueTypeID AS AlcIntDataValueTypeID,
            ALC3_0.DataValueTypeID AS AlcFreqDataValueTypeID,
            ALC2_2.StratificationID,
            ALC2_2.LocationID,
            ALC2_2.YearStart,
            ALC2_2.YearEnd,
            ALC2_2.Population,
            ALC2_2.TotalEvents AS BingeDrinkingPopulation,
            (ALC2_2.TotalEvents * ALC4_0.DataValue) AS BingeDrinkingPopInt,
            (ALC2_2.TotalEvents * ALC3_0.DataValue) AS BingeDrinkingPopFreq
        FROM ALC2_2
        INNER JOIN ALC4_0
        ON (
            (ALC2_2.DataValueTypeID = 'AGEADJPREV' AND ALC4_0.DataValueTypeID = 'AGEADJMEAN')
            OR (ALC2_2.DataValueTypeID = 'CRDPREV' AND ALC4_0.DataValueTypeID = 'MEAN')
        )
        AND (ALC2_2.StratificationID = ALC4_0.StratificationID)
        AND (ALC2_2.LocationID = ALC4_0.LocationID)
        AND (ALC2_2.YearStart = ALC4_0.YearStart)
        AND (ALC2_2.YearEnd = ALC4_0.YearEnd)
        INNER JOIN ALC3_0
        ON (
            (ALC2_2.DataValueTypeID = 'AGEADJPREV' AND ALC3_0.DataValueTypeID = 'AGEADJMEAN')
            OR (ALC2_2.DataValueTypeID = 'CRDPREV' AND ALC3_0.DataValueTypeID = 'MEAN')
        )
        AND (ALC2_2.StratificationID = ALC3_0.StratificationID)
        AND (ALC2_2.LocationID = ALC3_0.LocationID)
        AND (ALC2_2.YearStart = ALC3_0.YearStart)
        AND (ALC2_2.YearEnd = ALC3_0.YearEnd)
    )
                                   
    SELECT *
    FROM BingeDrinkingIntFreq
""")

┌────────┬───────────┬──────────┬───────────┬──────────────────┬─────────────────┬──────────────────┬──────────────────────┬─────────────────────┬──────────────────────┬────────────────────────┬───────────────────────┬────────────────────────┬──────────────────┬────────────┬───────────┬─────────┬────────────┬─────────────────────────┬─────────────────────┬──────────────────────┐
│ LogID  │ AlcPrevID │ AlcIntID │ AlcFreqID │ AlcPrevDataValue │ AlcIntDataValue │ AlcFreqDataValue │ AlcPrevDataValueUnit │ AlcIntDataValueUnit │ AlcFreqDataValueUnit │ AlcPrevDataValueTypeID │ AlcIntDataValueTypeID │ AlcFreqDataValueTypeID │ StratificationID │ LocationID │ YearStart │ YearEnd │ Population │ BingeDrinkingPopulation │ BingeDrinkingPopInt │ BingeDrinkingPopFreq │
│ int32  │  varchar  │ varchar  │  varchar  │      double      │     double      │      double      │       varchar        │       varchar       │       varchar        │        varchar         │        varchar        │        varchar   

In [19]:
# Builds the relation `binge_drinking_int_freq`, which later cells display and
# convert to a DataFrame.
#
# Structure of the query:
#   * BingeDrinkingPrev - CDI rows for QuestionID 'ALC2_2' restricted to eight
#     stratifications (overall, by sex, and by race/ethnicity), plus a derived
#     TotalEvents column (DataValue converted to a count via Population).
#   * ALC4_0 / ALC3_0   - the intensity / frequency rows for the same
#     stratifications.
#   * BingeDrinkingIntFreq - inner join of the three on StratificationID,
#     LocationID, YearStart and YearEnd, pairing AGEADJPREV with AGEADJMEAN and
#     CRDPREV with MEAN. Only AGEADJPREV / CRDPREV prevalence rows can match, so
#     of the CASE arms only the '%' arm and the ELSE fallback affect the output.
#   * Final SELECT - one row per (LocationID, StratificationID, YearEnd,
#     YearStart); AlcPrevID / AlcIntID / AlcFreqID are also in the GROUP BY but
#     each is a single constant fixed by its CTE's WHERE clause, so they do not
#     split groups. Every measure is averaged over the joined rows in a group,
#     and LogID is reduced with MAX.
# NOTE(review): a group can contain both the age-adjusted and the crude pairing,
# in which case AVG blends the two - confirm that this is intended.
binge_drinking_int_freq = conn.sql("""
    -- binge drinking prevalence among adults aged >= 18 years only and not among women aged 18-44 
    -- years and among youth, since these latter demongraphics have no binge drinking frequencies
    -- and intensities we can join on
    WITH BingeDrinkingPrev AS (
        SELECT 
            *,
            CASE
                WHEN DataValueTypeID IN ('USD', 'MEDIAN', 'AGEADJMEAN', 'NMBR', 'MEAN') THEN DataValue
                WHEN DataValueTypeID IN ('AGEADJPREV', 'CRDPREV', 'PRCT', 'PREV') AND DataValueUnit = '%' THEN ((DataValue / 100) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE') AND DataValueUnit = 'cases per 1,000' THEN ((DataValue / 1000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE') AND DataValueUnit = 'cases per 10,000' THEN ((DataValue / 10000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE', 'AVGANNCRDRATE', 'AVGANNAGEADJRATE') AND DataValueUnit = 'cases per 100,000' THEN ((DataValue / 100000) * Population)
                WHEN DataValueTypeID IN ('CRDRATE', 'AGEADJRATE', 'AGESEXRACEADJRATE') AND DataValueUnit = 'cases per 1,000,000' THEN ((DataValue / 1000000) * Population)
                WHEN DataValueTypeID IN ('PERCAPALC') AND DataValueUnit = 'gallons' THEN DataValue * Population
                ELSE DataValue
            END AS TotalEvents
        FROM CDI
        WHERE QuestionID = 'ALC2_2' AND StratificationID IN ('B_B_ALL', 'B_M_ALL', 'B_F_ALL', 'H_B_ALL', 'NH_B_BLACK', 'NH_B_MULTI', 'NH_B_OTHER', 'NH_B_WHITE')
    ),
                                

    -- this table contains the questions related to binge drinking intensity
    ALC4_0 AS (
        SELECT *
        FROM CDI
        WHERE QuestionID = 'ALC4_0' AND StratificationID IN ('B_B_ALL', 'B_M_ALL', 'B_F_ALL', 'H_B_ALL', 'NH_B_BLACK', 'NH_B_MULTI', 'NH_B_OTHER', 'NH_B_WHITE')
    ),
         
    -- this table contains the questions related to binge drinking frequency
    ALC3_0 AS (
        SELECT *
        FROM CDI
        WHERE QuestionID = 'ALC3_0' AND StratificationID IN ('B_B_ALL', 'B_M_ALL', 'B_F_ALL', 'H_B_ALL', 'NH_B_BLACK', 'NH_B_MULTI', 'NH_B_OTHER', 'NH_B_WHITE')
    ),
    
    -- we self join all the tables on the condition that their 
    -- LocationID, StratificationID, YearStart, YearEnd are the same
    BingeDrinkingIntFreq AS (
        SELECT 
            BingeDrinkingPrev.LogID,
            BingeDrinkingPrev.QuestionID AS AlcPrevID,
            ALC4_0.QuestionID AS AlcIntID,
            ALC3_0.QuestionID AS AlcFreqID,
            BingeDrinkingPrev.DataValue AS AlcPrevDataValue,
            ALC4_0.DataValue AS AlcIntDataValue,
            ALC3_0.DataValue AS AlcFreqDataValue,
            BingeDrinkingPrev.DataValueUnit AS AlcPrevDataValueUnit,
            ALC4_0.DataValueUnit AS AlcIntDataValueUnit,
            ALC3_0.DataValueUnit AS AlcFreqDataValueUnit,
            BingeDrinkingPrev.DataValueTypeID AS AlcPrevDataValueTypeID,
            ALC4_0.DataValueTypeID AS AlcIntDataValueTypeID,
            ALC3_0.DataValueTypeID AS AlcFreqDataValueTypeID,
            BingeDrinkingPrev.StratificationID,
            BingeDrinkingPrev.LocationID,
            BingeDrinkingPrev.YearStart,
            BingeDrinkingPrev.YearEnd,
            BingeDrinkingPrev.Population,
            BingeDrinkingPrev.TotalEvents AS BingeDrinkingPopulation,
            (BingeDrinkingPrev.TotalEvents * ALC4_0.DataValue) AS BingeDrinkingPopInt,
            (BingeDrinkingPrev.TotalEvents * ALC3_0.DataValue) AS BingeDrinkingPopFreq
        FROM BingeDrinkingPrev
        INNER JOIN ALC4_0
        ON (
            (BingeDrinkingPrev.DataValueTypeID = 'AGEADJPREV' AND ALC4_0.DataValueTypeID = 'AGEADJMEAN')
            OR (BingeDrinkingPrev.DataValueTypeID = 'CRDPREV' AND ALC4_0.DataValueTypeID = 'MEAN')
        )
        AND (BingeDrinkingPrev.StratificationID = ALC4_0.StratificationID)
        AND (BingeDrinkingPrev.LocationID = ALC4_0.LocationID)
        AND (BingeDrinkingPrev.YearStart = ALC4_0.YearStart)
        AND (BingeDrinkingPrev.YearEnd = ALC4_0.YearEnd)
        INNER JOIN ALC3_0
        ON (
            (BingeDrinkingPrev.DataValueTypeID = 'AGEADJPREV' AND ALC3_0.DataValueTypeID = 'AGEADJMEAN')
            OR (BingeDrinkingPrev.DataValueTypeID = 'CRDPREV' AND ALC3_0.DataValueTypeID = 'MEAN')
        )
        AND (BingeDrinkingPrev.StratificationID = ALC3_0.StratificationID)
        AND (BingeDrinkingPrev.LocationID = ALC3_0.LocationID)
        AND (BingeDrinkingPrev.YearStart = ALC3_0.YearStart)
        AND (BingeDrinkingPrev.YearEnd = ALC3_0.YearEnd)
    )
                                   
    SELECT 
        MAX(LogID) AS LogID,
        AVG(AlcPrevDataValue) AS AvgAlcPrevDataValue,
        AVG(AlcIntDataValue) AS AvgAlcIntDataValue,
        AVG(AlcFreqDataValue) AS AvgAlcFreqDataValue,
        LocationID,
        StratificationID,
        YearEnd,
        YearStart,
        AVG(BingeDrinkingPopInt) AS AvgBingeDrinkingPopInt,
        AVG(BingeDrinkingPopFreq) AS AvgBingeDrinkingPopFreq,
        AVG(BingeDrinkingPopulation) AS AvgBingeDrinkingPopulation
    FROM BingeDrinkingIntFreq
    GROUP BY LocationID, StratificationID, YearEnd, YearStart, AlcPrevID, AlcIntID, AlcFreqID
""")

In [20]:
# Bare expression: renders the aggregated relation as this cell's output.
binge_drinking_int_freq

┌────────┬─────────────────────┬────────────────────┬─────────────────────┬────────────┬──────────────────┬─────────┬───────────┬────────────────────────┬─────────────────────────┬────────────────────────────┐
│ LogID  │ AvgAlcPrevDataValue │ AvgAlcIntDataValue │ AvgAlcFreqDataValue │ LocationID │ StratificationID │ YearEnd │ YearStart │ AvgBingeDrinkingPopInt │ AvgBingeDrinkingPopFreq │ AvgBingeDrinkingPopulation │
│ int32  │       double        │       double       │       double        │  varchar   │     varchar      │  int32  │   int32   │         double         │         double          │           double           │
├────────┼─────────────────────┼────────────────────┼─────────────────────┼────────────┼──────────────────┼─────────┼───────────┼────────────────────────┼─────────────────────────┼────────────────────────────┤
│ 325736 │                12.5 │  4.949999999999999 │  2.8499999999999996 │ CT         │ B_F_ALL          │    2011 │      2011 │      939141.2677499999 │      

As shown here, we don't really know what is being measured when the data value for, say, the question `binge drinking intensity among adults aged >= 18 years who binge drink` is 5.6. The number alone is not enough: is it 5.6 miles per hour? 5.6 kilometers per hour? 5.6 revolutions per second? On its own, 5.6 doesn't mean anything; it could mean several very different things, depending on the source's methodology:

1. Average Number of Drinks Per Binge Episode - This is the most common interpretation for "intensity." It would mean that, among adults who binge drink, the average number of alcoholic drinks consumed during a single binge drinking episode is 5.6. For context a "binge drinking episode" is usually defined as consuming 4 or more drinks for women, or 5 or more drinks for men, on one occasion. So, 5.6 means they're going beyond the minimum threshold.

2. Average Number of Binge Episodes Per Time Period (e.g., per month, per year) - Less likely to be called "intensity" but possible. It would mean that, among adults who binge drink, they average 5.6 binge drinking episodes in a given timeframe (e.g., 5.6 times per month).
Distinction: This is more about frequency than intensity. Your previous question "binge drinking frequency among adults aged >= 18 years who binge drink" with a value of 3.6 directly points to this. So, if your current data value is also 5.6 for "intensity," it's highly likely they are distinct measures, and "intensity" refers to drinks per episode.

3. A Composite Score - Sometimes, researchers create a composite "intensity" score based on a weighted combination of factors (e.g., number of drinks, speed of consumption, negative consequences experienced). In this case, 5.6 would be a point on an arbitrary scale, and you'd need the methodology to understand it. This is less common for "intensity" alone.

As for the most probable meaning of "intensity": given that I separately have a "binge drinking frequency" measure, it's highly probable that "binge drinking intensity" with a value of 5.6 refers to the average number of drinks consumed per binge drinking occasion by those who binge drink.

You would need to refer to the metadata, documentation, or codebook of the Chronic Disease Indicators (CDI) dataset itself. Somewhere within that documentation, there should be a precise definition for "binge drinking intensity" and its unit of measure. It might be:
- "Average number of alcoholic drinks consumed per binge drinking occasion."
- "Mean number of drinks consumed on days when alcohol is consumed (among those who drink)."

In [21]:
# Fetch the query result into a DataFrame so it can be exported further down.
binge_drinking_int_freq_df = binge_drinking_int_freq.fetchdf()
# Bare last expression: shows the DataFrame as the cell output.
binge_drinking_int_freq_df

Unnamed: 0,LogID,AvgAlcPrevDataValue,AvgAlcIntDataValue,AvgAlcFreqDataValue,LocationID,StratificationID,YearEnd,YearStart,AvgBingeDrinkingPopInt,AvgBingeDrinkingPopFreq,AvgBingeDrinkingPopulation
0,488901,20.55,4.95,3.10,DC,B_F_ALL,2015,2015,3.169674e+05,1.968510e+05,63906.5955
1,597352,13.90,5.50,3.80,MI,B_F_ALL,2017,2017,3.203007e+06,2.214108e+06,583433.3180
2,271538,12.40,6.30,3.70,AL,NH_B_BLACK,2019,2019,7.687664e+05,4.515555e+05,122042.0400
3,217300,18.30,7.00,4.20,DC,NH_B_BLACK,2011,2011,3.065413e+05,1.841355e+05,43818.9840
4,488654,14.80,6.65,5.40,ID,B_B_ALL,2020,2020,1.436546e+06,1.167814e+06,216153.7040
...,...,...,...,...,...,...,...,...,...,...,...
2884,597208,12.15,5.45,3.20,VA,B_F_ALL,2016,2016,2.332561e+06,1.369430e+06,428332.4550
2885,597243,13.20,5.90,3.45,OH,B_F_ALL,2014,2014,3.783898e+06,2.216311e+06,641915.6040
2886,597263,14.05,6.50,2.95,CA,NH_B_OTHER,2013,2013,8.777306e+05,3.982185e+05,134769.8480
2887,434049,13.80,6.75,5.10,KS,NH_B_OTHER,2021,2021,9.247336e+04,6.910228e+04,13677.7320


In [22]:
# Output folder for the exported CSV; created on demand so a fresh checkout works.
VIZ_DIR = "./data/visualizers-data/"
os.makedirs(VIZ_DIR, exist_ok=True)

# Build the file path with os.path.join: VIZ_DIR already ends with "/", so the
# previous f"{VIZ_DIR}/..." form produced a doubled separator ("...-data//binge...").
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default; pass index=False here if the consumer should not see that column.
binge_drinking_int_freq_df.to_csv(os.path.join(VIZ_DIR, "binge_drinking_int_freq.csv"))

However, we cannot directly write SQL queries in Power BI, so we somehow have to recreate this binge drinking intensity/frequency table using DAX measures or the Power Query editor, each of which uses its own syntax.

So far this is what I've achieved. I have no clue how to join these tables on multiple conditions using only DAX
```
BingeDrinkingIntFreq = 
VAR ALC2_2 = FILTER(
    'chronic_disease_analyses_db   main   CDI', 
    'chronic_disease_analyses_db   main   CDI'[QuestionID] = "ALC2_2" && ('chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_F_ALL" || 'chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_M_ALL")
)

VAR ALC4_0 = FILTER(
    'chronic_disease_analyses_db   main   CDI', 
    'chronic_disease_analyses_db   main   CDI'[QuestionID] = "ALC4_0" && ('chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_F_ALL" || 'chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_M_ALL")
)

VAR ALC3_0 = FILTER(
    'chronic_disease_analyses_db   main   CDI', 
    'chronic_disease_analyses_db   main   CDI'[QuestionID] = "ALC3_0" && ('chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_F_ALL" || 'chronic_disease_analyses_db   main   CDI'[StratificationID] = "B_M_ALL")
)

// VAR JoinedWithALC4_0 = ADDCOLUMNS(
//     ALC2_2,
//     "AlcIntID",
//     VAR _currentLoc = [LocationID]
//     VAR _currentStrat = [StratificationID]
//     VAR _currentYS = [YearStart]
//     VAR _currentYE = [YearEnd]
//     VAR _currentDVType = [DataValueTypeID]
//     RETURN ALC4_0[QuestionID]
    // "AlcIntDataValue",
    //     VAR _currentLoc = [LocationID]
    //     VAR _currentStrat = [StratificationID]
    //     VAR _currentYS = [YearStart]
    //     VAR _currentYE = [YearEnd]
    //     VAR _currentDVType = [DataValueTypeID]
    //     RETURN
    //         CALCULATE(
    //             SELECTEDVALUE(ALC4_0[DataValue]), // Use SELECTEDVALUE for a single value lookup
    //             ALC4_0[LocationID] = _currentLoc,
    //             ALC4_0[StratificationID] = _currentStrat,
    //             ALC4_0[YearStart] = _currentYS,
    //             ALC4_0[YearEnd] = _currentYE,
    //             (
    //                 (_currentDVType = "AGEADJPREV" && ALC4_0[DataValueTypeID] = "AGEADJMEAN") ||
    //                 (_currentDVType = "CRDPREV" && ALC4_0[DataValueTypeID] = "MEAN")
    //             )
    //         ),
    // "AlcIntDataValueUnit",
    //     VAR _currentLoc = [LocationID]
    //     VAR _currentStrat = [StratificationID]
    //     VAR _currentYS = [YearStart]
    //     VAR _currentYE = [YearEnd]
    //     VAR _currentDVType = [DataValueTypeID]
    //     RETURN
    //         CALCULATE(
    //             SELECTEDVALUE(ALC4_0[DataValueUnit]),
    //             ALC4_0[LocationID] = _currentLoc,
    //             ALC4_0[StratificationID] = _currentStrat,
    //             ALC4_0[YearStart] = _currentYS,
    //             ALC4_0[YearEnd] = _currentYE,
    //             (
    //                 (_currentDVType = "AGEADJPREV" && ALC4_0[DataValueTypeID] = "AGEADJMEAN") ||
    //                 (_currentDVType = "CRDPREV" && ALC4_0[DataValueTypeID] = "MEAN")
    //             )
    //         ),
    // "AlcIntDataValueTypeID",
    //     VAR _currentLoc = [LocationID]
    //     VAR _currentStrat = [StratificationID]
    //     VAR _currentYS = [YearStart]
    //     VAR _currentYE = [YearEnd]
    //     VAR _currentDVType = [DataValueTypeID]
    //     RETURN
    //         CALCULATE(
    //             SELECTEDVALUE(ALC4_0[DataValueTypeID]),
    //             ALC4_0[LocationID] = _currentLoc,
    //             ALC4_0[StratificationID] = _currentStrat,
    //             ALC4_0[YearStart] = _currentYS,
    //             ALC4_0[YearEnd] = _currentYE,
    //             (
    //                 (_currentDVType = "AGEADJPREV" && ALC4_0[DataValueTypeID] = "AGEADJMEAN") ||
    //                 (_currentDVType = "CRDPREV" && ALC4_0[DataValueTypeID] = "MEAN")
    //             )
    //         )

RETURN ALC4_0
```

In [None]:
# NOTE(review): placeholder cell - the query string is empty and the cell has
# not been executed (In [None]); fill in a query or remove the cell.
conn.sql("""
    
""")