***
# ETL Project: Extract, Transform, Load
***
### Part 3 :  Demographic information on opioid usage and overdose death across the US
***
## Step 1: Extract 
> Data source extraction from a csv file, after deep exploration of the data we needed for this project, we started the retrieval process by writing the code for extraction and then running it through transformation steps. 

### Importing Dependencies

In [42]:
# Import Dependencies:
import pandas as pd
import os
from sqlalchemy import create_engine
from config import password
from config import username

import plotly.express as px

#### Note: We will do all steps in the order of the files numbers

In [2]:
# Creating csv data files path: 
# File 1:
death_cause = os.path.join("Resources", "Multiple_Cause_death.csv")

# File 2:
workforce = os.path.join("Resources", "NSDUH Workforce Adults.csv")

# File 3:
medication_assisted = os.path.join("Resources", "mat_annually.csv")

# File 4:
opioid_death = os.path.join("Resources", "opioid_death.csv")

### Store CSV into DataFrame

In [3]:
# File 1: Reading in data file to store into Pandas DataFrame:
death_cause_df = pd.read_csv("Resources/Multiple_Cause_death.csv")
death_cause_df.head()

Unnamed: 0,State,Year,Deaths,Population,Crude Rate,Crude Rate Lower 95% Confidence Interval,Crude Rate Upper 95% Confidence Interval,Prescriptions Dispensed by US Retailers in that year (millions)
0,Alabama,1999,39,4430141,0.9,0.6,1.2,116
1,Alabama,2000,46,4447100,1.0,0.8,1.4,126
2,Alabama,2001,67,4467634,1.5,1.2,1.9,138
3,Alabama,2002,75,4480089,1.7,1.3,2.1,142
4,Alabama,2003,54,4503491,1.2,0.9,1.6,149


In [4]:
# File 2: Reading in data file to store into Pandas DataFrame:
workforce_df = pd.read_csv("Resources/NSDUH Workforce Adults.csv")
workforce_df.head()

Unnamed: 0.1,Unnamed: 0,IRPINC3,IRFAMIN3,marij_ever,marij_month,marij_year,cocaine_ever,cocaine_month,cocaine_year,crack_ever,...,EverDrugTest,EverDrugTest2,race_str,race_num,education,WouldWorkForDrugTester,SelectiveLeave,SkipSick,sex,druglist
0,1,2,4,1,1,1,1,0,0,1,...,0.0,No,Hispanic,7,2,3,0,0,1,Marijuana Cocaine Crack Hallucinogen 0
1,2,4,4,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,2,1,0,0,1,0
2,3,4,7,1,0,0,0,0,0,0,...,1.0,Yes,White,1,3,3,0,0,2,Marijuana 0
3,4,4,7,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,3,3,0,0,1,0
4,5,2,3,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,1,1,2,2,2,0


In [5]:
# File 3: Reading in data file to store into Pandas DataFrame:
medication_assisted_treatment_df = pd.read_csv("Resources/mat_annually.csv")
medication_assisted_treatment_df.head()

Unnamed: 0,County,Year,Medication_Assisted_Treatment,Beneficiaries,Status,Annotation,Annotation_Description
0,Statewide,2010,Buprenorphine,1265.0,F,,
1,Statewide,2011,Buprenorphine,1680.0,F,,
2,Statewide,2012,Buprenorphine,2099.0,F,,
3,Statewide,2013,Buprenorphine,2129.0,F,,
4,Statewide,2014,Buprenorphine,5000.0,F,,


In [6]:
# File 4: Reading in data file to store into Pandas DataFrame:
opioid_death_df = pd.read_csv("Resources/opioid_death.csv")
opioid_death_df.head()

Unnamed: 0,State,State Code,Gender,Gender Code,Race,Race Code,Year,Year Code,UCD - Drug/Alcohol Induced Cause,UCD - Drug/Alcohol Induced Cause Code,Deaths,Population,Crude Rate
0,Alabama,1,Female,F,Black or African American,2054-5,1999,1999,Drug poisonings (overdose) Unintentional (X40-...,D1,10,623475,Unreliable
1,Alabama,1,Female,F,Black or African American,2054-5,2003,2003,Drug poisonings (overdose) Unintentional (X40-...,D1,13,636876,Unreliable
2,Alabama,1,Female,F,Black or African American,2054-5,2004,2004,Drug poisonings (overdose) Unintentional (X40-...,D1,11,641873,Unreliable
3,Alabama,1,Female,F,Black or African American,2054-5,2007,2007,Drug poisonings (overdose) Unintentional (X40-...,D1,18,664831,Unreliable
4,Alabama,1,Female,F,Black or African American,2054-5,2008,2008,Drug poisonings (overdose) Unintentional (X40-...,D1,17,673102,Unreliable


***
## Step 2: Transform
***
> Transforming the dataset to suit the needs of our project, this will include:
> 1. Cleaning Data
> 2. Removing the null
> 3. Selecting needed columns
> 4. Renaming columns
> 5. Selecting columns to join

In [7]:
# File 1: Cleaning dataset and dropping any bad records:
cleaned_death_cause_df = death_cause_df.dropna(how='any')
cleaned_death_cause_df.head()

Unnamed: 0,State,Year,Deaths,Population,Crude Rate,Crude Rate Lower 95% Confidence Interval,Crude Rate Upper 95% Confidence Interval,Prescriptions Dispensed by US Retailers in that year (millions)
0,Alabama,1999,39,4430141,0.9,0.6,1.2,116
1,Alabama,2000,46,4447100,1.0,0.8,1.4,126
2,Alabama,2001,67,4467634,1.5,1.2,1.9,138
3,Alabama,2002,75,4480089,1.7,1.3,2.1,142
4,Alabama,2003,54,4503491,1.2,0.9,1.6,149


In [8]:
# File 2: Cleaning dataset and dropping any bad records:
cleaned_workforce_df = workforce_df.dropna(how='any')
cleaned_workforce_df.head()

Unnamed: 0.1,Unnamed: 0,IRPINC3,IRFAMIN3,marij_ever,marij_month,marij_year,cocaine_ever,cocaine_month,cocaine_year,crack_ever,...,EverDrugTest,EverDrugTest2,race_str,race_num,education,WouldWorkForDrugTester,SelectiveLeave,SkipSick,sex,druglist
0,1,2,4,1,1,1,1,0,0,1,...,0.0,No,Hispanic,7,2,3,0,0,1,Marijuana Cocaine Crack Hallucinogen 0
1,2,4,4,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,2,1,0,0,1,0
2,3,4,7,1,0,0,0,0,0,0,...,1.0,Yes,White,1,3,3,0,0,2,Marijuana 0
3,4,4,7,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,3,3,0,0,1,0
4,5,2,3,0,0,0,0,0,0,0,...,1.0,Yes,Hispanic,7,1,1,2,2,2,0


In [9]:
# File 3: will drop after selecting columns as it have values that were giving errors if we do at this step.

In [10]:
# File 4: Cleaning dataset and dropping any bad records:
opioid_death_df =opioid_death_df.dropna(how='any')
opioid_death_df.head()

Unnamed: 0,State,State Code,Gender,Gender Code,Race,Race Code,Year,Year Code,UCD - Drug/Alcohol Induced Cause,UCD - Drug/Alcohol Induced Cause Code,Deaths,Population,Crude Rate
0,Alabama,1,Female,F,Black or African American,2054-5,1999,1999,Drug poisonings (overdose) Unintentional (X40-...,D1,10,623475,Unreliable
1,Alabama,1,Female,F,Black or African American,2054-5,2003,2003,Drug poisonings (overdose) Unintentional (X40-...,D1,13,636876,Unreliable
2,Alabama,1,Female,F,Black or African American,2054-5,2004,2004,Drug poisonings (overdose) Unintentional (X40-...,D1,11,641873,Unreliable
3,Alabama,1,Female,F,Black or African American,2054-5,2007,2007,Drug poisonings (overdose) Unintentional (X40-...,D1,18,664831,Unreliable
4,Alabama,1,Female,F,Black or African American,2054-5,2008,2008,Drug poisonings (overdose) Unintentional (X40-...,D1,17,673102,Unreliable


### Create new data with select columns

In [11]:
# File 1:
# Filtering dataset by selecting columns needed to answer potential query:
# Extracting only needed columns:
death_cause_subset = cleaned_death_cause_df[["State", "Year", "Population", "Crude Rate", "Prescriptions Dispensed by US Retailers in that year (millions)" ]]
death_cause_subset.head()

Unnamed: 0,State,Year,Population,Crude Rate,Prescriptions Dispensed by US Retailers in that year (millions)
0,Alabama,1999,4430141,0.9,116
1,Alabama,2000,4447100,1.0,126
2,Alabama,2001,4467634,1.5,138
3,Alabama,2002,4480089,1.7,142
4,Alabama,2003,4503491,1.2,149


In [12]:
# File 2:
# Filtering dataset by selecting subset columns needed to answer potential queries:
# Extracting only needed columns
workforce_subset = cleaned_workforce_df[["IRFAMIN3", "painrelieve_ever", "EmploymentStatus", "race_str", "education", "sex"]]
workforce_subset.head()

Unnamed: 0,IRFAMIN3,painrelieve_ever,EmploymentStatus,race_str,education,sex
0,4,0,1,Hispanic,2,1
1,4,0,1,Hispanic,2,1
2,7,0,1,White,3,2
3,7,0,1,Hispanic,3,1
4,3,0,2,Hispanic,1,2


In [13]:
# File 3:
# Filtering dataset by selecting columns needed to answer potential query:
# Extracting only needed columns:
mat_subset = medication_assisted_treatment_df[["County", "Year", "Beneficiaries"]]
mat_subset.head()

Unnamed: 0,County,Year,Beneficiaries
0,Statewide,2010,1265.0
1,Statewide,2011,1680.0
2,Statewide,2012,2099.0
3,Statewide,2013,2129.0
4,Statewide,2014,5000.0


In [14]:
# File 3: now we're able to drop null values from previous step
# Cleaning dataset and dropping any bad records:
assisted_treatment = mat_subset.dropna(how='any')
assisted_treatment.head()

Unnamed: 0,County,Year,Beneficiaries
0,Statewide,2010,1265.0
1,Statewide,2011,1680.0
2,Statewide,2012,2099.0
3,Statewide,2013,2129.0
4,Statewide,2014,5000.0


In [15]:
# File 4:
# Filtering dataset by selecting columns needed to answer potential query:
# Extracting only needed columns:
opioid_death = opioid_death_df[["State", "Gender", "Race", "Year", "Deaths", "Population"]]
opioid_death.head()

Unnamed: 0,State,Gender,Race,Year,Deaths,Population
0,Alabama,Female,Black or African American,1999,10,623475
1,Alabama,Female,Black or African American,2003,13,636876
2,Alabama,Female,Black or African American,2004,11,641873
3,Alabama,Female,Black or African American,2007,18,664831
4,Alabama,Female,Black or African American,2008,17,673102


#### Checking Datasets columns data types

In [16]:
# File 1: Checking columns data type to see if we need to change in order to avoid errors in loading step
death_cause_subset.dtypes

State                                                              object
Year                                                                int64
Population                                                          int64
Crude Rate                                                         object
Prescriptions Dispensed by US Retailers in that year (millions)     int64
dtype: object

In [17]:
# File 1: We will need to convert the Crude Rate columns data type
# Converting data type for Crude Rate column:
death_cause_subset['Crude Rate'] = death_cause_subset['Crude Rate'].astype(str)
death_cause_subset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,State,Year,Population,Crude Rate,Prescriptions Dispensed by US Retailers in that year (millions)
0,Alabama,1999,4430141,0.9,116
1,Alabama,2000,4447100,1.0,126
2,Alabama,2001,4467634,1.5,138
3,Alabama,2002,4480089,1.7,142
4,Alabama,2003,4503491,1.2,149


In [18]:
# File 2: Checking columns data type to see if we need to change in order to avoid errors in loading step
workforce_subset.dtypes

IRFAMIN3             int64
painrelieve_ever     int64
EmploymentStatus     int64
race_str            object
education            int64
sex                  int64
dtype: object

In [19]:
# File 2: We will need to convert the race_str columns data type
# Converting data type for race_str column:
workforce_subset['race_str'] = workforce_subset['race_str'].astype(str)
workforce_subset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,IRFAMIN3,painrelieve_ever,EmploymentStatus,race_str,education,sex
0,4,0,1,Hispanic,2,1
1,4,0,1,Hispanic,2,1
2,7,0,1,White,3,2
3,7,0,1,Hispanic,3,1
4,3,0,2,Hispanic,1,2


In [20]:
# File 3: Checking columns data type to see if we need to change in order to avoid errors in loading step
assisted_treatment.dtypes

County            object
Year               int64
Beneficiaries    float64
dtype: object

In [21]:
# File 3: we don't need to convert any dsta type in columns

In [22]:
# File 4: Checking columns data type to see if we need to change in order to avoid errors in loading step
opioid_death.dtypes

State         object
Gender        object
Race          object
Year           int64
Deaths         int64
Population     int64
dtype: object

In [23]:
# File 4: we don't need to convert any dsta type in columns

#### Renaming columns for more clearity and easier for loading process

In [24]:
# File 1: 
# Renaming the subset dataframe columns:
overdose_death = death_cause_subset.rename(columns={
    'Crude Rate': 'Crude_Rate',
    'Prescriptions Dispensed by US Retailers in that year (millions)': 'Dispensed_Prescriptions'})
overdose_death.head()

Unnamed: 0,State,Year,Population,Crude_Rate,Dispensed_Prescriptions
0,Alabama,1999,4430141,0.9,116
1,Alabama,2000,4447100,1.0,126
2,Alabama,2001,4467634,1.5,138
3,Alabama,2002,4480089,1.7,142
4,Alabama,2003,4503491,1.2,149


In [25]:
# File 2:
# Renaming the subset dataframe columns:
demographic_drug_use = workforce_subset.rename(columns={
    'IRFAMIN3': 'Total_Family_Income',
    'painrelieve_ever': 'Pain_Relieve_Ever',
    'EmploymentStatus': 'Employment_Status',
    'race_str': 'Race',
    'education': 'Education',
    'sex': 'Gender'
})
demographic_drug_use.head()

Unnamed: 0,Total_Family_Income,Pain_Relieve_Ever,Employment_Status,Race,Education,Gender
0,4,0,1,Hispanic,2,1
1,4,0,1,Hispanic,2,1
2,7,0,1,White,3,2
3,7,0,1,Hispanic,3,1
4,3,0,2,Hispanic,1,2


In [26]:
# File 3: columns will stay the same no renaming

In [27]:
# File 4: columns will stay the same no renaming

#### Saving cleaned csv files into Output folder for loading step

In [28]:
# File 1:
# Saving needed subset into csv file: 
overdose_death.to_csv('Resources/Output/overdose_death.csv', index=False)

In [29]:
# File 2:
# Save needed subset into csv file:
demographic_drug_use.to_csv('Resources/Output/demographic_drug_use.csv', index=False)

In [30]:
# File 3:
assisted_treatment.to_csv('Resources/Output/assisted_treatment.csv', index=False)

In [31]:
# File 4:
opioid_death.to_csv('Resources/Output/opioid_death.csv', index=False)

***
## Step 3: Load
***
> * This is the final step of the ETL process, here we loaded our extracted and transformed data into a database
> * We used postgres for this step to load and store our database

### Connect to local database

In [32]:
# Connecting to localhost database:
engine = create_engine(f'postgresql://{username}:{password}@localhost:5432/ETL_Project')
connection = engine.connect()

### Check for tables

In [33]:
# Reviewing the tables from SQL Database
engine.table_names()

['overdose_death',
 'demographic_drug_use',
 'assisted_treatment',
 'opioid_death']

### Confirm data has been added by querying the customer_name table

In [34]:
# File 1: 
pd.read_sql_query('select * from overdose_death', con=engine).head()

Unnamed: 0,state,year,population,crude_rate,dispensed_prescriptions
0,Alabama,1999,4430141,0.9,116
1,Alabama,2000,4447100,1.0,126
2,Alabama,2001,4467634,1.5,138
3,Alabama,2002,4480089,1.7,142
4,Alabama,2003,4503491,1.2,149


In [35]:
# File 2: 
pd.read_sql_query('select * from demographic_drug_use', con=engine).head()

Unnamed: 0,total_family_income,pain_relieve_ever,employment_status,race,education,gender
0,4,0,1,Hispanic,2,1
1,4,0,1,Hispanic,2,1
2,7,0,1,White,3,2
3,7,0,1,Hispanic,3,1
4,3,0,2,Hispanic,1,2


In [36]:
# File 3: 
pd.read_sql_query('select * from assisted_treatment', con=engine).head()

Unnamed: 0,county,year,beneficiaries
0,Statewide,2010,1265.0
1,Statewide,2011,1680.0
2,Statewide,2012,2099.0
3,Statewide,2013,2129.0
4,Statewide,2014,5000.0


In [37]:
# File 4: 
pd.read_sql_query('select * from opioid_death', con=engine).head()

Unnamed: 0,state,gender,race,year,deaths,population
0,Alabama,Female,Black or African American,1999,10,623475
1,Alabama,Female,Black or African American,2003,13,636876
2,Alabama,Female,Black or African American,2004,11,641873
3,Alabama,Female,Black or African American,2007,18,664831
4,Alabama,Female,Black or African American,2008,17,673102


# Bonus:
***
## Step 4: Analysis
***

### In this part we used the retrieved, transformed and loaded datasets to answer some of the following questions:
> 1. What is the prevalence of overdose deaths across the US from 1999 to 2014?
> 2. What are the rates of usage organized by demographics across the US?
> 3. Using California as a model, is there a relationship between enrollments in medically-assisted facilities and rates of overdose deaths?
> 4. What are the total number of opioid overdose deaths for each state between 1999 and 2018?
### These are the dataset sources:
> 1. Opioid Overdose Deaths: https://data.world/health/opioid-overdose-deaths
> 2. Drug Use, Employment, Work Absence, Income, Race, Education: https://data.world/balexturner/drug-use-employment-work-absence-income-race-education
> 3. Medication-Assisted Treatment in Medi-Cal for Opioid Use: https://data.world/chhs/8329a339-ab77-4d05-ab7a-405d0ae5765c
> 4. Opioid related deaths by state with demographics: https://wonder.cdc.gov/controller/datarequest/D77;jsessionid=3A003B38B5A0AF1F621FF31D92C8

#### What are the total number of opioid overdose deaths for each state between 1999 and 2018?

> The below plot shows the number of annual deaths between 1999 & 2018 for all states. This data was acquired from the CDC's wonder database by quering specific paramters and using the ICD-10 codes for opioid related deaths.

In [43]:
# Preparing Data for plotting:
testdf=pd.read_csv("Resources/opioid_death.csv")


# Getting state codes from internet. for ex: NC: North Carolina, read as df
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

# Creating a dictionary of state name to state code:
stateMapping=dict(zip(df.state, df.code)) 

# Creating a column 'code' in the main dataframe (testdf) based upon the 'State' column:
testdf['code']=testdf['State'].map(stateMapping)

# Getting list of unique state codes in test:
s=list(testdf['code'].unique())

# Adding custom list of all colors that plotly understands below:
Allcolors="""
aliceblue, antiquewhite, aqua, aquamarine, azure, beige, bisque, black, blanchedalmond, blue, blueviolet, brown, burlywood, cadetblue, chartreuse, chocolate, coral, cornflowerblue, cornsilk, crimson, cyan, darkblue, darkcyan, darkgoldenrod, darkgray, darkgrey, darkgreen, darkkhaki, darkmagenta, darkolivegreen, darkorange, darkorchid, darkred, darksalmon, darkseagreen, darkslateblue, darkslategray, darkslategrey, darkturquoise, darkviolet, deeppink, deepskyblue, dimgray, dimgrey, dodgerblue, firebrick, floralwhite, forestgreen, fuchsia, gainsboro, ghostwhite, gold, goldenrod, gray, grey, green, greenyellow, honeydew, hotpink, indianred, indigo, ivory, khaki, lavender, lavenderblush, lawngreen, lemonchiffon, lightblue, lightcoral, lightcyan, lightgoldenrodyellow, lightgray, lightgrey, lightgreen, lightpink, lightsalmon, lightseagreen, lightskyblue, lightslategray, lightslategrey, lightsteelblue, lightyellow, lime, limegreen, linen, magenta, maroon, mediumaquamarine, mediumblue, mediumorchid, mediumpurple, mediumseagreen, mediumslateblue, mediumspringgreen, mediumturquoise, mediumvioletred, midnightblue, mintcream, mistyrose, moccasin, navajowhite, navy, oldlace, olive, olivedrab, orange, orangered, orchid, palegoldenrod, palegreen, paleturquoise, palevioletred, papayawhip, peachpuff, peru, pink, plum, powderblue, purple, red, rosybrown, royalblue, rebeccapurple, saddlebrown, salmon, sandybrown, seagreen, seashell, sienna, silver, skyblue, slateblue, slategray, slategrey, snow, springgreen, steelblue, tan, teal, thistle, tomato, turquoise, violet, wheat, white, whitesmoke, yellow, yellowgreen"""

# Create list of colors:
colList=Allcolors.split(", ")

# Randomly map list of states to some unique color in colList:
colorMapping=dict(zip(s,colList[1:len(colList)]))

# Created a dict of color mapping:
colorMapping
testdf=testdf.dropna() 

In [47]:
# Animated figure with Plotly Express shows the number of annual deaths between 1999 & 2018 for all states:
px.scatter(testdf, x="Year", y="Deaths", color="code",color_discrete_map=colorMapping)


#### Analyzing the number of annual deaths for female vs male

In [45]:
# # Number of death for female:
# female_death = """
#     SELECT od.State AS "State",
#             op.year AS "Year",
#             op.gender AS "Gender",
#             op.race AS "Race",
#             op.deaths AS "Deaths" 
#     FROM overdose_death as od 
#     LEFT JOIN opioid_death as op 
#     ON od.year = op.year
#     WHERE Gender = 'Female';
# """
# female_death_df = pd.read_sql(female_death, connection)
# female_death_df.head()

#### What is the prevalence of overdose deaths across the US from 1999 to 2014?
> Death prevalence due to overdose there are for every 100,000 people in 2014 for all states

In [41]:
# Filtering crude rate by year:
crude_rate_2014 = """
    SELECT year,
        state,
        crude_rate  
    FROM overdose_death
    WHERE year = 2014;
"""
# Reading in new joined database"
crude_rate_2014 = pd.read_sql(crude_rate_2014, connection)
crude_rate_2014.head()

Unnamed: 0,year,state,crude_rate
0,2014,Alabama,5.8
1,2014,Alaska,10.7
2,2014,Arizona,9.2
3,2014,Arkansas,6.0
4,2014,California,5.6


#### What are the rates of usage of opioid for pain relief organized by demographics across the US?
> First, we filtered by ever using pain relief for all races. Then, calculated the total number of people using opioid pain killers by race. 
> Whites had five times as many usages of opioids than the next highest group, Hispanics. 

In [44]:
# Filtering race and using pain relief ever:
count = """
    SELECT race, pain_relieve_ever
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1';  
"""
count = pd.read_sql(count, connection)
count.head()

Unnamed: 0,race,pain_relieve_ever
0,White,1
1,White,1
2,White,1
3,White,1
4,Hispanic,1


In [45]:
# For White:
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'White';  
    
"""
count = pd.read_sql(count, connection)
count

Unnamed: 0,count
0,2758


In [46]:
# For Hispanic
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'Hispanic';  
    
"""
count = pd.read_sql(count, connection)
count

Unnamed: 0,count
0,494


In [47]:
# For Black/African American:
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'Black/African American';  
    
"""
count = pd.read_sql(count, connection)
print(f"Black/African American:")
count

Black/African American:


Unnamed: 0,count
0,293


In [48]:
# For Native American/Alaskan Native:
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'Native American/Alaskan Native';  
    
"""
count = pd.read_sql(count, connection)
count

Unnamed: 0,count
0,55


In [49]:
# For Hawaiian/Pacific Islander:
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'Hawaiian/Pacific Islander';  
    
"""
count = pd.read_sql(count, connection)
count

Unnamed: 0,count
0,14


In [50]:
# For Mixed:
count = """
    SELECT COUNT(pain_relieve_ever)
    FROM demographic_drug_use
    WHERE pain_relieve_ever = '1'
    AND race = 'Mixed';  
    
"""
count = pd.read_sql(count, connection)
count

Unnamed: 0,count
0,193
