In [1]:
import pandas as pd

In [2]:
# Cause-of-death codes and the short drug labels used for them in this
# notebook. The two lists are paired by position.
codes = ["T40.1", "T40.2", "T40.3", "T40.4", "T40.5", "T40.6", "T40.7"]
names = ["heroin", "nat_opioid", "methadone",  "syn_opioid", "cocaine", "unspecified", "cannabis"]

# Lookup tables in both directions, built by zipping the paired lists.
names_to_code = dict(zip(names, codes))
code_to_names = dict(zip(codes, names))
code_to_names

{'T40.1': 'heroin',
 'T40.2': 'nat_opioid',
 'T40.3': 'methadone',
 'T40.4': 'syn_opioid',
 'T40.5': 'cocaine',
 'T40.6': 'unspecified',
 'T40.7': 'cannabis'}

In [3]:
def extract_age_range(age_group):
    """Turn an age-group label into a short token usable in column names.

    Parameters
    ----------
    age_group : str
        Label such as ``"15-19 years"`` or ``"< 1 year"``.

    Returns
    -------
    str
        ``"1"`` for the exact label ``"< 1 year"``; otherwise the first
        whitespace-separated word of the label with every ``"-"`` replaced
        by ``"_"`` (e.g. ``"15-19 years"`` -> ``"15_19"``).

    Raises
    ------
    IndexError
        If the label is empty or contains only whitespace.
    """
    # The under-one-year label has no "lo-hi" first word, so handle it up front.
    if age_group == '< 1 year':
        return "1"
    leading_word = age_group.split()[0]
    return leading_word.replace("-", "_")

In [4]:
df = pd.read_csv("./data/cdc_wonders_age.csv")
# Translate each cause-of-death code to its short drug label. Plain dict
# indexing is used, so a code absent from `code_to_names` raises KeyError.
df["drug_name"] = [
    code_to_names[cause_code]
    for cause_code in df["Multiple Cause of death Code"]
]

df.head()

Unnamed: 0,Year,Year Code,Month,Month Code,Multiple Cause of death,Multiple Cause of death Code,Five-Year Age Groups,Five-Year Age Groups Code,Deaths,drug_name
0,2018,2018,"Jan., 2018",2018/01,Heroin,T40.1,15-19 years,15-19,12,heroin
1,2018,2018,"Jan., 2018",2018/01,Heroin,T40.1,20-24 years,20-24,82,heroin
2,2018,2018,"Jan., 2018",2018/01,Heroin,T40.1,25-29 years,25-29,194,heroin
3,2018,2018,"Jan., 2018",2018/01,Heroin,T40.1,30-34 years,30-34,190,heroin
4,2018,2018,"Jan., 2018",2018/01,Heroin,T40.1,35-39 years,35-39,144,heroin


In [5]:
# Keep only the columns used downstream. `.copy()` makes the subset an
# independent frame, so the column assignments below do not write into a
# slice of the loaded data (the cause of pandas' SettingWithCopyWarning).
df = df[["Month Code", "drug_name", "Five-Year Age Groups", "Deaths"]].copy()
df["age_range"] = df["Five-Year Age Groups"].apply(extract_age_range)
# Combined drug/age key, e.g. "heroin_15_19"; used later as the pivot columns.
df["drug_name_age_range"] = df["drug_name"] + "_" + df["age_range"]
df

Unnamed: 0,Month Code,drug_name,Five-Year Age Groups,Deaths,age_range,drug_name_age_range
0,2018/01,heroin,15-19 years,12,15_19,heroin_15_19
1,2018/01,heroin,20-24 years,82,20_24,heroin_20_24
2,2018/01,heroin,25-29 years,194,25_29,heroin_25_29
3,2018/01,heroin,30-34 years,190,30_34,heroin_30_34
4,2018/01,heroin,35-39 years,144,35_39,heroin_35_39
...,...,...,...,...,...,...
5398,2024/12,cocaine,55-59 years,127,55_59,cocaine_55_59
5399,2024/12,cocaine,60-64 years,124,60_64,cocaine_60_64
5400,2024/12,cocaine,65-69 years,87,65_69,cocaine_65_69
5401,2024/12,cocaine,70-74 years,20,70_74,cocaine_70_74


In [6]:
# Distinct drug/age keys, in order of first appearance.
df.loc[:, "drug_name_age_range"].unique()

array(['heroin_15_19', 'heroin_20_24', 'heroin_25_29', 'heroin_30_34',
       'heroin_35_39', 'heroin_40_44', 'heroin_45_49', 'heroin_50_54',
       'heroin_55_59', 'heroin_60_64', 'heroin_65_69', 'nat_opioid_15_19',
       'nat_opioid_20_24', 'nat_opioid_25_29', 'nat_opioid_30_34',
       'nat_opioid_35_39', 'nat_opioid_40_44', 'nat_opioid_45_49',
       'nat_opioid_50_54', 'nat_opioid_55_59', 'nat_opioid_60_64',
       'nat_opioid_65_69', 'nat_opioid_70_74', 'nat_opioid_75_79',
       'methadone_25_29', 'methadone_30_34', 'methadone_35_39',
       'methadone_40_44', 'methadone_45_49', 'methadone_50_54',
       'methadone_55_59', 'methadone_60_64', 'syn_opioid_15_19',
       'syn_opioid_20_24', 'syn_opioid_25_29', 'syn_opioid_30_34',
       'syn_opioid_35_39', 'syn_opioid_40_44', 'syn_opioid_45_49',
       'syn_opioid_50_54', 'syn_opioid_55_59', 'syn_opioid_60_64',
       'syn_opioid_65_69', 'syn_opioid_70_74', 'cocaine_15_19',
       'cocaine_20_24', 'cocaine_25_29', 'cocaine_30_34',

In [7]:
# Reshape long -> wide: one row per month, one column per drug/age key.
# Any drug/age column with a missing month is then dropped entirely, so only
# series that are complete over the whole period remain.
death_df = (
    df.pivot(index="Month Code", columns="drug_name_age_range", values="Deaths")
    .reset_index()
    .dropna(axis="columns")
)
death_df

drug_name_age_range,Month Code,cocaine_20_24,cocaine_25_29,cocaine_30_34,cocaine_35_39,cocaine_40_44,cocaine_45_49,cocaine_50_54,cocaine_55_59,cocaine_60_64,...,syn_opioid_20_24,syn_opioid_25_29,syn_opioid_30_34,syn_opioid_35_39,syn_opioid_40_44,syn_opioid_45_49,syn_opioid_50_54,syn_opioid_55_59,syn_opioid_60_64,syn_opioid_65_69
0,2018/01,59.0,141.0,147.0,177.0,118.0,137.0,137.0,134.0,91.0,...,188.0,379.0,400.0,360.0,255.0,260.0,250.0,214.0,127.0,38.0
1,2018/02,54.0,150.0,149.0,134.0,136.0,123.0,130.0,123.0,71.0,...,166.0,364.0,391.0,337.0,273.0,227.0,222.0,177.0,112.0,46.0
2,2018/03,73.0,143.0,195.0,190.0,133.0,124.0,163.0,152.0,80.0,...,212.0,370.0,446.0,392.0,305.0,281.0,263.0,185.0,102.0,57.0
3,2018/04,57.0,165.0,161.0,171.0,133.0,156.0,142.0,132.0,77.0,...,198.0,405.0,394.0,381.0,276.0,260.0,221.0,198.0,115.0,58.0
4,2018/05,63.0,138.0,175.0,188.0,141.0,151.0,186.0,156.0,77.0,...,174.0,402.0,437.0,400.0,263.0,270.0,275.0,221.0,112.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,2024/08,38.0,104.0,158.0,194.0,244.0,175.0,172.0,212.0,207.0,...,149.0,289.0,417.0,496.0,536.0,367.0,303.0,342.0,293.0,160.0
80,2024/09,31.0,87.0,131.0,208.0,188.0,155.0,180.0,200.0,180.0,...,128.0,265.0,362.0,473.0,400.0,326.0,302.0,281.0,236.0,118.0
81,2024/10,35.0,79.0,147.0,183.0,194.0,159.0,171.0,179.0,169.0,...,109.0,269.0,417.0,479.0,443.0,331.0,296.0,264.0,220.0,142.0
82,2024/11,39.0,65.0,132.0,172.0,177.0,166.0,142.0,183.0,173.0,...,130.0,238.0,415.0,445.0,434.0,324.0,297.0,277.0,217.0,143.0


In [8]:
# Load the population table and build a "YYYY/MM" key in the same format as
# the deaths table's "Month Code" (month number zero-padded to two digits).
census = pd.read_csv("./data/census_data.csv").assign(
    month_num_str=lambda frame: frame["month_num"].astype(str).str.zfill(2),
    **{
        "Month Code": lambda frame: frame["year"].astype(str)
        + "/"
        + frame["month_num_str"]
    },
)
census

Unnamed: 0,year,month,month_num,resident_population,month_num_str,Month Code
0,2021,January,1,331845010,1,2021/01
1,2021,February,2,331781640,2,2021/02
2,2021,March,3,331797645,3,2021/03
3,2021,April,4,331860794,4,2021/04
4,2021,May,5,331927373,5,2021/05
5,2021,June,6,332001243,6,2021/06
6,2021,July,7,332099760,7,2021/07
7,2021,August,8,332309095,8,2021/08
8,2021,September,9,332476987,9,2021/09
9,2021,October,10,332632825,10,2021/10


In [9]:
# Inner join on "Month Code": only months present in BOTH tables are kept.
# validate="one_to_one" makes pandas raise MergeError if either side repeats
# a "Month Code", instead of silently multiplying rows in the result.
death_census_merge = pd.merge(
    death_df,
    census,
    on="Month Code",
    how="inner",
    validate="one_to_one",
)
death_census_merge

Unnamed: 0,Month Code,cocaine_20_24,cocaine_25_29,cocaine_30_34,cocaine_35_39,cocaine_40_44,cocaine_45_49,cocaine_50_54,cocaine_55_59,cocaine_60_64,...,syn_opioid_45_49,syn_opioid_50_54,syn_opioid_55_59,syn_opioid_60_64,syn_opioid_65_69,year,month,month_num,resident_population,month_num_str
0,2021/01,84.0,150.0,209.0,235.0,197.0,198.0,227.0,250.0,168.0,...,514.0,514.0,478.0,372.0,127.0,2021,January,1,331845010,1
1,2021/02,73.0,145.0,191.0,193.0,192.0,182.0,204.0,193.0,150.0,...,446.0,461.0,398.0,290.0,124.0,2021,February,2,331781640,2
2,2021/03,108.0,167.0,237.0,282.0,257.0,237.0,240.0,268.0,185.0,...,611.0,580.0,523.0,384.0,156.0,2021,March,3,331797645,3
3,2021/04,86.0,186.0,239.0,266.0,280.0,248.0,267.0,304.0,189.0,...,601.0,621.0,609.0,389.0,159.0,2021,April,4,331860794,4
4,2021/05,103.0,161.0,238.0,248.0,280.0,241.0,257.0,277.0,165.0,...,579.0,609.0,560.0,354.0,167.0,2021,May,5,331927373,5
5,2021/06,80.0,161.0,210.0,252.0,253.0,237.0,260.0,278.0,164.0,...,586.0,546.0,590.0,355.0,163.0,2021,June,6,332001243,6
6,2021/07,79.0,184.0,261.0,262.0,269.0,225.0,278.0,268.0,183.0,...,599.0,578.0,513.0,383.0,174.0,2021,July,7,332099760,7
7,2021/08,87.0,176.0,241.0,272.0,267.0,235.0,293.0,282.0,187.0,...,598.0,607.0,511.0,391.0,155.0,2021,August,8,332309095,8
8,2021/09,74.0,189.0,232.0,250.0,239.0,225.0,282.0,274.0,192.0,...,584.0,543.0,603.0,376.0,150.0,2021,September,9,332476987,9
9,2021/10,76.0,172.0,247.0,266.0,246.0,193.0,278.0,306.0,198.0,...,527.0,588.0,541.0,373.0,167.0,2021,October,10,332632825,10


In [10]:
# The death-count columns are every `death_df` column except the join key.
# Taking them from `death_df` avoids hard-coding the first and last labels,
# which change whenever a different set of series survives the earlier dropna.
cols = [label for label in death_df.columns if label != "Month Code"]
print(cols)

['cocaine_20_24', 'cocaine_25_29', 'cocaine_30_34', 'cocaine_35_39', 'cocaine_40_44', 'cocaine_45_49', 'cocaine_50_54', 'cocaine_55_59', 'cocaine_60_64', 'cocaine_65_69', 'heroin_30_34', 'heroin_35_39', 'heroin_40_44', 'heroin_45_49', 'heroin_50_54', 'heroin_55_59', 'heroin_60_64', 'methadone_30_34', 'methadone_35_39', 'methadone_40_44', 'methadone_45_49', 'methadone_50_54', 'methadone_55_59', 'methadone_60_64', 'nat_opioid_25_29', 'nat_opioid_30_34', 'nat_opioid_35_39', 'nat_opioid_40_44', 'nat_opioid_45_49', 'nat_opioid_50_54', 'nat_opioid_55_59', 'nat_opioid_60_64', 'nat_opioid_65_69', 'nat_opioid_70_74', 'syn_opioid_15_19', 'syn_opioid_20_24', 'syn_opioid_25_29', 'syn_opioid_30_34', 'syn_opioid_35_39', 'syn_opioid_40_44', 'syn_opioid_45_49', 'syn_opioid_50_54', 'syn_opioid_55_59', 'syn_opioid_60_64', 'syn_opioid_65_69']


In [11]:
# Add a "<col>_norm" column for every death-count column: deaths divided by
# that row's `resident_population`, scaled to a rate per 100,000 residents.
resident_population = death_census_merge["resident_population"]
for col in cols:
    rate_per_100k = death_census_merge[col] / resident_population * 100000
    death_census_merge[f"{col}_norm"] = rate_per_100k

death_census_merge

Unnamed: 0,Month Code,cocaine_20_24,cocaine_25_29,cocaine_30_34,cocaine_35_39,cocaine_40_44,cocaine_45_49,cocaine_50_54,cocaine_55_59,cocaine_60_64,...,syn_opioid_20_24_norm,syn_opioid_25_29_norm,syn_opioid_30_34_norm,syn_opioid_35_39_norm,syn_opioid_40_44_norm,syn_opioid_45_49_norm,syn_opioid_50_54_norm,syn_opioid_55_59_norm,syn_opioid_60_64_norm,syn_opioid_65_69_norm
0,2021/01,84.0,150.0,209.0,235.0,197.0,198.0,227.0,250.0,168.0,...,0.12325,0.206422,0.262773,0.243186,0.179602,0.154892,0.154892,0.144043,0.112101,0.038271
1,2021/02,73.0,145.0,191.0,193.0,192.0,182.0,204.0,193.0,150.0,...,0.106998,0.188377,0.22123,0.222435,0.176019,0.134426,0.138947,0.119958,0.087407,0.037374
2,2021/03,108.0,167.0,237.0,282.0,257.0,237.0,240.0,268.0,185.0,...,0.137433,0.229357,0.273661,0.265222,0.231768,0.184148,0.174805,0.157626,0.115733,0.047017
3,2021/04,86.0,186.0,239.0,266.0,280.0,248.0,267.0,304.0,189.0,...,0.13138,0.213644,0.270294,0.262761,0.234436,0.1811,0.187127,0.183511,0.117218,0.047912
4,2021/05,103.0,161.0,238.0,248.0,280.0,241.0,257.0,277.0,165.0,...,0.124425,0.206069,0.280483,0.266323,0.228664,0.174436,0.183474,0.168712,0.10665,0.050312
5,2021/06,80.0,161.0,210.0,252.0,253.0,237.0,260.0,278.0,164.0,...,0.119578,0.203312,0.258433,0.253011,0.220782,0.176505,0.164457,0.17771,0.106927,0.049096
6,2021/07,79.0,184.0,261.0,262.0,269.0,225.0,278.0,268.0,183.0,...,0.115327,0.206564,0.273713,0.263776,0.235471,0.180367,0.174044,0.154472,0.115327,0.052394
7,2021/08,87.0,176.0,241.0,272.0,267.0,235.0,293.0,282.0,187.0,...,0.119768,0.217569,0.26692,0.262707,0.236527,0.179953,0.182661,0.153772,0.117662,0.046643
8,2021/09,74.0,189.0,232.0,250.0,239.0,225.0,282.0,274.0,192.0,...,0.103466,0.201818,0.264981,0.263778,0.224677,0.175651,0.16332,0.181366,0.113091,0.045116
9,2021/10,76.0,172.0,247.0,266.0,246.0,193.0,278.0,306.0,198.0,...,0.104319,0.187895,0.271471,0.268164,0.230885,0.158433,0.176771,0.162642,0.112136,0.050206


In [12]:
# Drop the raw death-count columns; the "_norm" columns and the census
# columns are kept.
death_census_merge2 = death_census_merge.drop(columns=cols)
death_census_merge2

Unnamed: 0,Month Code,year,month,month_num,resident_population,month_num_str,cocaine_20_24_norm,cocaine_25_29_norm,cocaine_30_34_norm,cocaine_35_39_norm,...,syn_opioid_20_24_norm,syn_opioid_25_29_norm,syn_opioid_30_34_norm,syn_opioid_35_39_norm,syn_opioid_40_44_norm,syn_opioid_45_49_norm,syn_opioid_50_54_norm,syn_opioid_55_59_norm,syn_opioid_60_64_norm,syn_opioid_65_69_norm
0,2021/01,2021,January,1,331845010,1,0.025313,0.045202,0.062981,0.070816,...,0.12325,0.206422,0.262773,0.243186,0.179602,0.154892,0.154892,0.144043,0.112101,0.038271
1,2021/02,2021,February,2,331781640,2,0.022002,0.043703,0.057568,0.058171,...,0.106998,0.188377,0.22123,0.222435,0.176019,0.134426,0.138947,0.119958,0.087407,0.037374
2,2021/03,2021,March,3,331797645,3,0.03255,0.050332,0.071429,0.084992,...,0.137433,0.229357,0.273661,0.265222,0.231768,0.184148,0.174805,0.157626,0.115733,0.047017
3,2021/04,2021,April,4,331860794,4,0.025914,0.056048,0.072018,0.080154,...,0.13138,0.213644,0.270294,0.262761,0.234436,0.1811,0.187127,0.183511,0.117218,0.047912
4,2021/05,2021,May,5,331927373,5,0.031031,0.048505,0.071702,0.074715,...,0.124425,0.206069,0.280483,0.266323,0.228664,0.174436,0.183474,0.168712,0.10665,0.050312
5,2021/06,2021,June,6,332001243,6,0.024096,0.048494,0.063253,0.075903,...,0.119578,0.203312,0.258433,0.253011,0.220782,0.176505,0.164457,0.17771,0.106927,0.049096
6,2021/07,2021,July,7,332099760,7,0.023788,0.055405,0.078591,0.078892,...,0.115327,0.206564,0.273713,0.263776,0.235471,0.180367,0.174044,0.154472,0.115327,0.052394
7,2021/08,2021,August,8,332309095,8,0.02618,0.052963,0.072523,0.081852,...,0.119768,0.217569,0.26692,0.262707,0.236527,0.179953,0.182661,0.153772,0.117662,0.046643
8,2021/09,2021,September,9,332476987,9,0.022257,0.056846,0.069779,0.075193,...,0.103466,0.201818,0.264981,0.263778,0.224677,0.175651,0.16332,0.181366,0.113091,0.045116
9,2021/10,2021,October,10,332632825,10,0.022848,0.051709,0.074256,0.079968,...,0.104319,0.187895,0.271471,0.268164,0.230885,0.158433,0.176771,0.162642,0.112136,0.050206


In [13]:
# Write the normalised table to disk without the positional row index.
death_census_merge2.to_csv(
    path_or_buf="./data/wonders_death_census_age_range.csv",
    index=False,
)