In [1]:
%reload_ext lab_black
import pandas as pd
import numpy as np
from plotnine import *

In [3]:
# Load the two input tables directly from the project's GitHub repository:
# state/county overdose deaths and cleaned county populations.
# NOTE(review): both URLs embed a `token=` query parameter, and the population
# URL points at the `ssloate` branch while the deaths URL points at `master`.
# Confirm both links still resolve before relying on a fresh run.
od_deaths_url = "https://raw.githubusercontent.com/MIDS-at-Duke/estimating-impact-of-opioids-2020-purlple-team/master/10_code/od_deaths_state.csv?token=ARFW6V7BEO4J4IQLTQD3BPK7UL5CA"
county_population_url = "https://raw.githubusercontent.com/MIDS-at-Duke/estimating-impact-of-opioids-2020-purlple-team/ssloate/20_intermediate_files/countypopulations_clean.csv?token=ARFW6V4TNKDVXK6K4IKWWR27UL5FI"

od = pd.read_csv(od_deaths_url)
population = pd.read_csv(county_population_url)

In [4]:
# Give the overdose table's county key the name "FIPS", which is the key the
# later merge with the population table joins on.
od = od.rename({"County Code": "FIPS"}, axis="columns")

# The population dataset carries a "_merge" column (presumably an indicator
# left over from an earlier merge -- verify); it is not needed here.
population = population.drop("_merge", axis="columns")

In [5]:
# Exclude every Alaska ("AK") row from the overdose data.
is_alaska = od["State"].eq("AK")
od = od[~is_alaska]

In [6]:
# Merge population figures onto the overdose data. A right merge keeps every
# row of `od` and drops population rows with no overdose record; that is fine,
# because a county/year missing from `od` just means fewer than 10 deaths.
od_pop = pd.merge(population, od, on=["FIPS", "Year"], how="right", indicator=True)

# Sanity-check the merge: every overdose row should have found a population
# row. The previous check asserted on the bound method `.all` (never called),
# which is always truthy and therefore could never fail.
# FIPS 51515 and 51560 are the only codes known to have no population match;
# they are inspected and dropped further down, so they are tolerated here.
known_unmatched_fips = {51515, 51560}
unmatched_fips = set(od_pop.loc[od_pop["_merge"] != "both", "FIPS"])
unexpected_fips = unmatched_fips - known_unmatched_fips
assert not unexpected_fips, (
    "overdose rows without population data for unexpected FIPS codes: "
    f"{sorted(unexpected_fips)}"
)

In [7]:
# Store Year as an integer column, then preview 25 randomly chosen merged rows.
od_pop = od_pop.assign(Year=od_pop["Year"].astype("int"))
od_pop.sample(25)

Unnamed: 0.1,Unnamed: 0,State Name,County_x,Year,Population,FIPS,State Abbr,County_y,State,Deaths,_merge
1269,44647.0,Pennsylvania,Chester County,2007,486156.0,42029,PA,Chester County,PA,40.0,both
4442,59815.0,West Virginia,McDowell County,2015,19762.0,54047,WV,McDowell County,WV,24.0,both
7585,55585.0,Utah,Weber County,2005,210150.0,49057,UT,Weber County,UT,27.0,both
2238,6185.0,Florida,Escambia County,2005,298339.0,12033,FL,Escambia County,FL,32.0,both
3908,20587.0,Kentucky,Laurel County,2007,57938.0,21125,KY,Laurel County,KY,12.0,both
7774,61851.0,Wisconsin,Winnebago County,2011,167493.0,55139,WI,Winnebago County,WI,21.0,both
3106,27986.0,Mississippi,Hinds County,2006,248989.0,28049,MS,Hinds County,MS,10.0,both
7271,4252.0,California,Ventura County,2012,833516.0,6111,CA,Ventura County,CA,111.0,both
7060,22835.0,Louisiana,Terrebonne Parish,2015,113817.0,22109,LA,Terrebonne Parish,LA,35.0,both
5830,61473.0,Wisconsin,Racine County,2013,194680.0,55101,WI,Racine County,WI,30.0,both


In [8]:
# Discard columns that are no longer needed after the merge: the overdose
# table's county name ("County_y") and state code ("State"), plus the
# "_merge" indicator column.
redundant_columns = ["County_y", "State", "_merge"]
od_pop = od_pop.drop(redundant_columns, axis="columns")

In [9]:
# Columns to keep, in their final presentation order.
column_order = ["FIPS", "State Abbr", "State", "County", "Year", "Deaths", "Population"]

# Rename the population-side columns to plain "State"/"County" (failing loudly
# if either source column is missing), keep only the columns listed above,
# and order the rows by state, county and year.
od_pop = (
    od_pop.rename(
        columns={"State Name": "State", "County_x": "County"},
        errors="raise",
    )
    .loc[:, column_order]
    .sort_values(by=["State", "County", "Year"])
)
od_pop.sample(25)

Unnamed: 0,FIPS,State Abbr,State,County,Year,Deaths,Population
4938,40101,OK,Oklahoma,Muskogee County,2004,14.0,69592.0
1445,42033,PA,Pennsylvania,Clearfield County,2015,12.0,80718.0
286,24510,MD,Maryland,Baltimore City,2009,204.0,620509.0
4218,6039,CA,California,Madera County,2005,11.0,140313.0
2839,28045,MS,Mississippi,Hancock County,2003,11.0,45772.0
7045,51185,VA,Virginia,Tazewell County,2012,13.0,44264.0
5503,42101,PA,Pennsylvania,Philadelphia County,2015,580.0,1571065.0
6271,6081,CA,California,San Mateo County,2008,47.0,703830.0
7238,26159,MI,Michigan,Van Buren County,2013,12.0,75318.0
7546,26163,MI,Michigan,Wayne County,2010,407.0,1815081.0


In [10]:
# Display any rows whose State value is missing.
state_missing = od_pop["State"].isna()
od_pop.loc[state_missing]

Unnamed: 0,FIPS,State Abbr,State,County,Year,Deaths,Population
373,51515,,,,2015,0.0,
1484,51560,,,,2015,0.0,


In [125]:
# FIPS 51515 and 51560 are two Virginia localities (Bedford City and Clifton
# Forge) that were incorporated into other counties, so it is safe to drop
# them. Afterwards, re-display the missing-State rows to confirm none remain.
retired_fips = [51515, 51560]
od_pop = od_pop[~od_pop["FIPS"].isin(retired_fips)]
od_pop[od_pop["State"].isna()]

Unnamed: 0,FIPS,State Abbr,State,County,Year,Deaths,Population


In [126]:
# Add the overdose death rate: deaths divided by population, scaled to a
# rate per 100,000 people.
deaths_per_person = od_pop["Deaths"].div(od_pop["Population"])
od_pop["Deaths Per 100,000 People"] = deaths_per_person.mul(100000)

In [128]:
# Write the merged deaths/population table to disk as a CSV file (the
# DataFrame index is written too, since `index` is left at its default).
# NOTE(review): this is an absolute path on one user's machine; anyone else
# running the notebook must point it at their own copy of the repository.
output_csv_path = "/Users/samsloate/Desktop/Data_Science/Opioids_Project/estimating-impact-of-opioids-2020-purlple-team/20_intermediate_files/death_data_with_pop.csv"
od_pop.to_csv(output_csv_path)