# Setup

In [1]:
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
from matplotlib import style
import datetime
import seaborn as sns

In [2]:
# Some data cleanup is needed: some county names are in all caps and some carry extra text after a comma (e.g. a trailing "PA").
def fix(df, inplace = True):
    """Normalize the ``County`` column to title-cased county names.

    Each string value is cut at its first comma, stripped of surrounding
    whitespace and title-cased, so that ``"DELAWARE, PA"`` becomes
    ``"Delaware"``. Values that are not strings (for example NaN for a
    missing county) are left untouched instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``County`` column.
    inplace : bool, default True
        When True, ``df`` itself is modified and nothing is returned.
        When False, ``df`` is left alone and a cleaned copy is returned.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned copy when ``inplace`` is False, otherwise None.
    """
    def _clean_county(name):
        # Missing values show up as NaN (a float), which has no .split();
        # pass anything that is not a string through unchanged.
        if not isinstance(name, str):
            return name
        # Keep only the text before the first comma; strip stray whitespace
        # so the result is a reliable join key.
        return name.split(',')[0].strip().title()

    new_df = df if inplace else df.copy()
    new_df["County"] = new_df["County"].map(_clean_county)

    if not inplace:
        return new_df

## This notebook combines all the county data

# Aggregations

In [3]:
###PA.gov
def yn_switch(op_df, inplace = True):
    """Convert the Y/N naloxone and survival columns into 0/1 flags.

    A new ``Narcan Admin`` column is derived from ``Naloxone Administered``
    and the ``Survive`` column is overwritten with its 0/1 equivalent.
    ``"Y"`` maps to 1; every other value, including missing values,
    maps to 0.

    Because ``Survive`` is overwritten, an already-converted ``1`` is also
    mapped to 1. This keeps the function idempotent: running it twice on the
    same frame no longer zeroes the column.

    Parameters
    ----------
    op_df : pandas.DataFrame
        Frame with ``Naloxone Administered`` and ``Survive`` columns.
    inplace : bool, default True
        When True, ``op_df`` itself is modified and nothing is returned.
        When False, ``op_df`` is left alone and a converted copy is returned.

    Returns
    -------
    pandas.DataFrame or None
        The converted copy when ``inplace`` is False, otherwise None.
    """
    def _as_flag(value):
        # Accept 1 alongside "Y" so a column that was already converted
        # survives a second pass unchanged.
        return 1 if value == "Y" or value == 1 else 0

    opdfnew = op_df if inplace else op_df.copy()

    opdfnew["Narcan Admin"] = opdfnew['Naloxone Administered'].map(_as_flag)
    opdfnew["Survive"] = opdfnew['Survive'].map(_as_flag)

    if not inplace:
        return opdfnew

# Load the raw PA.gov overdose incident records.
df = pd.read_csv('../data/PAGOV.csv')
df.head()

# Suspected-drug descriptions that are kept as opioid incidents.
opiod_list = [
    'CARFENTANIL',
    'FENTANYL',
    'FENTANYL ANALOG/OTHER SYNTHETIC OPIOID',
    'HEROIN',
    'METHADONE',
    'PHARMACEUTICAL OPIOID',
    'SUBOXONE',
]

# Columns carried forward from the raw export.
incident_columns = [
    'Incident ID', 'Incident County Name', 'Incident Date', 'Victim ID',
    'Gender Desc', 'Day', 'Naloxone Administered', 'Age Range', 'Survive',
    'Response Desc', 'Incident Time',
]

# Keep only opioid incidents, trim to the relevant columns, and drop rows
# that are identical across those columns.
is_opioid_incident = df['Susp OD Drug Desc'].isin(opiod_list)
opdf = df.loc[is_opioid_incident, incident_columns].drop_duplicates()

# Despite the "ym" in its name, this column holds the calendar year as an int
# (yearly period -> string -> integer).
opdf['Incident Date ym'] = (
    pd.to_datetime(opdf['Incident Date'])
    .dt.to_period('Y')
    .astype(str)
    .astype(int)
)

# Number of incident rows per year.
oppdf = (
    opdf.groupby(['Incident Date ym'])[['Incident ID']]
    .count()
    .reset_index()
)

# Work on a converted copy so opdf keeps its original Y/N values.
# Limit to the needed columns and rename them for the joins later.
opdfnew = (
    yn_switch(opdf, inplace = False)
    .loc[:, ['Incident ID', 'Incident County Name',
             'Narcan Admin', 'Survive', 'Incident Date ym']]
    .rename(columns={'Incident County Name': 'County',
                     'Incident Date ym': 'Year'})
)

# Per county/year totals and rates, broadcast back onto every incident row.
by_county_year = opdfnew.groupby(['County', 'Year'])
opdfnew = opdfnew.assign(**{
    'Total Overdoses Per County':
        by_county_year['County'].transform('count'),
    'Percent Narcan Admin Per County/Year':
        by_county_year['Narcan Admin'].transform('mean'),
    'Percent Survive Overdose Per County/Year':
        by_county_year['Survive'].transform('mean'),
})

# Collapse to one row per distinct county/year/rate combination.
opdfnew = opdfnew[[
    'County',
    'Year',
    'Percent Narcan Admin Per County/Year',
    'Percent Survive Overdose Per County/Year',
]].drop_duplicates()


opdfnew.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year
2,Delaware,2018,0.65625,0.703125
4,Chester,2018,0.813397,0.751196
5,Beaver,2018,0.685185,0.814815
6,Bucks,2018,0.739895,0.804921
8,Philadelphia,2018,0.95122,0.926829


In [4]:
# Next: take-back boxes and treatment facilities.
takeback_count_col = 'Total of Take Back Locations by County'
treatment_count_col = 'Total of Treatment Locations by County'

# Load the take-back dataset and normalize its county names on a copy.
takebackdf = pd.read_csv('../data/Prescription_Drug_Take-Back_Box_Locations_County_Drug_and_Alcohol_Programs.csv')
takebackdf1 = fix(takebackdf, inplace = False)

# Attach the per-county number of take-back rows to every row.
takebackdf1 = takebackdf1.assign(**{
    takeback_count_col: takebackdf1.groupby('County')['County'].transform('count'),
})

# Load the treatment dataset, count its rows per county, and keep only the
# county name and that count.
treatdf = pd.read_csv('../data/Drug_and_Alcohol_Treatment_Facilities_May_2018_County_Drug_and_Alcohol_Programs.csv')
treatdf = treatdf.assign(**{
    treatment_count_col: treatdf.groupby('County')['County'].transform('count'),
})[['County', treatment_count_col]]

# Both frames still hold several rows per county, so this inner join pairs
# every take-back row with every treatment row of the same county; the
# duplicates are removed right after the column selection below.
dfcnt2 = takebackdf1.merge(treatdf, how="inner", on='County')

dftakebackandtreatment = (
    dfcnt2[['County', takeback_count_col, treatment_count_col]]
    .drop_duplicates()
)

# Join the location counts onto the PA.gov county/year rates.
dfmerge1 = (
    opdfnew
    .merge(dftakebackandtreatment, how="inner", on='County')
    .astype({'Year': int})
)

# Sanity check
dfmerge1.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County
0,Delaware,2018,0.65625,0.703125,55,30
1,Delaware,2019,0.717949,0.615385,55,30
2,Delaware,2020,0.555556,0.587302,55,30
3,Delaware,2021,0.56,0.7,55,30
4,Delaware,2022,0.5,0.714286,55,30


In [5]:
# Risky prescribing dataset
risky_total_col = 'Total Risky Prescibing per County/Year'

df = pd.read_csv('../data/Risky_Prescribing_Measures_Quarter_3_2016_-_Current_Quarterly_County___Statewide_Health.csv')

# Drop the statewide 'Pennsylvania' rows so they are not counted twice.
df = df.loc[df['County'].ne('Pennsylvania')]

# Sum 'Rate or Count' within each county/year and attach it to every row.
df = df.assign(**{
    risky_total_col: df.groupby(['County', 'Year'])['Rate or Count'].transform('sum'),
})

# Keep one row per distinct county/year/total combination.
dfrisky = df.loc[:, ['County', 'Year', risky_total_col]].drop_duplicates()

dfrisky.head()

Unnamed: 0,County,Year,Total Risky Prescibing per County/Year
1,Lackawanna,2020,36468.040054
4,Mifflin,2021,3224.363865
5,Schuylkill,2021,11536.172907
6,Butler,2019,26160.09
7,Snyder,2016,4034.04


In [6]:
# Join the risky-prescribing totals onto the combined frame by county and year.
dfmerge2 = dfmerge1.merge(dfrisky, how="inner", on=['County', 'Year'])

dfmerge2.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County,Total Risky Prescibing per County/Year
0,Delaware,2018,0.65625,0.703125,55,30,87748.56
1,Delaware,2019,0.717949,0.615385,55,30,70744.46
2,Delaware,2020,0.555556,0.587302,55,30,57569.091674
3,Delaware,2021,0.56,0.7,55,30,38628.769756
4,Chester,2018,0.813397,0.751196,26,25,64356.48


In [7]:
### Then we can add in the rest for the main dashboard