In [60]:
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
from matplotlib import style
import datetime
import seaborn as sns
import numpy as np
from functools import reduce

## This Notebook combines all the county data 

In [61]:
###PA.gov
def yn_switch(opdfnew):
    opdfnew["Narcan Admin"] = opdfnew['Naloxone Administered'].map(lambda x: 1 if x == "Y" else 0)
    opdfnew["Survive"] = opdfnew['Survive'].map(lambda x: 1 if x == "Y" else 0)

    return(opdfnew)   
df = pd.read_csv('../data/PAGOV.csv')
df.head()
opiod_list = ['CARFENTANIL','FENTANYL','FENTANYL ANALOG/OTHER SYNTHETIC OPIOID','HEROIN','METHADONE','PHARMACEUTICAL OPIOID','SUBOXONE']

# Reduce incidents to ones with specific opioid
opdf = df.loc[df['Susp OD Drug Desc'].isin(opiod_list)]

# Store relevant information, duplicates
opdf = opdf[['Incident ID','Incident County Name','Incident Date','Victim ID','Gender Desc','Day','Naloxone Administered',
             'Age Range','Survive','Response Desc','Incident Time']].drop_duplicates()


opdf['Incident Date ym'] = pd.to_datetime(opdf['Incident Date']).dt.to_period('Y')

opdf['Incident Date ym'] = opdf['Incident Date ym'].astype(str)
opdf['Incident Date ym'] = opdf['Incident Date ym'].astype(int)

oppdf = opdf.groupby(['Incident Date ym'])[['Incident ID']].count().reset_index()
opdfnew = yn_switch(opdf.copy())

#limitign
opdfnew = opdfnew[['Incident ID','Incident County Name',
                   'Narcan Admin','Survive','Incident Date ym']]

#Renaming for joins later
opdfnew  = opdfnew.rename(columns={'Incident County Name': 'County','Incident Date ym':'Year'})

##Getting totals

opdfnew['Total Overdoses Per County'] = opdfnew.groupby(['County','Year'])['County'].transform('count')

opdfnew['Percent Narcan Admin Per County/Year'] = opdfnew.groupby(['County','Year'])['Narcan Admin'].transform('mean')
opdfnew['Percent Survive Overdose Per County/Year'] = opdfnew.groupby(['County','Year'])['Survive'].transform('mean')

##Limiting

opdfnew = opdfnew[['County','Year','Percent Narcan Admin Per County/Year',
                   'Percent Survive Overdose Per County/Year']].drop_duplicates()



In [62]:
opdfnew.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year
2,Delaware,2018,0.65625,0.703125
4,Chester,2018,0.813397,0.751196
5,Beaver,2018,0.685185,0.814815
6,Bucks,2018,0.739895,0.804921
8,Philadelphia,2018,0.95122,0.926829


In [63]:
### Next, Takeback and treaments
takebackdf = pd.read_csv('../data/Prescription_Drug_Take-Back_Box_Locations_County_Drug_and_Alcohol_Programs.csv')
### There is some datacleanup needed as some counties are in all caps and listed as PA

def fix(df):
    for i, row in df.iterrows():
        try:
            cngcnty = row['County'].split(',')[0]
            cngcnty = str.title(cngcnty)
            df.at[i,row['County']] = cngcnty
        except:
            pass
    return(df)        
takebackdf1 = fix(takebackdf.copy())

takebackdf1['Total of Take Back Locations by County'] = takebackdf1.groupby('County')['County'].transform('count')




###Loading treatment
treatdf = pd.read_csv('../data/Drug_and_Alcohol_Treatment_Facilities_May_2018_County_Drug_and_Alcohol_Programs.csv')

treatdf['Total of Treatment Locations by County'] = treatdf.groupby('County')['County'].transform('count')

treatdf = treatdf[['County','Total of Treatment Locations by County']]

### Merging dataframes
dfcnt2 = pd.merge(takebackdf1,treatdf, how="inner",left_on = 'County',right_on = 'County')

dftakebackandtreatment = dfcnt2[['County','Total of Take Back Locations by County',
                                 'Total of Treatment Locations by County']].drop_duplicates()



In [64]:
dfmerge1 = pd.merge(opdfnew,dftakebackandtreatment, how="inner",left_on = 'County',right_on = 'County')

In [65]:
dfmerge1.head()  ##PA.gov and takebacks/treatments

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County
0,Delaware,2018,0.65625,0.703125,55,30
1,Delaware,2019,0.717949,0.615385,55,30
2,Delaware,2020,0.555556,0.587302,55,30
3,Delaware,2021,0.56,0.7,55,30
4,Delaware,2022,0.5,0.714286,55,30


In [66]:
###Risky Precribing dataset
df = pd.read_csv('../data/Risky_Prescribing_Measures_Quarter_3_2016_-_Current_Quarterly_County___Statewide_Health.csv')

### Removing PA as to not count twice
df = df[df['County'] != 'Pennsylvania']

##Suming per county/year

df['Total Risky Prescibing per County/Year'] = df.groupby(['County','Year'])['Rate or Count'].transform('sum')

#limiting
dfrisky = df[['County','Year','Total Risky Prescibing per County/Year']]




In [67]:
dfmerge1['Year'] = dfmerge1['Year'].astype(int)

In [68]:
dfrisky.head()

Unnamed: 0,County,Year,Total Risky Prescibing per County/Year
1,Lackawanna,2020,36468.040054
4,Mifflin,2021,3224.363865
5,Schuylkill,2021,11536.172907
6,Butler,2019,26160.09
7,Snyder,2016,4034.04


In [69]:
dfmerge1.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County
0,Delaware,2018,0.65625,0.703125,55,30
1,Delaware,2019,0.717949,0.615385,55,30
2,Delaware,2020,0.555556,0.587302,55,30
3,Delaware,2021,0.56,0.7,55,30
4,Delaware,2022,0.5,0.714286,55,30


In [70]:
##Merging risky dataset
dfmerge2 = pd.merge(dfmerge1,dfrisky, how="inner",left_on = ['County','Year'],right_on = ['County','Year'])

In [71]:
dfmerge2.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County,Total Risky Prescibing per County/Year
0,Delaware,2018,0.65625,0.703125,55,30,87748.56
1,Delaware,2018,0.65625,0.703125,55,30,87748.56
2,Delaware,2018,0.65625,0.703125,55,30,87748.56
3,Delaware,2018,0.65625,0.703125,55,30,87748.56
4,Delaware,2018,0.65625,0.703125,55,30,87748.56


In [72]:
### Dispensation_Data
df = pd.read_csv('../data/Dispensation_Data.csv')
###Keeping age groups, removing all ages ## will have to do Genders seperatly as 
##They don't count them together
df = df[df['Age Group'] == 'All Ages']
df = df[df['Gender'] == 'All Genders']

df  = df.rename(columns={'County Name': 'County'})
df['Total Dispensation per County/Year'] = df.groupby(['County','Year'])['Rate or Count'].transform('sum')
dispdf = df[['County','Year','Total Dispensation per County/Year']].drop_duplicates()


In [73]:
##Merging Dispensation
dfmerge3 = pd.merge(dfmerge2,dispdf, how="inner",left_on = ['County','Year'],right_on = ['County','Year'])


In [74]:
dfmerge3 = dfmerge3.drop_duplicates()

In [75]:
dfmerge3.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County,Total Risky Prescibing per County/Year,Total Dispensation per County/Year
0,Delaware,2018,0.65625,0.703125,55,30,87748.56,653144.92
64,Delaware,2019,0.717949,0.615385,55,30,70744.46,582654.38
128,Delaware,2020,0.555556,0.587302,55,30,57569.091674,514813.12
192,Delaware,2021,0.56,0.7,55,30,38628.769756,367033.862535
240,Chester,2018,0.813397,0.751196,23,25,64356.48,480173.99


## Arrests

In [152]:
df = pd.read_csv('../data/Opioid_Seizures_and_Arrests_CY_2013_-_Current_Quarterly_County_State_Police(1).csv')


In [153]:
df = df.rename(columns={'County Name': 'County'})

In [154]:
df1Fentanyl = df.assign(Fentanyl_Incident = np.where(df['Drug']=='Fentanyl',df['Incident Count'],0))\
   .groupby(['County','Year']).agg({'Fentanyl_Incident':sum}).reset_index().drop_duplicates()

df1Heroin= df.assign(Heroin_Incident = np.where(df['Drug']=='Heroin',df['Incident Count'],0))\
   .groupby(['County','Year']).agg({'Heroin_Incident':sum}).reset_index().drop_duplicates()

df1Opium= df.assign(Opium_Incident = np.where(df['Drug']=='Opium',df['Incident Count'],0))\
   .groupby(['County','Year']).agg({'Opium_Incident':sum}).reset_index().drop_duplicates()

In [155]:
df2Fentanyl = df.assign(Fentanyl_Arrests = np.where(df['Drug']=='Fentanyl',df['Arrests'],0))\
   .groupby(['County','Year']).agg({'Fentanyl_Arrests':sum}).reset_index().drop_duplicates()

df2Heroin= df.assign(Heroin_Arrests = np.where(df['Drug']=='Heroin',df['Arrests'],0))\
   .groupby(['County','Year']).agg({'Heroin_Arrests':sum}).reset_index().drop_duplicates()

df2Opium= df.assign(Opium_Arrests = np.where(df['Drug']=='Opium',df['Arrests'],0))\
   .groupby(['County','Year']).agg({'Opium_Arrests':sum}).reset_index().drop_duplicates()

In [156]:
df3Fentanyl = df.assign(Fentanyl_Quantity = np.where(df['Drug']=='Fentanyl',df['Drug Quantity'],0))\
   .groupby(['County','Year']).agg({'Fentanyl_Quantity':sum}).reset_index().drop_duplicates()

df3Heroin= df.assign(Heroin_Quantity = np.where(df['Drug']=='Heroin',df['Drug Quantity'],0))\
   .groupby(['County','Year']).agg({'Heroin_Quantity':sum}).reset_index().drop_duplicates()

df3Opium= df.assign(Opium_Quantity = np.where(df['Drug']=='Opium',df['Drug Quantity'],0))\
   .groupby(['County','Year']).agg({'Opium_Quantity':sum}).reset_index().drop_duplicates()

In [157]:
data_frames = [df1Fentanyl,df1Heroin,df1Opium,df2Fentanyl,df2Heroin,df2Opium,df3Fentanyl,
              df3Heroin,df3Opium]  ## combining dataframes

## Merging dataframe
df_merged = reduce(lambda  left,right: pd.merge(left,right,on=['County','Year'],
                                            how='outer'), data_frames)


##Creating computed collumns

df_merged['%Incidents Fentanyl'] = (df_merged['Fentanyl_Incident']/ 
(df_merged['Fentanyl_Incident'] +df_merged['Opium_Incident'] + df_merged['Heroin_Incident']*100))

df_merged['%Arrests Fentanyl'] = (df_merged['Fentanyl_Arrests']/ 
(df_merged['Fentanyl_Arrests'] + df_merged['Opium_Arrests'] + df_merged['Heroin_Arrests']*100))

df_merged['%Quantity Fentanyl'] = (df_merged['Fentanyl_Quantity']/ 
(df_merged['Fentanyl_Quantity'] + df_merged['Opium_Quantity'] + df_merged['Heroin_Quantity']*100))
dfarrestsfinal = df_merged

In [158]:
dfmerge4 = pd.merge(dfmerge3,dfarrestsfinal, how="inner",left_on = ['County','Year'],right_on = ['County','Year'])



In [159]:
dfmerge4 =dfmerge4.drop_duplicates()

In [160]:
dfmerge4.head()

Unnamed: 0,County,Year,Percent Narcan Admin Per County/Year,Percent Survive Overdose Per County/Year,Total of Take Back Locations by County,Total of Treatment Locations by County,Total Risky Prescibing per County/Year,Total Dispensation per County/Year,Fentanyl_Incident,Heroin_Incident,Opium_Incident,Fentanyl_Arrests,Heroin_Arrests,Opium_Arrests,Fentanyl_Quantity,Heroin_Quantity,Opium_Quantity,%Incidents Fentanyl,%Arrests Fentanyl,%Quantity Fentanyl
0,Delaware,2018,0.65625,0.703125,55,30,87748.56,653144.92,5,144,0,3,164,0,0.02077,0.538028,0.0,0.000347,0.000183,0.000386
1,Delaware,2019,0.717949,0.615385,55,30,70744.46,582654.38,15,173,0,19,198,0,5.27221,5.50115,0.0,0.000866,0.000959,0.009493
2,Delaware,2020,0.555556,0.587302,55,30,57569.091674,514813.12,23,159,0,27,202,0,0.16478,1.270373,0.0,0.001444,0.001335,0.001295
3,Delaware,2021,0.56,0.7,55,30,38628.769756,367033.862535,27,122,2,39,150,2,17.05824,4.397683,1.2e-05,0.002208,0.002593,0.037341
4,Chester,2018,0.813397,0.751196,23,25,64356.48,480173.99,5,76,2,6,64,2,1.09655,0.106369,0.00025,0.000657,0.000936,0.093453


In [161]:
dfmerge4.to_csv('Mastercounty.csv')