In [1]:
# Dependencies
import datetime
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from census import Census
from sqlalchemy import create_engine, inspect

# Import password and api key
from config2 import api_key, password

# EXTRACT

### Perform API request for census data from 2012 to 2019

In [2]:
# Survey years to request from the ACS 5-year endpoint
years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
# Collect one DataFrame per year and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
yearly_frames = []
# Make one API call per year (every county of state FIPS 09)
for year in years:
    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", "B02001_002E","B02001_003E","B02001_004E","B02001_005E","B02001_006E","B02001_008E",
                        "B03001_003E","B01003_001E","B01002_001E","B19301_001E","B17001_002E","B15003_002E","B23025_005E"), {'for':'county:*','in':'state:09'})
    # Convert to DataFrame and tag every row with the survey year it came from
    census_pd = pd.DataFrame(census_data)
    census_pd['Year'] = year
    yearly_frames.append(census_pd)
# One combined frame with a fresh 0..n-1 index
census_test_df = pd.concat(yearly_frames, ignore_index=True, sort=False)

# Read CSV file of accidental drug overdose deaths in Connecticut from 2012 to 2020

In [3]:
# Load the raw accidental drug-related deaths CSV (relative path) and preview it
drug_df = pd.read_csv("Resources/Accidental_Drug_Related_Deaths_2012-2020.csv")
drug_df.head()

Unnamed: 0,ID,Date,Date Type,Age,Sex,Race,Residence City,Residence County,Residence State,Death City,...,Morphine (Not Heroin),Hydromorphone,Xylazine,Other,Opiate NOS,Any Opioid,Manner of Death,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,12-0187,07/17/2012,DateofDeath,34.0,Female,White,MAHOPAC,PUTNAM,,DANBURY,...,,,,Duster,,,Accident,"DANBURY, CT\n(41.393666, -73.451539)",,"CT\n(41.575155, -72.738288)"
1,12-0258,10/01/2012,DateofDeath,51.0,Male,White,PORTLAND,MIDDLESEX,,PORTLAND,...,,,,,,,Accident,"PORTLAND, CT\n(41.581345, -72.634112)","PORTLAND, CT\n(41.581345, -72.634112)","CT\n(41.575155, -72.738288)"
2,13-0146,04/28/2013,DateofDeath,28.0,Male,White,,,,HARTFORD,...,,,,,,,Accident,"HARTFORD, CT\n(41.765775, -72.673356)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"
3,14-0150,04/06/2014,DateofDeath,46.0,Male,White,WATERBURY,,,TORRINGTON,...,,,,,,,Accident,"TORRINGTON, CT\n(41.812186, -73.101552)","WATERBURY, CT\n(41.554261, -73.043069)","CT\n(41.575155, -72.738288)"
4,14-0183,04/27/2014,DateofDeath,52.0,Male,White,NEW LONDON,,,NEW LONDON,...,,,,,,,Accident,"NEW LONDON, CT\n(41.355167, -72.099561)","NEW LONDON, CT\n(41.355167, -72.099561)","CT\n(41.575155, -72.738288)"


# TRANSFORM

## Census data cleaning

In [4]:
# Work on a copy so the combined API result in census_test_df stays untouched
census_df=census_test_df.copy()
census_df.head(2)

Unnamed: 0,NAME,B19013_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E,B01003_001E,B01002_001E,B19301_001E,B17001_002E,B15003_002E,B23025_005E,state,county,Year
0,"Windham County, Connecticut",58489.0,108037.0,2233.0,337.0,1420.0,44.0,3111.0,11332.0,118046.0,39.4,27456.0,13303.0,865.0,6870.0,9,15,2012
1,"Middlesex County, Connecticut",76659.0,147872.0,7618.0,126.0,4430.0,82.0,3417.0,7968.0,165639.0,43.0,39776.0,9517.0,703.0,6339.0,9,7,2012


In [5]:
# Map the ACS variable codes (plus the raw NAME / state fields) to readable column names
acs_column_names = {
    "NAME": "Name",
    "state": "State",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "B15003_002E": "Uneducated",
    "B23025_005E": "Unemployment Count",
    "B02001_002E": "White",
    "B02001_003E": "Black",
    "B02001_004E": "American Indian",
    "B02001_005E": "Asian",
    "B02001_006E": "Native Hawaiian",
    "B02001_008E": "Two or more races",
    "B03001_003E": "Hispanic",
}
census_df = census_df.rename(columns=acs_column_names)

In [6]:
# Split "Name" ("<county>, <state>") into its two parts and keep the county part
county_state = ["County name", "State",]
census = census_df["Name"].astype(str).str.split(pat=",", expand=True)
census.columns = county_state
census_df['County'] = census['County name']

# Both rates are percentages of the total population
population_int = census_df["Population"].astype(int)
# Poverty Rate = Poverty Count / Population
census_df["Poverty Rate"] = 100 * census_df["Poverty Count"].astype(int) / population_int
# Uneducated Rate = Uneducated / Population
census_df["Uneducated Rate"] = 100 * census_df["Uneducated"].astype(int) / population_int

In [7]:
# Reduce labels such as "Windham County" to the bare, upper-cased county name.
# The suffix is removed with an anchored regex: str.strip("County") treats its
# argument as a *set of characters* to trim from both ends, not as a suffix, so
# it left the separating space behind ("Windham ") and could also eat letters
# belonging to the name itself.
census_df["County"] = (
    census_df["County"]
    .str.replace(r"\s+County\s*$", "", regex=True)
    .str.strip()   # guard against any remaining surrounding whitespace
    .str.upper()
)
census_df.head(2)

Unnamed: 0,Name,Household Income,White,Black,American Indian,Asian,Native Hawaiian,Two or more races,Hispanic,Population,...,Per Capita Income,Poverty Count,Uneducated,Unemployment Count,State,county,Year,County,Poverty Rate,Uneducated Rate
0,"Windham County, Connecticut",58489.0,108037.0,2233.0,337.0,1420.0,44.0,3111.0,11332.0,118046.0,...,27456.0,13303.0,865.0,6870.0,9,15,2012,WINDHAM,11.269336,0.732765
1,"Middlesex County, Connecticut",76659.0,147872.0,7618.0,126.0,4430.0,82.0,3417.0,7968.0,165639.0,...,39776.0,9517.0,703.0,6339.0,9,7,2012,MIDDLESEX,5.745628,0.424417


In [8]:
# Keep only the columns needed downstream; everything else is dropped here
kept_columns = [
    'Year', 'County', 'Population', 'Median Age',
    'Household Income', 'Per Capita Income', 'Poverty Rate', 'Uneducated Rate',
    'White', 'Black', 'American Indian', 'Asian', 'Native Hawaiian', 'Hispanic',
]
census_df = census_df[kept_columns]


In [9]:
# Lower-case every column label
census_df.columns = census_df.columns.str.lower()

In [10]:
# Spot-check the 'year' column after the renames
census_df['year']

0     2012
1     2012
2     2012
3     2012
4     2012
      ... 
59    2019
60    2019
61    2019
62    2019
63    2019
Name: year, Length: 64, dtype: int64

In [11]:
# List the lower-cased column labels
census_df.columns

Index(['year', 'county', 'population', 'median age', 'household income',
       'per capita income', 'poverty rate', 'uneducated rate', 'white',
       'black', 'american indian', 'asian', 'native hawaiian', 'hispanic'],
      dtype='object')

In [12]:
# Rename columns once more so they match the database schema.
# NOTE(review): the *_population_rate columns still hold the raw ACS counts
# (no division by population happens anywhere above) -- confirm that this is
# what the schema expects.
race_groups = ['white', 'black', 'american indian', 'asian', 'native hawaiian', 'hispanic']
snake_case_only = ['household income', 'median age', 'per capita income',
                   'poverty rate', 'uneducated rate']
schema_names = {group: group.replace(' ', '_') + '_population_rate' for group in race_groups}
schema_names.update({label: label.replace(' ', '_') for label in snake_case_only})
census_df = census_df.rename(columns=schema_names)
census_df.head()

Unnamed: 0,year,county,population,median_age,household_income,per_capita_income,poverty_rate,uneducated_rate,white_population_rate,black_population_rate,american_indian_population_rate,asian_population_rate,native_hawaiian_population_rate,hispanic_population_rate
0,2012,WINDHAM,118046.0,39.4,58489.0,27456.0,11.269336,0.732765,108037.0,2233.0,337.0,1420.0,44.0,11332.0
1,2012,MIDDLESEX,165639.0,43.0,76659.0,39776.0,5.745628,0.424417,147872.0,7618.0,126.0,4430.0,82.0,7968.0
2,2012,NEW HAVEN,860995.0,39.1,62234.0,32487.0,11.654307,0.724278,649827.0,107283.0,2188.0,30743.0,416.0,129612.0
3,2012,LITCHFIELD,189277.0,44.4,71345.0,37410.0,6.060958,0.460172,178558.0,2229.0,507.0,3099.0,8.0,8575.0
4,2012,HARTFORD,893504.0,39.9,64752.0,34356.0,11.127538,0.771345,654726.0,117181.0,2327.0,38823.0,397.0,137155.0


In [13]:
# Use a 1-based running number named "id" as the index
census_df = census_df.assign(id=range(1, len(census_df) + 1)).set_index("id")
# 'year' stays an integer column (no datetime conversion is applied)
census_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 1 to 64
Data columns (total 14 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   year                             64 non-null     int64  
 1   county                           64 non-null     object 
 2   population                       64 non-null     float64
 3   median_age                       64 non-null     float64
 4   household_income                 64 non-null     float64
 5   per_capita_income                64 non-null     float64
 6   poverty_rate                     64 non-null     float64
 7   uneducated_rate                  64 non-null     float64
 8   white_population_rate            64 non-null     float64
 9   black_population_rate            64 non-null     float64
 10  american_indian_population_rate  64 non-null     float64
 11  asian_population_rate            64 non-null     float64
 12  native_hawaiian_populati

In [14]:
# Save the cleaned census table as a CSV file (the index is written as well)
census_df.to_csv("Resources/census_CT.csv")

## Drug overdose death data cleaning

In [60]:
# Continue from the frame loaded earlier: this binds a second name to the same
# DataFrame object (no copy is made and the CSV is not read again)
drug_accident = drug_df
drug_accident.head(2)

Unnamed: 0,ID,Date,Date Type,Age,Sex,Race,Residence City,Residence County,Residence State,Death City,...,Morphine (Not Heroin),Hydromorphone,Xylazine,Other,Opiate NOS,Any Opioid,Manner of Death,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,12-0187,07/17/2012,DateofDeath,34.0,Female,White,MAHOPAC,PUTNAM,,DANBURY,...,,,,Duster,,,Accident,"DANBURY, CT\n(41.393666, -73.451539)",,"CT\n(41.575155, -72.738288)"
1,12-0258,10/01/2012,DateofDeath,51.0,Male,White,PORTLAND,MIDDLESEX,,PORTLAND,...,,,,,,,Accident,"PORTLAND, CT\n(41.581345, -72.634112)","PORTLAND, CT\n(41.581345, -72.634112)","CT\n(41.575155, -72.738288)"


In [61]:
# Keep only the date/demographic fields, cause and manner of death, and the per-substance columns
residence_and_person_cols = ['Date', 'Age', 'Sex', 'Race', 'Residence City',
                             'Residence County', 'Residence State']
substance_cols = ['Heroin', 'Cocaine', 'Fentanyl', 'Fentanyl Analogue', 'Oxycodone',
                  'Oxymorphone', 'Ethanol', 'Hydrocodone', 'Benzodiazepine', 'Methadone',
                  'Amphet', 'Tramad', 'Morphine (Not Heroin)', 'Hydromorphone', 'Xylazine',
                  'Other', 'Opiate NOS', 'Any Opioid']
target_cols = [*residence_and_person_cols, 'Cause of Death', *substance_cols, 'Manner of Death']
drug_death = drug_accident.loc[:, target_cols].copy()

In [62]:
# Count records per state of residence (value_counts leaves out missing values by default)
drug_accident["Residence State"].value_counts()

CT    5776
NY      48
MA      40
FL      24
NJ      14
RI      12
NC      10
PA       7
CA       5
TX       5
NH       4
VA       4
ME       4
CO       3
AL       3
SC       3
MD       2
WA       2
GA       2
VT       2
IL       2
LA       2
MN       1
SD       1
IA       1
MI       1
TN       1
OH       1
OK       1
Name: Residence State, dtype: int64

In [63]:
# Keep only records whose state of residence is Connecticut
is_ct_resident = drug_death['Residence State'].eq('CT')
drug_death = drug_death[is_ct_resident]
drug_death.head(2)

Unnamed: 0,Date,Age,Sex,Race,Residence City,Residence County,Residence State,Cause of Death,Heroin,Cocaine,...,Methadone,Amphet,Tramad,Morphine (Not Heroin),Hydromorphone,Xylazine,Other,Opiate NOS,Any Opioid,Manner of Death
5,01/12/2015,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Acute Cocaine Toxicity,,Y,...,,,,,,,,,,Accident
6,02/01/2015,52.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,Acute Heroin Toxicity,Y,,...,,,,,,,,,Y,Accident


In [64]:
# Count the remaining records per county of residence
drug_death["Residence County"].value_counts()

HARTFORD      1643
NEW HAVEN     1612
FAIRFIELD      924
NEW LONDON     525
LITCHFIELD     352
MIDDLESEX      258
WINDHAM        222
TOLLAND        182
Name: Residence County, dtype: int64

In [65]:
# List the columns kept so far
drug_death.columns

Index(['Date', 'Age', 'Sex', 'Race', 'Residence City', 'Residence County',
       'Residence State', 'Cause of Death', 'Heroin', 'Cocaine', 'Fentanyl',
       'Fentanyl Analogue', 'Oxycodone', 'Oxymorphone', 'Ethanol',
       'Hydrocodone', 'Benzodiazepine', 'Methadone', 'Amphet', 'Tramad',
       'Morphine (Not Heroin)', 'Hydromorphone', 'Xylazine', 'Other',
       'Opiate NOS', 'Any Opioid', 'Manner of Death'],
      dtype='object')

In [66]:
# Count missing values per column before dropping incomplete rows
drug_death.isnull().sum()

Date                        0
Age                         0
Sex                         7
Race                       13
Residence City              3
Residence County           58
Residence State             0
Cause of Death              0
Heroin                   3534
Cocaine                  3875
Fentanyl                 1913
Fentanyl Analogue        5246
Oxycodone                5250
Oxymorphone              5715
Ethanol                  4199
Hydrocodone              5686
Benzodiazepine           4219
Methadone                5251
Amphet                   5498
Tramad                   5609
Morphine (Not Heroin)    5736
Hydromorphone            5731
Xylazine                 5644
Other                    5393
Opiate NOS               5669
Any Opioid               1303
Manner of Death             1
dtype: int64

In [67]:
# Drop every row that is missing race, sex, residence city, residence county or manner of death
required_cols = ['Race', 'Sex', 'Residence City','Residence County', 'Manner of Death']
drug_death = drug_death.dropna(subset=required_cols)
# Re-count the missing values to confirm the required columns are now complete
drug_death.isna().sum()

Date                        0
Age                         0
Sex                         0
Race                        0
Residence City              0
Residence County            0
Residence State             0
Cause of Death              0
Heroin                   3484
Cocaine                  3823
Fentanyl                 1886
Fentanyl Analogue        5173
Oxycodone                5172
Oxymorphone              5633
Ethanol                  4144
Hydrocodone              5606
Benzodiazepine           4159
Methadone                5180
Amphet                   5421
Tramad                   5528
Morphine (Not Heroin)    5655
Hydromorphone            5649
Xylazine                 5564
Other                    5315
Opiate NOS               5588
Any Opioid               1277
Manner of Death             0
dtype: int64

In [69]:
# Summarise row count, non-null counts and dtypes after the filtering
drug_death.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5694 entries, 5 to 7678
Data columns (total 27 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Date                   5694 non-null   object 
 1   Age                    5694 non-null   float64
 2   Sex                    5694 non-null   object 
 3   Race                   5694 non-null   object 
 4   Residence City         5694 non-null   object 
 5   Residence County       5694 non-null   object 
 6   Residence State        5694 non-null   object 
 7   Cause of Death         5694 non-null   object 
 8   Heroin                 2210 non-null   object 
 9   Cocaine                1871 non-null   object 
 10  Fentanyl               3808 non-null   object 
 11  Fentanyl Analogue      521 non-null    object 
 12  Oxycodone              522 non-null    object 
 13  Oxymorphone            61 non-null     object 
 14  Ethanol                1550 non-null   object 
 15  Hydr

In [70]:
# 'Date' is still the raw MM/DD/YYYY text at this point; it is parsed with
# pd.to_datetime further down, on drug_merge_df.
drug_death['Date']

5       01/12/2015
6       02/01/2015
8       05/26/2015
9       07/17/2015
11      01/17/2016
           ...    
7673    08/18/2018
7675    11/19/2020
7676    10/31/2020
7677    09/17/2016
7678    10/25/2019
Name: Date, Length: 5694, dtype: object

In [71]:
# Normalise the column names to lower snake_case. The residence fields and the
# morphine column get hand-picked short names; every other label is simply
# lower-cased with spaces turned into underscores.
special_names = {
    'Residence City': 'city',
    'Residence County': 'county',
    'Residence State': 'state',
    'Morphine (Not Heroin)': 'morphine_not_heroin',
}
drug_death_df = drug_death.rename(
    columns=lambda label: special_names.get(label, label.lower().replace(' ', '_'))
)
drug_death_df.columns

Index(['date', 'age', 'sex', 'race', 'city', 'county', 'state',
       'cause_of_death', 'heroin', 'cocaine', 'fentanyl', 'fentanyl_analogue',
       'oxycodone', 'oxymorphone', 'ethanol', 'hydrocodone', 'benzodiazepine',
       'methadone', 'amphet', 'tramad', 'morphine_not_heroin', 'hydromorphone',
       'xylazine', 'other', 'opiate_nos', 'any_opioid', 'manner_of_death'],
      dtype='object')

In [72]:
# Add a 1-based running "id" column.
# "id" stays a regular column on purpose: the cells that follow select it by
# name (tar_cols1 / tar_cols2). The former `drug_death_df.set_index("id")`
# call discarded its return value, so it never changed the frame; it has been
# removed to stop suggesting otherwise.
if 'id' not in drug_death_df.columns:  # lets the cell be re-run without a ValueError
    drug_death_df.insert(0, 'id', range(1, 1+ len(drug_death_df)))
drug_death_df.head(3)

Unnamed: 0,id,date,age,sex,race,city,county,state,cause_of_death,heroin,...,methadone,amphet,tramad,morphine_not_heroin,hydromorphone,xylazine,other,opiate_nos,any_opioid,manner_of_death
5,1,01/12/2015,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Acute Cocaine Toxicity,,...,,,,,,,,,,Accident
6,2,02/01/2015,52.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,Acute Heroin Toxicity,Y,...,,,,,,,,,Y,Accident
8,3,05/26/2015,38.0,Female,White,WATERBURY,NEW HAVEN,CT,Cocaine,,...,,,,,,,,,,Accident


In [73]:
# Split drug_death_df into a cause-of-death frame and a per-substance frame.
# Both share the id / date / demographic / residence / manner-of-death columns.
shared_cols = ['id', 'date', 'age', 'sex', 'race', 'city', 'county', 'state', 'manner_of_death']
substance_flag_cols = ['heroin', 'cocaine', 'fentanyl', 'fentanyl_analogue', 'oxycodone',
                       'oxymorphone', 'ethanol', 'hydrocodone', 'benzodiazepine', 'methadone',
                       'amphet', 'tramad', 'morphine_not_heroin', 'hydromorphone', 'xylazine',
                       'other', 'opiate_nos', 'any_opioid']
tar_cols1 = shared_cols + ['cause_of_death']
tar_cols2 = shared_cols + substance_flag_cols
death_df = drug_death_df.loc[:, tar_cols1].copy()
drug_type_df = drug_death_df.loc[:, tar_cols2].copy()

In [74]:
# Index death_df by "id". drug_type_df keeps "id" as a regular column
# (its set_index call below is left disabled).
death_df.set_index('id', inplace=True)
# drug_type_df.set_index('id', inplace=True)

In [75]:
# Preview the per-substance frame before the missing values are filled
drug_type_df.head(2)

Unnamed: 0,id,date,age,sex,race,city,county,state,manner_of_death,heroin,...,benzodiazepine,methadone,amphet,tramad,morphine_not_heroin,hydromorphone,xylazine,other,opiate_nos,any_opioid
5,1,01/12/2015,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Accident,,...,,,,,,,,,,
6,2,02/01/2015,52.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,Accident,Y,...,,,,,,,,,,Y


In [76]:
# Per-substance columns that the loops in the following cells iterate over
drug_list = ['heroin', 'cocaine', 'fentanyl', 'fentanyl_analogue',
       'oxycodone', 'oxymorphone', 'ethanol', 'hydrocodone', 'benzodiazepine',
       'methadone', 'amphet', 'tramad', 'morphine_not_heroin', 'hydromorphone',
       'xylazine', 'other', 'opiate_nos', 'any_opioid']



In [77]:
# Missing entries in the substance columns become an explicit 'N'
drug_type_df[drug_list] = drug_type_df[drug_list].fillna('N')

In [78]:
# Preview the per-substance frame after the missing values were replaced with 'N'
drug_type_df.head()

Unnamed: 0,id,date,age,sex,race,city,county,state,manner_of_death,heroin,...,benzodiazepine,methadone,amphet,tramad,morphine_not_heroin,hydromorphone,xylazine,other,opiate_nos,any_opioid
5,1,01/12/2015,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Accident,N,...,N,N,N,N,N,N,N,N,N,N
6,2,02/01/2015,52.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,Accident,Y,...,N,N,N,N,N,N,N,N,N,Y
8,3,05/26/2015,38.0,Female,White,WATERBURY,NEW HAVEN,CT,Accident,N,...,N,N,N,N,N,N,N,N,N,N
9,4,07/17/2015,42.0,Male,White,CANTERBURY,WINDHAM,CT,Accident,Y,...,N,N,N,N,N,N,N,N,N,Y
11,5,01/17/2016,26.0,Male,Black,BRISTOL,HARTFORD,CT,Accident,Y,...,N,N,N,N,N,N,N,N,N,Y


# Merging all drug columns to one

In [79]:
# Separate copy for building the merged 'drug' column, so drug_type_df keeps its per-substance columns
drug_merge_df=drug_type_df.copy()

In [90]:
# Parse the 'date' strings into datetime64 values
drug_merge_df['date'] = pd.to_datetime(drug_merge_df['date'])
drug_merge_df['date']

5      2015-01-12
6      2015-02-01
8      2015-05-26
9      2015-07-17
11     2016-01-17
          ...    
7673   2018-08-18
7675   2020-11-19
7676   2020-10-31
7677   2016-09-17
7678   2019-10-25
Name: date, Length: 5694, dtype: datetime64[ns]

In [94]:
# Turn every "Y" flag into the name of its own column so the columns can be
# concatenated into one text field afterwards.
# Only a stand-alone "Y" is rewritten (word-boundary regex). A plain
# str.replace("Y", x) substitutes every capital Y it finds, which mangles
# free-text entries (the 'other' column holds text such as "Duster") whenever
# they contain that letter.
for x in drug_list:
    drug_merge_df[x] = drug_merge_df[x].str.replace(r"\bY\b", x, regex=True)

In [95]:
# Preview: "Y" entries now carry the name of their column
drug_merge_df.head()

Unnamed: 0,id,date,age,sex,race,city,county,state,manner_of_death,heroin,...,benzodiazepine,methadone,amphet,tramad,morphine_not_heroin,hydromorphone,xylazine,other,opiate_nos,any_opioid
5,1,2015-01-12,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Accident,N,...,N,N,N,N,N,N,N,N,N,N
6,2,2015-02-01,52.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,Accident,heroin,...,N,N,N,N,N,N,N,N,N,any_opioid
8,3,2015-05-26,38.0,Female,White,WATERBURY,NEW HAVEN,CT,Accident,N,...,N,N,N,N,N,N,N,N,N,N
9,4,2015-07-17,42.0,Male,White,CANTERBURY,WINDHAM,CT,Accident,heroin,...,N,N,N,N,N,N,N,N,N,any_opioid
11,5,2016-01-17,26.0,Male,Black,BRISTOL,HARTFORD,CT,Accident,heroin,...,N,N,N,N,N,N,N,N,N,any_opioid


In [96]:
# Join the per-substance columns into one comma-terminated string per row.
# The columns are taken last-to-first (so 'any_opioid' leads and 'heroin' ends
# the string) and every entry, including the final one, is followed by a comma.
reversed_drugs = drug_list[::-1]
leading_col, trailing_cols = reversed_drugs[0], reversed_drugs[1:]
drug_merge_df['drug'] = (
    drug_merge_df[leading_col].str.cat(
        [drug_merge_df[col] for col in trailing_cols], sep=","
    )
    + ","
)

In [97]:
# Remove every "N" placeholder that follows another entry.
# The pattern matches only a complete ",N" token (one followed by a comma or
# the end of the string). A bare substring replace of ',N' would also chop the
# first letter off any entry that merely starts with a capital N.
drug_merge_df['drug']=drug_merge_df['drug'].str.replace(r',N(?=,|$)', "", regex=True)

In [98]:
# Remove the remaining "N" placeholders, i.e. an "N," token at the very start
# of the string or directly after a comma. A bare substring replace of 'N,'
# would also chop the last letter off any entry that ends in a capital N.
# NOTE(review): entries keep the trailing comma from the concatenation step
# (e.g. "cocaine,"); confirm whether the database expects it.
drug_merge_df['drug']=drug_merge_df['drug'].str.replace(r'(?:^|(?<=,))N,', "", regex=True)

In [99]:
# The individual substance columns are now redundant with 'drug'; drop them
drug_merge_df = drug_merge_df.drop(columns=drug_list)

In [None]:
# Preview the frame with the single merged 'drug' column
drug_merge_df.head()

In [43]:
# Spot-check the merged 'drug' strings on a small slice: 50-year-olds residing in New London county
drug_merge_df.loc[(drug_merge_df['age']==50) & (drug_merge_df['county']=='NEW LONDON') ]

Unnamed: 0,id,date,age,sex,race,city,county,state,manner_of_death,drug
5,1,2015-01-12,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Accident,"cocaine,"
85,56,2019-01-05,50.0,Male,White,NEW LONDON,NEW LONDON,CT,Accident,"any_opioid,benzodiazepine,ethanol,fentanyl,"
4848,3591,2016-10-16,50.0,Female,White,NORWICH,NEW LONDON,CT,Accident,"any_opioid,oxycodone,heroin,"
5886,4364,2018-06-14,50.0,Male,White,MYSTIC,NEW LONDON,CT,Accident,"any_opioid,ethanol,fentanyl,heroin,"
6500,4827,2019-03-16,50.0,Male,"Hispanic, White",NORWICH,NEW LONDON,CT,Accident,"any_opioid,heroin,"
6991,5185,2015-02-25,50.0,Male,"Hispanic, White",NEW LONDON,NEW LONDON,CT,Accident,"any_opioid,cocaine,heroin,"
7131,5290,2017-06-03,50.0,Female,White,NORWICH,NEW LONDON,CT,Accident,"ethanol,fentanyl,"
7193,5333,2015-03-18,50.0,Male,Black,NEW LONDON,NEW LONDON,CT,Accident,"any_opioid,heroin,"
7231,5360,2020-12-09,50.0,Male,White,NORWICH,NEW LONDON,CT,Accident,"any_opioid,xylazine,ethanol,fentanyl,"
7357,5455,2015-04-21,50.0,Female,White,UNCASVILLE,NEW LONDON,CT,Accident,"any_opioid,heroin,"


In [44]:
# Look up a single record by id, using the frame's own 'id' column for the mask
drug_merge_df.loc[drug_merge_df['id'].eq(5522)]

Unnamed: 0,id,date,age,sex,race,city,county,state,manner_of_death,drug
7450,5522,2018-07-14,41.0,Female,White,WATERBURY,NEW HAVEN,CT,Accident,


In [101]:
# Make "id" the index of the merged frame (done in place)
drug_merge_df.set_index("id",inplace=True)

In [102]:
# Write both drug frames to CSV: the per-substance version and the merged-'drug' version
drug_type_df.to_csv('Resources/drug_type2.csv')
drug_merge_df.to_csv('Resources/drug_MERGE.csv')

# Connecting to database

In [103]:
# Connect to the local PostgreSQL database Census_DB as user "postgres".
# The password is percent-encoded before it is placed in the URL: characters
# such as "@", ":" or "/" would otherwise be read as URL delimiters and the
# connection string would be parsed incorrectly.
encoded_password = quote(str(password), safe="")
engine = create_engine(f'postgresql+psycopg2://postgres:{encoded_password}@localhost:5432/Census_DB')
# NOTE(review): conn is not used by the cells visible here (to_sql is handed
# the engine) and is never closed -- confirm whether it is still needed.
conn=engine.connect()

In [91]:
# List the tables present in the database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector interface returns the same list of names.
inspect(engine).get_table_names()

  engine.table_names()


['ct_census', 'drugoverdose']

In [50]:
# Append the census rows to the ct_census table; the frame's index is written as a column too.
# NOTE(review): if_exists='append' inserts the rows again on every run -- confirm the
# table is empty (or that duplicates are acceptable) before re-running this cell.
census_df.to_sql(name='ct_census',con=engine,if_exists='append',index=True)

In [104]:
# Append the merged drug rows to the drugoverdose table; the frame's index is written as a column too.
# NOTE(review): if_exists='append' inserts the rows again on every run -- confirm the
# table is empty (or that duplicates are acceptable) before re-running this cell.
drug_merge_df.to_sql(name='drugoverdose',con=engine,if_exists='append',index=True)