# Import Dependencies

In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect
# Lift pandas' display truncation so every row and column of a result is shown
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [2]:
# Create MySQL Database Connection
# ----------------------------------
# Read the connection URL from the environment so real credentials never have
# to be written into (or committed with) the notebook. When the variable is
# not set, fall back to the placeholder URL, which must be edited before use.
DATABASE_URL = os.environ.get(
    'ACCIDENTALDRUGDEATHS_DB_URL',
    'mysql+pymysql://user:password@host/accidentaldrugdeaths',
)
# pool_recycle=3600 asks the pool to replace connections older than one hour
engine = create_engine(DATABASE_URL, pool_recycle=3600)
conn = engine.connect()

In [3]:
# Confirm connection by listing the tables in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported way to list table names.
inspect(engine).get_table_names()

['accidentaldrugdeaths']

# Explore Data Using <font color="red">SELECT *</font>

<br>
<strong>SQL Syntax</strong><br>
SELECT *<br>
FROM table<br>
LIMIT VALUE;

In [4]:
# Preview query: all columns, limited to the first three rows
sql_view = (
    "SELECT * "
    "FROM accidentaldrugdeaths "
    "LIMIT 3;"
)

In [5]:
# Execute the preview query on the open connection and load it into a DataFrame
view_data = pd.read_sql(sql=sql_view, con=conn)
# Show the returned rows
view_data

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,Location,LocationifOther,DescriptionofInjury,InjuryPlace,InjuryCity,InjuryCounty,InjuryState,COD,OtherSignifican,Heroin,Cocaine,Fentanyl,FentanylAnalogue,Oxycodone,Oxymorphone,Ethanol,Hydrocodone,Benzodiazepine,Methadone,Amphet,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,14-0273,06/28/2014 12:00:00 AM,DateReported,,,,,,,,,,,substance,,,,,"Acute fent, hydrocod, benzodiazepine",,,,Y,,,,,Y,Y,,,,,,,,,Accident,"CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"
1,13-0102,03/21/2013 12:00:00 AM,DateofDeath,48.0,Male,Black,NORWALK,,,NORWALK,FAIRFIELD,Hospital,,,,,,,Cocaine Intoxication,,,Y,,,,,,,,,,,,,,,,Accident,"Norwalk, CT\n(41.11805, -73.412906)","NORWALK, CT\n(41.11805, -73.412906)","CT\n(41.575155, -72.738288)"
2,16-0165,03/13/2016 12:00:00 AM,DateofDeath,30.0,Female,White,SANDY HOOK,FAIRFIELD,CT,DANBURY,,Hospital,,Substance Abuse,Unknown,UNKNOWN,,,Acute Heroin and Cocaine Intoxication,,Y,Y,,,,,,,,,,,,,,,Y,Accident,"Danbury, CT\n(41.393666, -73.451539)","SANDY HOOK, CT\n(41.419998, -73.282501)",


# The <font color="red">LIKE</font> OPERATOR

- The LIKE operator is required for any search condition that uses a wildcard.
- LIKE directs the DBMS to use the following pattern in the search.

<br>
<strong>SQL Syntax</strong><br>
SELECT * <br>
FROM table<br>
WHERE column <font color="red">LIKE</font> wildcards(s)+pattern;

# The <font color="red">%</font> Wildcard

 - The percent sign (%) is the most frequently used wildcard
 - % means to match any number of character occurrences
 - Where the % is positioned in the text string affects how the search is performed.

## Case 1: The % Wildcard <font color="red">at the end </font> of the pattern <font color="red">example: 'pattern%'</font>

 - Will search for all instances that start with the pattern before the % wildcard.
 - Case-Sensitivity: Check your DBMS documentation regarding case-sensitivity
 
<strong><font color="red">NOTE: '%%' is used here only because the query is run from Python through SQLAlchemy, where a literal '%' must be escaped by doubling it. When running the query directly in MySQL, use a single '%'.</font></strong>

In [6]:
# Case 1 query: the wildcard trails the pattern, so COD must begin with 'Cocaine'.
# '%%' is the doubled (escaped) form of a single '%' wildcard.
sql_wildcard1 = """SELECT * FROM accidentaldrugdeaths 
                    WHERE COD LIKE 'Cocaine%%' LIMIT 5;"""

In [7]:
# Execute the Case 1 query on the open connection and load it into a DataFrame
view_data1 = pd.read_sql(sql=sql_wildcard1, con=conn)
# Show the returned rows
view_data1

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,Location,LocationifOther,DescriptionofInjury,InjuryPlace,InjuryCity,InjuryCounty,InjuryState,COD,OtherSignifican,Heroin,Cocaine,Fentanyl,FentanylAnalogue,Oxycodone,Oxymorphone,Ethanol,Hydrocodone,Benzodiazepine,Methadone,Amphet,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,13-0102,03/21/2013 12:00:00 AM,DateofDeath,48,Male,Black,NORWALK,,,NORWALK,FAIRFIELD,Hospital,,,,,,,Cocaine Intoxication,,,Y,,,,,,,,,,,,,,,,Accident,"Norwalk, CT\n(41.11805, -73.412906)","NORWALK, CT\n(41.11805, -73.412906)","CT\n(41.575155, -72.738288)"
1,14-0114,03/08/2014 12:00:00 AM,DateofDeath,50,Male,White,HARTFORD,,,HARTFORD,,Residence,Friend's Apartment,Injection,Other,,,,"Cocaine, Heroin and Phencyclidine Intoxication",,Y,Y,,,,,,,,,,,,,,,,Accident,"Hartford, CT\n(41.765775, -72.673356)","HARTFORD, CT\n(41.765775, -72.673356)","CT\n(41.575155, -72.738288)"
2,13-0188,06/07/2013 12:00:00 AM,DateofDeath,46,Male,White,EAST HAVEN,NEW HAVEN,,NEW HAVEN,NEW HAVEN,Hospital,,Substance Abuse,Other,,,,Cocaine and Heroin Intoxication,,Y,Y,,,,,,,,,,,,,,,,Accident,"New Haven, CT\n(41.308252, -72.924161)","EAST HAVEN, CT\n(41.277971, -72.871954)","CT\n(41.575155, -72.738288)"
3,14-0219,05/23/2014 12:00:00 AM,DateofDeath,30,Male,White,TORRINGTON,,,TORRINGTON,LITCHFIELD,Residence,,Injection,Residence,,,,Cocaine and Heroin Intoxication,,Y,Y,,,,,,,,,,,,,,,,Accident,"TORRINGTON, CT\n(41.812186, -73.101552)","TORRINGTON, CT\n(41.812186, -73.101552)","CT\n(41.575155, -72.738288)"
4,15-0120,03/10/2015 12:00:00 AM,DateReported,56,Male,Black,HARTFORD,HARTFORD,CT,HARTFORD,HARTFORD,Hospital,,,Residence,,,,Cocaine Intoxication,,,Y,,,,,,,,,,,,,,,,Accident,"Hartford, CT\n(41.765775, -72.673356)","HARTFORD, CT\n(41.765775, -72.673356)","CT\n(41.575155, -72.738288)"


## Case 2: Multiple % Wildcards <font color="red">at the front and at the end</font> of a string pattern <font color="red">example: '%pattern%'</font>

 - This will search for and return any values that contain the text between the % wildcards.
 
<strong><font color="red">NOTE: '%%' is used here only because the query is run from Python through SQLAlchemy, where a literal '%' must be escaped by doubling it. When running the query directly in MySQL, use a single '%'.</font></strong>

In [8]:
# Case 2 query: wildcards on both sides, so COD only has to contain 'hydrocod'.
# Each '%%' is the doubled (escaped) form of a single '%' wildcard.
sql_wildcard2 = """SELECT * FROM accidentaldrugdeaths 
                    WHERE COD LIKE '%%hydrocod%%' LIMIT 5;"""

In [9]:
# Execute the Case 2 query on the open connection and load it into a DataFrame
view_data2 = pd.read_sql(sql=sql_wildcard2, con=conn)
# Show the returned rows
view_data2

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,Location,LocationifOther,DescriptionofInjury,InjuryPlace,InjuryCity,InjuryCounty,InjuryState,COD,OtherSignifican,Heroin,Cocaine,Fentanyl,FentanylAnalogue,Oxycodone,Oxymorphone,Ethanol,Hydrocodone,Benzodiazepine,Methadone,Amphet,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,14-0273,06/28/2014 12:00:00 AM,DateReported,,,,,,,,,,,substance,,,,,"Acute fent, hydrocod, benzodiazepine",,,,Y,,,,,Y,Y,,,,,,,,,Accident,"CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"
1,15-0475,09/14/2015 12:00:00 AM,DateReported,51.0,Male,White,MIDDLETOWN,MIDDLESEX,CT,MIDDLETOWN,MIDDLESEX,Residence,,,Residence,MIDDLETOWN,MIDDLESEX,CT,"Acute Intoxication ethanol, hydrocodone, and b...",,,,,,,,Y,Y,Y,,,,,,,,Y,Accident,"MIDDLETOWN, CT\n(41.544654, -72.651713)","MIDDLETOWN, CT\n(41.544654, -72.651713)","MIDDLETOWN, CT\n(41.544654, -72.651713)"
2,15-0007,01/04/2015 12:00:00 AM,DateReported,28.0,Male,White,MILFORD,NEW HAVEN,CT,MILFORD,NEW HAVEN,Residence,,,Residence,,,,Acute intoxication due to the combined effects...,,Y,,,,,,,Y,Y,,,,,,,,Y,Accident,"MILFORD, CT\n(41.224276, -73.057564)","MILFORD, CT\n(41.224276, -73.057564)","CT\n(41.575155, -72.738288)"
3,17-0967,12/09/2017 12:00:00 AM,DateReported,38.0,Female,White,WARREN,LITCHFIELD,CT,NEW MILFORD,LITCHFIELD,Hospital,,Substance Abuse,Residence,WARREN,LITCHFIELD,CT,Acute Intoxication due to the Combined Effects...,,,,,,,,,Y,Y,,,,,,,,,Accident,"New Milford, CT\n(41.576633, -73.408713)","WARREN, CT\n(41.744496, -73.348848)","WARREN, CT\n(41.744496, -73.348848)"
4,13-0150,05/01/2013 12:00:00 AM,DateofDeath,48.0,Male,White,DARIEN,FAIRFIELD,,DARIEN,FAIRFIELD,Residence,,Ingestion,Residence,,,,Hydrocodone and Methadone Intoxication,,,,,,,,,Y,,Y,,,,,,,,Accident,"DARIEN, CT\n(41.080002, -73.467371)","DARIEN, CT\n(41.080002, -73.467371)","CT\n(41.575155, -72.738288)"


## Case 3: A % Wildcard <font color="red">in the middle of a </font> string pattern <font color="red">example: 'pat%tern'</font>

 - This type of search is rarely useful, as it returns every value that starts with the text before the % wildcard and ends with the text after it.
 
<strong><font color="red">NOTE: '%%' is used here only because the query is run from Python through SQLAlchemy, where a literal '%' must be escaped by doubling it. When running the query directly in MySQL, use a single '%'.</font></strong>

In [10]:
# Case 3 query: the wildcard sits mid-pattern, so Sex must start with 'F' and
# end with 'e'. '%%' is the doubled (escaped) form of a single '%' wildcard.
sql_wildcard3 = """SELECT * FROM accidentaldrugdeaths 
                    WHERE Sex LIKE 'F%%e' LIMIT 5;"""

In [11]:
# Execute the Case 3 query on the open connection and load it into a DataFrame
view_data3 = pd.read_sql(sql=sql_wildcard3, con=conn)
# Show the returned rows
view_data3

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,Location,LocationifOther,DescriptionofInjury,InjuryPlace,InjuryCity,InjuryCounty,InjuryState,COD,OtherSignifican,Heroin,Cocaine,Fentanyl,FentanylAnalogue,Oxycodone,Oxymorphone,Ethanol,Hydrocodone,Benzodiazepine,Methadone,Amphet,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,16-0165,03/13/2016 12:00:00 AM,DateofDeath,30,Female,White,SANDY HOOK,FAIRFIELD,CT,DANBURY,,Hospital,,Substance Abuse,Unknown,UNKNOWN,,,Acute Heroin and Cocaine Intoxication,,Y,Y,,,,,,,,,,,,,,,Y,Accident,"Danbury, CT\n(41.393666, -73.451539)","SANDY HOOK, CT\n(41.419998, -73.282501)",
1,12-0205,08/12/2012 12:00:00 AM,DateofDeath,21,Female,White,WEST HARTFORD,HARTFORD,,WEST HARTFORD,HARTFORD,Residence,,Used Heroin,Residence,,,,Heroin Toxicity,,Y,,,,,,,,,,,,,,,,,Accident,"WEST HARTFORD, CT\n(41.762008, -72.741807)","WEST HARTFORD, CT\n(41.762008, -72.741807)","CT\n(41.575155, -72.738288)"
2,13-0404,11/10/2013 12:00:00 AM,DateofDeath,40,Female,White,HAMDEN,NEW HAVEN,,NEW HAVEN,NEW HAVEN,Hospital,,,,,,,Complications of Cocaine Intoxication,,,Y,,,,,,,,,,,,,,,,Accident,"New Haven, CT\n(41.308252, -72.924161)","HAMDEN, CT\n(41.382918, -72.907743)","CT\n(41.575155, -72.738288)"
3,12-0218,08/23/2012 12:00:00 AM,DateofDeath,26,Female,"Hispanic, White",,,,STRATFORD,FAIRFIELD,Other,Honey Spot,Injection/Inhalation,Hotel or Motel,,,,Multiple Drug Toxicity,,Y,Y,,,,,,,,,,,,,,,,Accident,"STRATFORD, CT\n(41.200888, -73.131323)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"
4,15-0334,07/05/2015 12:00:00 AM,DateReported,49,Female,White,,,,NEW HAVEN,NEW HAVEN,Hospital,,,Unknown,,,,Acute intoxication from the combined effects o...,,Y,,,,,,,,,Y,,,,,,,Y,Accident,"New Haven, CT\n(41.308252, -72.924161)","CT\n(41.575155, -72.738288)","CT\n(41.575155, -72.738288)"


# The  <font color="red">(_) Underscore</font> Wildcard

 - Is used just like the % wildcard, but matches exactly one character.
 - Not supported by DB2
 - Microsoft Access uses the ? instead
 
<strong><font color="red">NOTE: '%%' is used here only because the query is run from Python through SQLAlchemy, where a literal '%' must be escaped by doubling it. When running the query directly in MySQL, use a single '%'.</font></strong>

In [12]:
# Use the _ wildcard together with %: '_' matches exactly one character and
# '%%' (the escaped form of a single '%') matches any number of further
# characters, so ResidenceCity must have at least one character before the
# trailing ' HOOK'.
sql_wildcard4 = """SELECT * FROM accidentaldrugdeaths 
                    WHERE ResidenceCity LIKE '_%% HOOK' LIMIT 5;"""

In [13]:
# Execute the underscore-wildcard query on the open connection and load it into a DataFrame
view_data4 = pd.read_sql(sql=sql_wildcard4, con=conn)
# Show the returned rows
view_data4

Unnamed: 0,ID,Date,DateType,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,Location,LocationifOther,DescriptionofInjury,InjuryPlace,InjuryCity,InjuryCounty,InjuryState,COD,OtherSignifican,Heroin,Cocaine,Fentanyl,FentanylAnalogue,Oxycodone,Oxymorphone,Ethanol,Hydrocodone,Benzodiazepine,Methadone,Amphet,Tramad,Morphine_NotHeroin,Hydromorphone,Other,OpiateNOS,AnyOpioid,MannerofDeath,DeathCityGeo,ResidenceCityGeo,InjuryCityGeo
0,16-0165,03/13/2016 12:00:00 AM,DateofDeath,30,Female,White,SANDY HOOK,FAIRFIELD,CT,DANBURY,,Hospital,,Substance Abuse,Unknown,UNKNOWN,,,Acute Heroin and Cocaine Intoxication,,Y,Y,,,,,,,,,,,,,,,Y,Accident,"Danbury, CT\n(41.393666, -73.451539)","SANDY HOOK, CT\n(41.419998, -73.282501)",
1,18-0317,04/28/2018 12:00:00 AM,DateReported,24,Male,White,SANDY HOOK,FAIRFIELD,CT,SANDY HOOK,FAIRFIELD,Other,Friend's Apartment,Drug abuse,Residence,SANDY HOOK,FAIRFIELD,,"Multidrug Toxicity Including Ethanol, Heroin, ...",,Y,,Y,,,,Y,,,Y,,,,,,,Y,Accident,"Sandy Hook, CT\n(41.419998, -73.282501)","SANDY HOOK, CT\n(41.419998, -73.282501)","SANDY HOOK, CT\n(41.419998, -73.282501)"
2,16-0510,07/23/2016 12:00:00 AM,DateofDeath,26,Male,White,RED HOOK,DUTCHESS,NY,SHARON,,Hospital,,Substance Abuse,Residential Building,STORMVILLE,,,Acute Heroin Intoxication and Recent Use of Co...,,Y,Y,,,,,,,,,,,,,,,Y,Accident,"Sharon, CT\n(41.87943, -73.478873)","RED HOOK, CT\n(41.0667, -73.556099)",
3,18-0125,02/23/2018 12:00:00 AM,DateReported,48,Male,White,SANDY HOOK,FAIRFIELD,CT,SANDY HOOK,FAIRFIELD,Residence,,Prescription Medicine Misuse,Residence,SANDY HOOK,FAIRFIELD,,Acute Intoxication From the Combined Effects o...,,,,,,,,Y,Y,,,,,,,,,Y,Accident,"Sandy Hook, CT\n(41.419998, -73.282501)","SANDY HOOK, CT\n(41.419998, -73.282501)","SANDY HOOK, CT\n(41.419998, -73.282501)"
4,14-0275,06/28/2014 12:00:00 AM,DateofDeath,53,Male,White,SANDY HOOK,,,MIDDLETOWN,MIDDLESEX,Other,YMCA-Parking Lot,Substance abuse,Automobile,,,,Acute intoxication due to the combined effects...,,Y,Y,,,,,Y,,,,,,,,,,,Accident,"MIDDLETOWN, CT\n(41.544654, -72.651713)","SANDY HOOK, CT\n(41.419998, -73.282501)","CT\n(41.575155, -72.738288)"


# The  <font color="red">Brackets ([])</font> Wildcard

 - Is used to specify a set of characters that filters data if it matches a character in a specified position. 
 - Is not supported by many DBMSs, including MySQL