# Import Dependencies

In [2]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect
# Lift pandas' display limits so no rows or columns are elided when a frame is rendered
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [3]:
# Create MySQL Database Connection
# ----------------------------------
# The connection URL is read from the CDC_DNPAO_DB_URL environment variable so that
# real credentials never have to be saved in the notebook. The placeholder URL is
# used only when that variable is not set.
db_url = os.environ.get('CDC_DNPAO_DB_URL', 'mysql+pymysql://user:password@host/cdc_dnpao')
# pool_recycle=3600: pooled connections older than one hour are recycled before reuse
engine = create_engine(db_url, pool_recycle=3600)
# Open one connection that the query cells pass to pandas
conn = engine.connect()

In [4]:
# Confirm connection by listing the tables in the database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and no longer exists in 2.0;
# the Inspector API is the supported replacement and returns the same list of names.
inspect(engine).get_table_names()

['cdc_npaol_data']

# Using the <font color="red">WHERE</font> Clause

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column = SOMEVALUE;

 - <font color="red">Note: You can filter on a column that is not explicitly included in the SELECT statement.</font>

In [13]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view = (
    "SELECT LocationDesc, HealthTopic, PolicyTopic, Setting "
    "FROM cdc_npaol_data "
    "WHERE LocationAbbr = 'CA';"
)

### Python pandas is used here to run the SQL query and display its result

In [14]:
# Execute the query over the open connection and load the result into a DataFrame
view_data = pd.read_sql(sql=sql_view, con=conn)
# Show only the first five rows
view_data.head(n=5)

Unnamed: 0,LocationDesc,HealthTopic,PolicyTopic,Setting
0,California,Nutrition,Agriculture and Farming,Community
1,California,Nutrition,Appropriations,School/After School
2,California,Nutrition,Farm Direct Foods,School/After School
3,California,Nutrition,Appropriations,Community
4,California,Physical Activity,School Siting,School/After School


Note that LocationAbbr was not included in the SELECT statement, but since it is in the same table, we can filter by this column.

# Using the <font color="red">WHERE</font> Clause and the <font color="blue">ORDER BY</font> Clause

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column = SOMEVALUE<br>
ORDER BY column;

 - <font color="red">Note: The ORDER BY clause must come after the WHERE clause, or an error will be generated.</font>

In [17]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view_2 = (
    "SELECT * "
    "FROM cdc_npaol_data "
    "WHERE Setting = 'Community' "
    "ORDER BY LocationDesc;"
)

In [18]:
# Execute the query over the open connection and load the result into a DataFrame
view_data_2 = pd.read_sql(sql=sql_view_2, con=conn)
# Show only the first five rows
view_data_2.head(n=5)

Unnamed: 0,Year,Quarter,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Title,Status,Citation,StatusAltValue
0,2011,1,AL,Alabama,Physical Activity,Pedestrians/Walking,Community,Traffic Control Legends,Dead,H528,3
1,2011,1,AL,Alabama,Obesity,Menu Labeling,Community,Nutrition Labeling of Food,Enacted,H195,1
2,2009,1,AL,Alabama,Obesity,Appropriations,Community,Government Appropriations,Dead,H379,3
3,2010,1,AZ,Arizona,Obesity,Appropriations,Community,Bicycles,Dead,H2633,3
4,2011,1,AR,Arkansas,Obesity,Food Restrictions,Community,Farmers Market Regulation,Dead,S820,3


Note that we filtered the data to rows where the Setting column equals 'Community', and also ordered (sorted) the data alphabetically by LocationDesc.

# The WHERE Clause Operators

 - Some operators are DBMS-specific (for example, `!<` and `!>` are Transact-SQL operators that MySQL does not support). Check your documentation.
<table>
    <tr>
        <th>Operators</th>
        <th>Descriptions</th>
    </tr>
    <tr>
        <td>=</td>
        <td>Equality</td>    
    </tr> 
    <tr>
        <td>&lt;&gt;</td>
        <td>Not Equal</td>    
    </tr> 
    <tr>
        <td>!=</td>
        <td>Not Equal</td>    
    </tr> 
     <tr>
        <td>&lt;</td>
        <td>Less than</td>    
    </tr> 
     <tr>
        <td>&lt;=</td>
        <td>Less than or Equal</td>    
    </tr> 
    <tr>
        <td>!&lt;</td>
        <td>Not less than</td>    
    </tr>     
    <tr>
        <td>></td>
        <td>Greater than</td>    
    </tr> 
    <tr>
        <td>>=</td>
        <td>Greater than or Equal</td>    
    </tr> 
    <tr>
        <td>!></td>
        <td>Not Greater than</td>    
    </tr>     
    <tr>
        <td>BETWEEN</td>
        <td>Between two specific values</td>    
    </tr> 
        <tr>
        <td>IS NULL</td>
        <td>Is a NULL value</td>    
    </tr> 
</table>

# Using the <font color="red">WHERE</font> Clause Against a Single Value

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column operator SOMEVALUE;

 - In this example we will use the less than operator

In [22]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view_3 = (
    "SELECT * "
    "FROM cdc_npaol_data "
    "WHERE StatusAltValue < 3 "
    "ORDER BY LocationDesc;"
)

In [23]:
# Execute the query over the open connection and load the result into a DataFrame
view_data_3 = pd.read_sql(sql=sql_view_3, con=conn)
# Show only the first five rows
view_data_3.head(n=5)

Unnamed: 0,Year,Quarter,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Title,Status,Citation,StatusAltValue
0,2011,1,AL,Alabama,Obesity,Menu Labeling,Community,Nutrition Labeling of Food,Enacted,H195,1
1,2011,1,AR,Arkansas,Nutrition,Appropriations,Community,Cottage Food and Farmers Market Permit Exemptions,Enacted,H1323,1
2,2015,3,CA,California,Nutrition,Agriculture and Farming,Community,Relative to Farmworker Appreciation Day,Enacted,SR46,1
3,2010,1,CA,California,Physical Activity,School Siting,School/After School,Greene Facilities Act- General Site- Extension,Enacted,R19849,1
4,2005,1,CA,California,Nutrition,Medical Care,Community,Parks: The Great California Delta Trail System,Enacted,S1556,1


Note that the StatusAltValue is less than 3

# Using the <font color="red">WHERE</font> Clause to Check for Nonmatches

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column <font color="red">&lt;&gt;</font> SOMEVALUE;
    
 - Returns all rows except those whose column value equals the value after the operator.

In [24]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view_4 = (
    "SELECT * "
    "FROM cdc_npaol_data "
    "WHERE LocationDesc <> 'Arkansas';"
)

In [26]:
# Execute the query over the open connection and load the result into a DataFrame
view_data_4 = pd.read_sql(sql=sql_view_4, con=conn)
# Show only the first ten rows
view_data_4.head(n=10)

Unnamed: 0,Year,Quarter,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Title,Status,Citation,StatusAltValue
0,2015,1,FL,Florida,Physical Activity,Pedestrians/Walking,Community,Florida Shared-Use Nonmotorized Trail Network,Dead,SB1186,3
1,2015,1,NY,New York,Nutrition,School Nutrition,School/After School,"Health Education Regarding Food, Agriculture, ...",Introduced,AB544,0
2,2011,1,NY,New York,Obesity,Appropriations,School/After School,Tax Increment Bonds Payable From Real Property...,Dead,A5296,3
3,2009,1,NY,New York,Obesity,Access to Drinking Water,School/After School,Healthy Schools Act,Dead,A7804,3
4,2009,1,MN,Minnesota,Physical Activity,Built Environment and Street-Scale Design,Community,Transportation,Dead,S1484,3
5,2007,1,OK,Oklahoma,Nutrition,Food Assistance Programs,Community,State Food Security Act,Enacted,H2833,1
6,2010,1,PA,Pennsylvania,Nutrition,Food Security,Restaurant/Retail,Agricultural Surplus System,Enacted,H2139,1
7,2010,1,NJ,New Jersey,Nutrition,Agriculture and Farming,Community,Economic Development Promotion Act,Dead,S543,3
8,2010,1,WA,Washington,Physical Activity,Pedestrians/Walking,School/After School,Enforcement of Crosswalk Violations,Enacted,S6363,1
9,2011,1,GA,Georgia,Obesity,Appropriations,Community,Georgia Grown Farm Products,Dead,H367,3


Note that none of the returned rows has a LocationDesc of Arkansas.

# Checking for a RANGE of Data using the <font color="red">BETWEEN</font> Operator

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column <font color="red">BETWEEN</font> SOMEVALUE <font color="red">AND</font> SOMEVALUE;

 - BETWEEN is inclusive: rows whose value equals either endpoint are returned. In the example below, we also used the ORDER BY clause to sort the values by year.

In [32]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view_5 = (
    "SELECT * "
    "FROM cdc_npaol_data "
    "WHERE Year BETWEEN 2009 AND 2011 "
    "ORDER BY Year;"
)

In [33]:
# Execute the query over the open connection and load the result into a DataFrame
view_data_5 = pd.read_sql(sql=sql_view_5, con=conn)
# Show only the first ten rows
view_data_5.head(n=10)

Unnamed: 0,Year,Quarter,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Title,Status,Citation,StatusAltValue
0,2009,1,NY,New York,Obesity,Access to Drinking Water,School/After School,Healthy Schools Act,Dead,A7804,3
1,2009,1,MN,Minnesota,Physical Activity,Built Environment and Street-Scale Design,Community,Transportation,Dead,S1484,3
2,2009,1,MA,Massachusetts,Obesity,Appropriations,Community,Livable Communities and Zoning Reform Act,Dead,S97,3
3,2009,1,MA,Massachusetts,Obesity,Disparities/Equity,Community,School Nutrition,Dead,H2092,3
4,2009,1,IL,Illinois,Obesity,Appropriations,School/After School,Department of Juvenile Justice School District,Dead,H2254,3
5,2009,1,ME,Maine,Nutrition,Appropriations,Community,Soft Drinks,Dead,H511,3
6,2009,1,MA,Massachusetts,Nutrition,Food Security,Community,School Nutrition,Dead,H2092,3
7,2009,1,MN,Minnesota,Nutrition,Fruits and Vegetables,Community,State Government,Vetoed,S2081,2
8,2009,1,WA,Washington,Physical Activity,Safe Routes to Schools,School/After School,Student Transportation Funding,Dead,S5914,3
9,2009,1,AL,Alabama,Obesity,Appropriations,Community,Government Appropriations,Dead,H379,3


# Checking for <font color="red">Missing Values</font>

<br>
<strong>SQL Syntax</strong><br>
SELECT column(s)<br>
FROM table<br>
WHERE column <font color="red">IS NULL</font>;

 - Returns the rows in which the column has a missing (NULL) value.  Entries with '0' are not missing values. '0' is a value.

In [36]:
# Query text, one clause per line; adjacent string literals are joined into a single string
sql_view_6 = (
    "SELECT * "
    "FROM cdc_npaol_data "
    "WHERE Year IS NULL;"
)

In [37]:
# Execute the query over the open connection and load the result into a DataFrame
view_data_6 = pd.read_sql(sql=sql_view_6, con=conn)
# Display the whole result
view_data_6

Unnamed: 0,Year,Quarter,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Title,Status,Citation,StatusAltValue


All instances in the Year column contain a value, so the output is empty.