# SQL Alchemy
---
# Useful resources
[SQL Alchemy documentation](https://docs.sqlalchemy.org/en/latest/core/sqlelement.html#module-sqlalchemy.sql.expression)
# Setup Dependencies

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

In [12]:
from sqlalchemy import create_engine 
from sqlalchemy import Table
from sqlalchemy import MetaData
from sqlalchemy import select
from sqlalchemy import and_
from sqlalchemy import desc
from sqlalchemy import func
from sqlalchemy import case, cast, Float

# Connect to remote database

In [3]:
dialect = 'mysql'
driver = 'pymysql'
username = 'student'
password = 'datacamp'
host = 'courses.csrrinzqubik.us-east-1.rds.amazonaws.com'
port = '3306'
db_name = 'census'

# AWS
connection_string = '{}+{}://{}:{}@{}:{}/{}'.format(
    dialect, driver, username, password, host, port, db_name)
# local
# connection_string = 'sqlite:///data/census.sqlite'
connection_string

'mysql+pymysql://student:datacamp@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/census'

In [4]:
engine = create_engine(connection_string)

# Use the .table_names() method on the engine to print the table names
print(engine.table_names())

['census', 'state_fact']


# Reflection
For automatic loading of tables SQL Alchemy uses reflection.

In [5]:
metadata = MetaData()

In [6]:
census = Table('census', metadata, autoload=True, autoload_with=engine)

In [7]:
repr(census)

"Table('census', MetaData(bind=None), Column('state', VARCHAR(length=30), table=<census>), Column('sex', VARCHAR(length=1), table=<census>), Column('age', INTEGER(display_width=11), table=<census>), Column('pop2000', INTEGER(display_width=11), table=<census>), Column('pop2008', INTEGER(display_width=11), table=<census>), schema=None)"

# Connection

In [9]:
connection = engine.connect()

# Calculating difference

In [10]:
# Build query to return state names by population difference from 2008 to 2000: stmt
statement = select([census.columns.state, (census.columns.pop2008-census.columns.pop2000).label('pop_change')])

# Append group by for the state: stmt
statement = statement.group_by(census.columns.state)

# Append order by for pop_change descendingly: stmt
statement = statement.order_by(desc('pop_change'))

# Return only 5 results: stmt
statement = statement.limit(5)

# Use connection to execute the statement and fetch all results
results = connection.execute(statement).fetchall()

# Print the state and population change for each record
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))


Texas:40137
California:35406
Florida:21954
Arizona:14377
Georgia:13357


# Case, Cast and Float 

In [16]:
# Build an expression to calculate female population in 2000
female_pop2000 = func.sum(
    case([
        (census.columns.sex == 'F', census.columns.pop2000)
    ], else_=0))

# Cast an expression to calculate total population in 2000 to Float
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)

# Build a query to calculate the percentage of females in 2000: stmt
statement = select([female_pop2000 / total_pop2000 * 100])

# Execute the query and store the scalar result: percent_female
percent_female = connection.execute(statement).scalar()

# Print the percentage
percent_female


50.7455

# Relationships

In [22]:
state_fact = Table('state_fact', metadata, autoload=True, autoload_with=engine)
repr(state_fact)

"Table('state_fact', MetaData(bind=None), Column('id', VARCHAR(length=256), table=<state_fact>), Column('name', VARCHAR(length=256), table=<state_fact>), Column('abbreviation', VARCHAR(length=256), table=<state_fact>), Column('country', VARCHAR(length=256), table=<state_fact>), Column('type', VARCHAR(length=256), table=<state_fact>), Column('sort', VARCHAR(length=256), table=<state_fact>), Column('status', VARCHAR(length=256), table=<state_fact>), Column('occupied', VARCHAR(length=256), table=<state_fact>), Column('notes', VARCHAR(length=256), table=<state_fact>), Column('fips_state', VARCHAR(length=256), table=<state_fact>), Column('assoc_press', VARCHAR(length=256), table=<state_fact>), Column('standard_federal_region', VARCHAR(length=256), table=<state_fact>), Column('census_region', VARCHAR(length=256), table=<state_fact>), Column('census_region_name', VARCHAR(length=256), table=<state_fact>), Column('census_division', VARCHAR(length=256), table=<state_fact>), Column('census_divisi

In [20]:
# Build a statement to join census and state_fact tables: stmt
statement = select([census.columns.pop2000, state_fact.columns.abbreviation])

# Execute the statement and get the first result: result
result = connection.execute(statement).first()

# Loop over the keys in the result object and print the key and value
for key in result.keys():
    print(key, getattr(result, key))


pop2000 89600
abbreviation IL


# Join select_from()
## table_name.join(table_name_to_join, t1.keys == t2.keys)

In [25]:
# automatically join based on sql table
statement = select([census, state_fact])

# join 
statement = statement.select_from(
    census.join(state_fact, census.columns.state == state_fact.columns.name))

result = connection.execute(statement).first()

# Loop over the keys in the result object and print the key and value
for key in result.keys():
    print(key, getattr(result, key))


state Illinois
sex M
age 0
pop2000 89600
pop2008 95012
id 13
name Illinois
abbreviation IL
country USA
type state
sort 10
status current
occupied occupied
notes 
fips_state 17
assoc_press Ill.
standard_federal_region V
census_region 2
census_region_name Midwest
census_division 3
census_division_name East North Central
circuit_court 7
