# SQL Alchemy
---
# Useful resources
[SQL Alchemy documentation](https://docs.sqlalchemy.org/en/latest/core/sqlelement.html#module-sqlalchemy.sql.expression)
# Setup Dependencies

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
from sqlalchemy import create_engine 
from sqlalchemy import Table
from sqlalchemy import MetaData
from sqlalchemy import select
from sqlalchemy import and_
from sqlalchemy import desc
from sqlalchemy import func

# Connect to remote database

In [3]:
dialect = 'mysql'
driver = 'pymysql'
username = 'student'
password = 'datacamp'
host = 'courses.csrrinzqubik.us-east-1.rds.amazonaws.com'
port = '3306'
db_name = 'census'

# AWS
connection_string = '{}+{}://{}:{}@{}:{}/{}'.format(
    dialect, driver, username, password, host, port, db_name)
# local
# connection_string = 'sqlite:///data/census.sqlite'
connection_string

'mysql+pymysql://student:datacamp@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/census'

In [4]:
engine = create_engine(connection_string)

# Use the .table_names() method on the engine to print the table names
print(engine.table_names())

['census', 'state_fact']


# Reflection
For automatic loading of tables SQL Alchemy uses reflection.

In [5]:
metadata = MetaData()

In [6]:
census = Table('census', metadata, autoload=True, autoload_with=engine)

In [7]:
repr(census)

"Table('census', MetaData(bind=None), Column('state', VARCHAR(length=30), table=<census>), Column('sex', VARCHAR(length=1), table=<census>), Column('age', INTEGER(display_width=11), table=<census>), Column('pop2000', INTEGER(display_width=11), table=<census>), Column('pop2008', INTEGER(display_width=11), table=<census>), schema=None)"

# Connection

In [9]:
connection = engine.connect()

# Calculating difference

In [10]:
# Build query to return state names by population difference from 2008 to 2000: stmt
statement = select([census.columns.state, (census.columns.pop2008-census.columns.pop2000).label('pop_change')])

# Append group by for the state: stmt
statement = statement.group_by(census.columns.state)

# Append order by for pop_change descendingly: stmt
statement = statement.order_by(desc('pop_change'))

# Return only 5 results: stmt
statement = statement.limit(5)

# Use connection to execute the statement and fetch all results
results = connection.execute(statement).fetchall()

# Print the state and population change for each record
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))


Texas:40137
California:35406
Florida:21954
Arizona:14377
Georgia:13357
