### Creating Databases and Tables

### Creating Databases
- Varies by the database type
- Databases like PostgreSQL and MySQL have command line tools to initialize the database
- With SQLite, the `create_engine()` statement will create the database and file if they do not already exist

### Building a Table

In [1]:
from sqlalchemy import (Table, Column, String, Integer, Float, Boolean)
from sqlalchemy import create_engine, MetaData
engine = create_engine('sqlite:///:memory:')
metadata = MetaData()

In [2]:
# employees = Table('employees', metadata,
#                  Column('id', Integer()),
#                  Column('name', String(225)),
#                  Column('salary', DECIMAL()),
#                  Column('active', Boolean()))

# metadata.create_all(engine)
# engine.table_names()

### Creating Tables
- Still uses the Table object like we did for reflection
- Replaces the autoload keyword arguments with Column objects
- Creates the tables in the actual database by using the `create_all()` method on the MetaData instance
- Other tools are needed to handle database table updates, such as Alembic or raw SQL

### Creating Tables - Additional Column Options
- `unique` forces all values for the data in a column to be unique
- `nullable` determines if a column can be empty in a row
- `default` sets a default value if one isn't supplied

In [3]:
employees = Table('employees', metadata,
                 Column('id', Integer()),
                 Column('name', String(225), unique=True,
                       nullable=False),
                 Column('salary', Float(), default=100.00),
                 Column('active', Boolean(), default=True))

employees.constraints

{CheckConstraint(<sqlalchemy.sql.elements.BinaryExpression object at 0x0000000005AFEEF0>, name='_unnamed_', table=Table('employees', MetaData(bind=None), Column('id', Integer(), table=<employees>), Column('name', String(length=225), table=<employees>, nullable=False), Column('salary', Float(), table=<employees>, default=ColumnDefault(100.0)), Column('active', Boolean(), table=<employees>, default=ColumnDefault(True)), schema=None), _create_rule=<sqlalchemy.util.langhelpers.portable_instancemethod object at 0x0000000005B1E168>, _type_bound=True),
 PrimaryKeyConstraint(),
 UniqueConstraint(Column('name', String(length=225), table=<employees>, nullable=False))}

In [4]:
metadata.create_all(engine)
engine.table_names()

['employees']

### Inserting Data into a Table

### Adding Data to a Table
- Done with the `insert()` statement
- `Insert()` takes the table we are loading data into as the argument
- We add all the values we want to insert in with the `values` clause as `column=value` pairs
- Doesn't return any rows, so no need for a fetch method.

### Inserting One Row

In [5]:
connection = engine.connect()

In [6]:
from sqlalchemy import insert

stmt = insert(employees).values(id=1,
                               name='Jason',
                               salary=1.00,
                               active=True)

result_proxy = connection.execute(stmt)

In [7]:
print(result_proxy.rowcount)

1


### Inserting Multiple Rows
- Build an insert statement without any values
- Build a list of dictionaries that represent all the values clauses for the rows you want to insert
- Pass both the stmt and the values list to the execute method on connection

In [8]:
stmt = insert(employees)

values_list = [ 
            {'id': 2, 'name': 'Rebecca', 'salary': 2.00, 'active': True},
            {'id': 3, 'name': 'Bob', 'salary': 0.00, 'active': False}]

result_proxy = connection.execute(stmt, values_list)

print(result_proxy.rowcount)

2


In [9]:
from sqlalchemy import select
stmt = select([employees])
print(connection.execute(stmt).fetchall())

[(1, 'Jason', 1.0, True), (2, 'Rebecca', 2.0, True), (3, 'Bob', 0.0, False)]


### Removing Data from a Database

### Deleting Specific Rows

In [11]:
from sqlalchemy import delete, func
stmt = delete(employees).where(
        employees.columns.id==3)

result_proxy = connection.execute(stmt)

result_proxy.rowcount

1

### Deleting Data from a Table
- Done with the `delete()` statement
- `delete()` takes the table we are loading data into as the argument
- A `where()` clause is used to choose which rows to delete
- Hard to undo so BE CAREFUL!!!

In [12]:
from sqlalchemy import delete, func
stmt = select([func.count(employees.columns.id)])
connection.execute(stmt).scalar()
delete_stmt = delete(employees)
result_proxy = connection.execute(delete_stmt)
result_proxy.rowcount

2

### Dropping a Table Completely
- Uses `drop` method on the table
- Accepts the engine as an argument so it know where to remove the table from
- Won't remove it from the metadata until the python process is restarted

In [24]:
# employees.drop(engine)
# print(employees.exists(engine))

### Dropping all the Tables
- Uses the `drop_all()` method on MetaData

In [25]:
#metadata.drop_all(engine)
#engine.table_names()

In [13]:
census = Table('census', metadata,
            Column('state', String(50)), 
            Column('sex', String(10)), 
            Column('age', String(10)),
            Column('pop2000', String(50)),
            Column('pop2008', String(50)))

metadata.create_all(engine)
engine.table_names()

['census', 'employees']

In [14]:
import csv 

In [15]:
# Create a insert statement for census: stmt
stmt = insert(census)

# Create an empty list and zeroed row count: values_list, total_rowcount
values_list = []
total_rowcount = 0

# Enumerate the rows of csv_reader
with open('census.csv', newline='\n') as csvfile:
    csv_reader = csv.reader(csvfile, delimiter=',')
    for idx, row in enumerate(csv_reader):
        data = {'state': row[0], 'sex': row[1], 'age': row[2], 'pop2000': row[3], 'pop2008': row[4]}
        values_list.append(data)

        if idx % 51 == 0:
            results = connection.execute(stmt, values_list)
            total_rowcount += results.rowcount
            values_list = []
print(total_rowcount)


8722


In [16]:
stmt = select([census])
print(connection.execute(stmt).first())

('Illinois', 'M', '0', '89600', '95012')


### Updating Data in a Table
- Done with the `update` statement
- Similar to the insert statement but includes a `where` clause to determine what record will be updated
- We add all the values we want to update with the `values` clause as `column=value` pairs

In [17]:
from sqlalchemy import update

stmt = update(employees)

stmt = stmt.where(employees.columns.id == 3)

stmt = stmt.values(active=True)

result_proxy = connection.execute(stmt)

In [18]:
print(result_proxy.rowcount)

0


### Correlated Updates

In [19]:
from sqlalchemy import desc
new_salary = select([employees.columns.salary])
new_salary = new_salary.order_by(desc(
            employees.columns.salary)
            )

new_salary = new_salary.limit(1)
stmt = update(employees)
stmt = stmt.values(salary=new_salary)
result_proxy = connection.execute(stmt)
print(result_proxy.rowcount)

0


###  Correlated Updates
- Uses a `select()` statement to find the value for the column we are updating 
- Commonly used to update records to a maximum value or change a string to match an abbreviation from another table. 

### Updating Multiple Rows
- Build where clause that will select all the records you want to update

In [20]:
stmt = update(employees)

stmt = stmt.where(
        employees.columns.active == True
)

stmt = stmt.values(active=False, salary=0.00)

result_proxy = connection.execute(stmt)

print(result_proxy.rowcount)

0


In [21]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///census.sqlite')
connection = engine.connect()
from sqlalchemy import Table, MetaData, select
metadata = MetaData()
census = Table('census', metadata, autoload=True, autoload_with=engine)
state_fact = Table('state_fact', metadata, autoload=True, autoload_with=engine)

In [22]:
# Build a select statement: select_stmt
select_stmt = select([state_fact]).where(state_fact.columns.name == 'New York')

# Print the results of executing the select_stmt
print(connection.execute(select_stmt).fetchall())

# Build a statement to update the fips_state to 36: stmt
stmt = update(state_fact).values(fips_state = 36)

# Append a where clause to limit it to records for New York state
stmt = stmt.where(state_fact.columns.name == 'New York')

# Execute the statement: results
results = connection.execute(stmt)

# Print rowcount
print(results.rowcount)

# Execute the select_stmt again to view the changes
print(connection.execute(select_stmt).fetchall())

[('32', 'New York', 'NY', 'USA', 'state', '10', 'current', 'occupied', '', '36', 'N.Y.', 'II', '1', 'Northeast', '2', 'Mid-Atlantic', '2')]
1
[('32', 'New York', 'NY', 'USA', 'state', '10', 'current', 'occupied', '', '36', 'N.Y.', 'II', '1', 'Northeast', '2', 'Mid-Atlantic', '2')]


In [23]:
# Build a statement to update the notes to 'The Wild West': stmt
stmt = update(state_fact).values(notes = 'The Wild West')

# Append a where clause to match the West census region records
stmt = stmt.where(state_fact.columns.census_region_name == 'West')

# Execute the statement: results
results = connection.execute(stmt)

# Print rowcount
print(results.rowcount)

13


In [None]:
# # Build a statement to select name from state_fact: stmt
# fips_stmt = select([state_fact.columns.name])

# # Append a where clause to Match the fips_state to flat_census fips_code
# fips_stmt = fips_stmt.where(
#     state_fact.columns.fips_state == flat_census.columns.fips_code)

# # Build an update statement to set the name to fips_stmt: update_stmt
# update_stmt = update(fips_stmt).values(flat_census.columns.state_name = fips_stmt)

# # Execute update_stmt: results
# results = connection.execute(update_stmt)

# # Print rowcount
# print(results.rowcount)