In [8]:
import os
from urllib.parse import quote

import psycopg2

Add username and password information: 

In [9]:
from config import USER, PASSWORD

In [10]:
# Open the psycopg2 connection to the `ecomm` database.
# `dbname` is the documented keyword; `database` is only a deprecated alias.
# Port 5433 is not PostgreSQL's default (5432) - it matches the
# `localhost:5433` used for the SQLAlchemy engine later in this notebook.
conn = psycopg2.connect(dbname='ecomm',
                        user=USER,
                        password=PASSWORD,
                        port=5433)

In [11]:
def getProductsByType(db_conn, prod_type):
    """Return every row of ``products`` whose ``product_type`` matches.

    Args:
        db_conn: Open DB-API connection (psycopg2 in this notebook).
        prod_type: Value compared against ``products.product_type``.

    Returns:
        list: All matching rows, exactly as returned by ``cursor.fetchall()``.
    """
    sql_string = "SELECT * FROM products WHERE product_type = %s"
    # The context manager closes the cursor even when the query raises, so
    # repeated calls do not leave cursors open on the shared connection.
    with db_conn.cursor() as cursor:
        # prod_type travels as a bound parameter; it is never formatted into the SQL.
        cursor.execute(sql_string, [prod_type])
        return cursor.fetchall()

In [12]:
# Fetch all 'oven' products with a single query; the cells below reuse this list.
all_ovens = getProductsByType(conn, 'oven')

In [13]:
all_ovens

[(1, 'Reflector oven', 'oven'),
 (2, 'Convection microwave', 'oven'),
 (15, 'Masonry oven', 'oven'),
 (24, 'Microwave oven', 'oven'),
 (32, 'Russian oven', 'oven'),
 (33, 'Clome oven', 'oven'),
 (34, 'Convection oven', 'oven'),
 (37, 'Beehive oven', 'oven'),
 (38, 'Toaster and toaster ovens', 'oven'),
 (41, 'Self-cleaning oven', 'oven'),
 (42, 'Wood-fired oven', 'oven'),
 (43, 'Kitchener range', 'oven'),
 (47, 'Oven', 'oven'),
 (48, 'Hot box (appliance)', 'oven'),
 (58, 'Tabun oven', 'oven'),
 (67, 'Chorkor oven', 'oven'),
 (72, 'Communal oven', 'oven'),
 (74, 'Halogen oven', 'oven'),
 (78, 'Stove', 'oven'),
 (80, 'Earth oven', 'oven')]

In [14]:
len(all_ovens)

20

Avoiding N+1 Queries - having to look up data from more than one table

We want to look up the product orders that include an oven.
Two ways to do this... 

In [15]:
# Deliberate N+1 demonstration: after the one query that produced `all_ovens`,
# a separate query is sent for every oven row.
cursor = conn.cursor()
# Counts how many times fetchall() has run, i.e. how many per-oven queries were sent.
fetchall_count = 0

for oven in all_ovens: 
    # The first element of each product row is used as the id to look up.
    oven_id = oven[0]
    sql_string = "SELECT * FROM product_orders WHERE product_id = %s"
    cursor.execute(sql_string, [oven_id])
    oven_order = cursor.fetchall()
    fetchall_count += 1
    print(oven_order, "\n")
    print("fetchall_count:", fetchall_count)


[(4, 1, 1, Decimal('296.47')), (23, 1, 4, Decimal('30.99')), (57, 1, 4, Decimal('124.53')), (63, 1, 2, Decimal('131.79'))] 

fetchall_count: 1
[(5, 2, 2, Decimal('270.24')), (21, 2, 2, Decimal('196.39'))] 

fetchall_count: 2
[(9, 15, 3, Decimal('53.26')), (38, 15, 2, Decimal('299.62')), (41, 15, 2, Decimal('158.61'))] 

fetchall_count: 3
[(3, 24, 5, Decimal('131.20')), (32, 24, 5, Decimal('60.54')), (36, 24, 5, Decimal('93.13')), (49, 24, 4, Decimal('228.74')), (81, 24, 2, Decimal('283.90'))] 

fetchall_count: 4
[(24, 32, 3, Decimal('70.57'))] 

fetchall_count: 5
[(38, 33, 4, Decimal('236.11')), (40, 33, 1, Decimal('227.86')), (61, 33, 2, Decimal('45.44')), (82, 33, 3, Decimal('29.96'))] 

fetchall_count: 6
[(12, 34, 3, Decimal('251.16')), (28, 34, 4, Decimal('72.18')), (69, 34, 5, Decimal('73.06')), (71, 34, 2, Decimal('75.19'))] 

fetchall_count: 7
[(35, 37, 4, Decimal('211.71')), (49, 37, 1, Decimal('162.63')), (51, 37, 5, Decimal('271.15')), (61, 37, 2, Decimal('256.34'))] 

fetcha

This is very inefficient - it goes back to the database and runs a separate query on every single iteration (one query for the products, then N more for their orders).

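NOTE:   the %s is psycopg2's placeholder for a query parameter of any type, not just strings - the integer product id above is also passed through %s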

In [16]:
def getProductOrdersByType(db_conn, prod_type):
    """Return all ``product_orders`` rows for products of the given type.

    A single INNER JOIN replaces the one-query-per-product loop, so the
    database is hit once regardless of how many products match.

    Args:
        db_conn: Open DB-API connection (psycopg2 in this notebook).
        prod_type: Value compared against ``products.product_type``.

    Returns:
        list: All matching ``product_orders`` rows from ``cursor.fetchall()``.
    """
    sql_string =    'SELECT po.*   \
                    FROM product_orders po \
                    INNER JOIN products p  \
                        ON po.product_id = p.product_id \
                    WHERE p.product_type = %s'
    # Close the cursor deterministically, including when execute() raises.
    with db_conn.cursor() as cursor:
        cursor.execute(sql_string, [prod_type])
        return cursor.fetchall()

In [17]:
getProductOrdersByType(conn, 'oven')

[(2, 58, 5, Decimal('102.97')),
 (2, 80, 1, Decimal('181.54')),
 (3, 24, 5, Decimal('131.20')),
 (4, 1, 1, Decimal('296.47')),
 (5, 2, 2, Decimal('270.24')),
 (8, 42, 2, Decimal('99.52')),
 (9, 15, 3, Decimal('53.26')),
 (12, 34, 3, Decimal('251.16')),
 (12, 48, 4, Decimal('20.08')),
 (14, 72, 2, Decimal('118.64')),
 (16, 47, 1, Decimal('246.68')),
 (18, 42, 1, Decimal('281.35')),
 (21, 2, 2, Decimal('196.39')),
 (21, 48, 5, Decimal('52.86')),
 (23, 1, 4, Decimal('30.99')),
 (23, 78, 3, Decimal('245.55')),
 (24, 32, 3, Decimal('70.57')),
 (24, 78, 4, Decimal('21.83')),
 (25, 58, 4, Decimal('85.03')),
 (26, 80, 3, Decimal('30.75')),
 (27, 74, 1, Decimal('202.45')),
 (28, 34, 4, Decimal('72.18')),
 (28, 58, 2, Decimal('289.46')),
 (32, 24, 5, Decimal('60.54')),
 (35, 37, 4, Decimal('211.71')),
 (36, 24, 5, Decimal('93.13')),
 (38, 15, 2, Decimal('299.62')),
 (38, 33, 4, Decimal('236.11')),
 (38, 58, 3, Decimal('255.12')),
 (40, 33, 1, Decimal('227.86')),
 (40, 58, 2, Decimal('247.79')),


When working with pulling from multiple tables, just use a join.

CHALLENGE: 
Write a function that executes a query to retrieve all product orders for 'fryer' with quantity > 3

In [18]:
def getProductOrdersOverThree(db_conn, prod_type, min_quantity=3):
    """Return ``product_orders`` rows of a product type above a quantity.

    Args:
        db_conn: Open DB-API connection (psycopg2 in this notebook).
        prod_type: Value compared against ``products.product_type``.
        min_quantity: Exclusive lower bound on ``product_orders.quantity``.
            Defaults to 3, which preserves the original "over three" behaviour.

    Returns:
        list: Rows with ``quantity > min_quantity`` for the given product
        type, as returned by ``cursor.fetchall()``.
    """
    sql_string =    '   SELECT po.* \
                        FROM product_orders po \
                            INNER JOIN products p \
                            ON po.product_id = p.product_id \
                        WHERE   po.quantity > %s \
                                AND p.product_type = %s'
    # Close the cursor deterministically, including when execute() raises.
    with db_conn.cursor() as cursor:
        # Parameter order must follow the placeholder order: quantity, then type.
        cursor.execute(sql_string, [min_quantity, prod_type])
        return cursor.fetchall()

In [19]:
getProductOrdersOverThree(conn, 'fryer')

[(1, 3, 5, Decimal('72.69')),
 (11, 3, 4, Decimal('296.83')),
 (18, 26, 5, Decimal('253.36')),
 (53, 3, 5, Decimal('161.05')),
 (53, 17, 4, Decimal('125.62')),
 (59, 54, 5, Decimal('272.47')),
 (70, 3, 4, Decimal('261.91'))]

## Indexes!

- B-Tree (find key values we're interested in)
- Hash (hashing values)
- Bitmap (large volumes of rows, data warehouses and bigger)
- Specialized (geolocation, etc)


### B-Tree (Balanced Tree)

The root node splits the key range, and each level of child nodes subdivides its parent's range further
- most common
- large number of distinct values (high cardinality)
- rebalances as needed
- Time to access is based on the depth (log time of nodes in tree)

### Hash Index

- Function for mapping arbitrary length data to a fixed-size string
- Hash values virtually unique
- Slight changes in input create a new hash
- ONLY used for equality comparisons
- Smaller size than B-Tree, so faster
- Advantage is size and may fit in memory

### Covering Index

Normal query processing
- Query plan builder determines which indexes to use
- Location of rows retrieved from index
- Rows retrieved from cache or persistent storage
- Filtering, joining, functions, and so on are applied
- Results returned

Covering indexes
- All columns referenced in query are in an index
- No need to retrieve data from table
- Saves a seek operation
- Thus, it bypasses the "Rows retrieved from cache or persistent storage" step, which is very costly

### Indexing while loading
- Large number of rows inserted
- Each time a row is inserted, the index is updated (2 operations each time)
- Alternating Table and Index updating 

### Alternate Sequence of Operations
- Add the table completely, then insert data ...
- Drop index before bulk load
- Insert data
- Create index
More efficient and can take advantage of bulk load optimizations

### Avoiding Index Locks
Rebuild the index
- Index may become corrupted (from bugs or whatever)
- Index parameters change
- Fragmented pages in b-tree

During Indexing on Postgres
- Locks table for writes (INSERT, UPDATE, DELETE)
- Allows reads (SELECT)
- Can lead to index lock errors in production

To fix this
- CREATE INDEX CONCURRENTLY
- builds a new index on the table without blocking writes (has 2 indexes temporarily until the new one is established)


Challenge

Create 2 indexes, the first being a B-Tree for last/first name, and the second being a Hash index for product names

In [21]:
# CREATE INDEX idx_lname_fname 
# ON customers USING btree
# (last_name, first_name)

In [22]:
# CREATE INDEX idx_product_name
# ON products USING hash
# (product_name)

## Object Relational Mapping

- Relational database model entities using ordered set of attributes 
    - These attributes are tuples or rows
    - They are organized into collections called Tables
- Object Oriented models are based on Class and Instance
    - Classes describe the structure of an object
    - Instances are objects that store data about a particular entity

Similarities: 
- Both are used to represent entities
    - Relational: entity is described by a row, which has attributes
    - Object-oriented: entity is described by an instance, which has attributes (instance variables)
    

## Introduction to SQLAlchemy
Object-Relational Mapping (ORM)

- Python SQL toolkit and ORM
- Tables are mapped to classes
- Performs many low-level tasks, but developers can still use SQL
- SQLAlchemy has both Core and ORM components

High-Level Operations
- Connecting to a db
- Declaring mapping
- Creating sessions
- Add, update, delete data, 
- Query data
- Commit and rollback 

1. Builds upon DBAPI (the Python Database API)
2. Generates SQL - maps from objects and functions to SQL statements
3. Abstracts DB specifics - more easily move between different relational DBs (postgres/mysql/server)



In [23]:
import psycopg2
import os

In [24]:
import sqlalchemy as db
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import Column, String, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker


In [26]:
# Create the database connection string.
# USER and PASSWORD are percent-encoded first: a raw '@', ':' or '/' in either
# value would otherwise be read as a URL delimiter and corrupt the connection URL.
db_conn_string = (
    'postgresql+psycopg2://'
    + quote(USER, safe='') + ':' + quote(PASSWORD, safe='')
    + '@localhost:5433/ecomm'
)

# Create the engine
engine = create_engine(db_conn_string)

In [27]:
# Create the session factory bound to the engine.
# Note: this rebinds `Session`, shadowing the class imported from sqlalchemy.orm above.
Session = sessionmaker(engine)

# Instantiate a session from the factory
session = Session()

In [28]:
# Declarative base class: the ORM-mapped classes in this notebook inherit from it
base = declarative_base()

In [29]:
class Product(base):
    """ORM mapping for the ``products`` table.

    Declares three columns: an integer primary key plus the product's
    name and type.
    """
    __tablename__ = 'products'  # table this class maps to
    product_id = Column(Integer, primary_key=True)
    product_name = Column(String)
    product_type = Column(String)  # category value, e.g. 'oven' or 'fryer'

In [30]:
# Build a Query over Product. This is a Query object, not a list of rows
# (see its repr in the next cell); rows are produced when it is iterated.
products = session.query(Product)

In [31]:
products

<sqlalchemy.orm.query.Query at 0x2c423067348>

In [32]:
# Iterating the query yields Product instances; print each one's name.
for product in products: 
    print(product.product_name)

Reflector oven
Convection microwave
Pressure fryer
Multicooker
Food steamer
Chapati maker
Mess kit
Rotisserie
Sous-vide cooker
Rocket mass heater
Cheesemelter
Hot plate
Flattop grill
Wet grinder
Masonry oven
Chocolatera
Turkey fryer
Bread machine
Roasting jack
Brasero (heater)
Susceptor
Slow cooker
Butane torch
Microwave oven
Solar cooker
Deep fryer
Popcorn maker
Russian oven
Clome oven
Convection oven
Beehive oven
Toaster and toaster ovens
Field kitchen
Corn roaster
Self-cleaning oven
Wood-fired oven
Kitchener range
Rice polisher
Soy milk maker
Crepe maker
Oven
Hot box (appliance)
Combi steamer
Rice cooker
Fire pot
Salamander broiler
Vacuum fryer
Fufu Machine
Tabun oven
Pancake machine
Barbecue grill
Panini sandwich grill
Air fryer
Chorkor oven
Communal oven
Pressure cooker
Halogen oven
Instant Pot
Waffle iron
Stove
Earth oven
Electric cooker
Espresso machine
Coffee pot


In [33]:
# filter() is the ORM counterpart of a SQL WHERE clause:
# keep only the products whose product_type is 'fryer'.
# Note: this rebinds `products`, which previously held the unfiltered query.
products = session.query(Product).filter(Product.product_type == 'fryer')

for product in products: 
    print(product.product_name)

Pressure fryer
Turkey fryer
Deep fryer
Vacuum fryer
Air fryer


### Limitations of ORMs

- When working with a complex data model
    - many joins
    - increasing complexity
    - May create performance issues
- Cross-database transactions
    - multiphase commits
- Fine-grained transaction control
    - rollback logic
- Query tuning
- Security considerations

These are all instances where using SQL directly is generally better than using an ORM


#### Challenge
Define a class using SQLAlchemy that corresponds to a suppliers table which has 4 columns from the Suppliers table:

In [None]:
class Supplier(base):
    """ORM mapping for the ``suppliers`` table, declaring four of its columns."""
    # NOTE(review): the suppliers schema is not shown in this notebook -
    # confirm these column names and types against the actual table.
    __tablename__ = 'suppliers'   # Required: names the table this class maps to
    supplier_id = Column(Integer, primary_key=True)
    supplier_name = Column(String)
    supplier_region = Column(String)
    supplier_level = Column(Integer)
