# 1_data_modeling_postgres_2_denormalization
<img src="https://upload.wikimedia.org/wikipedia/commons/2/29/Postgresql_elephant.svg" width="100" height="100">

In [None]:
# Import libraries
import psycopg2
from dotenv import load_dotenv
import os

# Load environment variables from the .env file.
# The explicit path is tried first; if nothing is loaded from it, fall back to
# python-dotenv's automatic lookup so an auto-discovered .env still works.
dotenv_path = "../.env"
if not load_dotenv(dotenv_path):
    load_dotenv()

# Retrieve database connection details from the environment
user = os.getenv("postgres_username")
password = os.getenv("postgres_password")
for var_name, value in (("postgres_username", user), ("postgres_password", password)):
    if value is None:
        print(f"Warning: environment variable {var_name} is not set")

# Connect to database.
# Keyword arguments are used instead of an interpolated DSN string so that
# credentials containing spaces or quotes are passed through intact.
try:
    conn = psycopg2.connect(host="127.0.0.1", dbname="music", user=user, password=password)
except psycopg2.Error as e:
    print("Error: Could not make connection to the database")
    print(e)
    # Re-raise: every later statement needs `conn`, so carrying on would only
    # surface as an unrelated NameError a few lines further down.
    raise

# Get a cursor
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not get cursor to the database")
    print(e)
    raise

# Set automatic commit to be true, so each statement is committed on its own
conn.set_session(autocommit=True)

# Create these tables
<img src="images/1_data_modeling_postgres_2_denormalization_1.jpg" width="20%">

In [None]:
# CREATE TABLE
# Each table's DDL is written once and executed in a loop. A failure on one
# table is reported and the remaining tables are still attempted.
create_table_statements = (
    """
    CREATE TABLE IF NOT EXISTS transactions (
        transaction_id int,
        customer_name varchar,
        cashier_id int,
        year int
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS albums_sold (
        album_id int,
        transaction_id int,
        album_name varchar
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS employees (
        employee_id int,
        employee_name varchar
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS sales (
        transaction_id int,
        amount_spent int
    );
    """,
)

for ddl in create_table_statements:
    try:
        cur.execute(ddl)
    except psycopg2.Error as e:
        print("Error: Issue creating table")
        print(e)

In [None]:
# INSERT INTO
# One parameterized statement per table, paired with the rows to insert.
# Rows are inserted one at a time so that a failing row is reported without
# preventing the remaining rows from being attempted.
inserts = (
    (
        "INSERT INTO transactions (transaction_id, customer_name, cashier_id, year) "
        "VALUES (%s, %s, %s, %s)",
        [
            (1, "Amanda", 1, 2000),
            (2, "Toby", 1, 2000),
            (3, "Max", 2, 2018),
        ],
    ),
    (
        "INSERT INTO albums_sold (album_id, transaction_id, album_name) "
        "VALUES (%s, %s, %s)",
        [
            (1, 1, "Rubber Soul"),
            (2, 1, "Let It Be"),
            (3, 2, "My Generation"),
            (4, 3, "Meet the Beatles"),
            (5, 3, "Help!"),
        ],
    ),
    (
        "INSERT INTO employees (employee_id, employee_name) "
        "VALUES (%s, %s)",
        [
            (1, "Sam"),
            (2, "Bob"),
        ],
    ),
    (
        "INSERT INTO sales (transaction_id, amount_spent) "
        "VALUES (%s, %s)",
        [
            (1, 40),
            (2, 19),
            (3, 45),
        ],
    ),
)

for insert_sql, rows in inserts:
    for values in rows:
        try:
            cur.execute(insert_sql, values)
        except psycopg2.Error as e:
            print("Error: Inserting Rows")
            print(e)

In [None]:
# QUERY
# Print every row of each table under a "Table: <name>" heading.
# Table names come from this fixed tuple (not from user input), so formatting
# them into the statement is safe.
for position, table_name in enumerate(("transactions", "albums_sold", "employees", "sales")):
    # Every heading except the first is preceded by a blank line.
    separator = "\n" if position else ""
    print(f"{separator}Table: {table_name}\n")
    try:
        cur.execute(f"SELECT * FROM {table_name};")
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
    else:
        # Fetch only when the SELECT succeeded; fetching after a failed
        # execute would raise instead of printing rows.
        for row in cur:
            print(row)

`JOIN` the 4 tables to get:  
- `transaction_id`  
- `customer_name`  
- `employee_name`  
- `year`  
- `album_name`  
- `amount_spent`  

In [None]:
# Join all four normalized tables to produce one row per album sold, carrying
# the transaction, customer, cashier name, year, album and amount spent.
join_query = """
    SELECT transactions.transaction_id, customer_name, employees.employee_name,
           year, albums_sold.album_name, sales.amount_spent
    FROM ((transactions JOIN employees ON
           transactions.cashier_id = employees.employee_id) JOIN
           albums_sold ON albums_sold.transaction_id=transactions.transaction_id) JOIN
           sales ON transactions.transaction_id=sales.transaction_id;
"""

try:
    cur.execute(join_query)
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Fetch only when the SELECT succeeded; fetching after a failed execute
    # would raise instead of printing rows.
    for row in cur:
        print(row)

# That's awesome, but JOINS are slow.
In a read-heavy workload that requires low-latency queries, the aim is to have as few JOINs as possible.  
With denormalization we want to think about the queries we are running and how we can reduce our number of JOINS, even if that means duplicating data.

- Query 1: `SELECT transaction_id, customer_name, amount_spent FROM <min number of tables>`
- It should generate the amount spent on each transaction by using as few tables as possible.

- Query 2: `SELECT cashier_name, SUM(amount_spent) FROM <min number of tables> GROUP BY cashier_name`
- It should generate total sales by cashier.

#  Query 1 
`SELECT transaction_id, customer_name, amount_spent FROM <min number of tables>`

One way to do this would be to JOIN the `sales` table with the `transactions` table. It can also be done with 0 JOINs, by adding an `amount_spent` attribute to the `transactions` table.  

<img src="images/1_data_modeling_postgres_2_denormalization_2.jpg" width="30%">

In [None]:
# ADD COLUMN
# `transactions` was already created earlier in this notebook without an
# amount_spent column, so "CREATE TABLE IF NOT EXISTS transactions (...)"
# would be skipped and every statement using amount_spent would fail.
# Extend the existing table instead.
try:
    cur.execute("ALTER TABLE transactions ADD COLUMN IF NOT EXISTS amount_spent int;")
except psycopg2.Error as e:
    print("Error: Issue adding column")
    print(e)

# UPDATE
# Duplicate each transaction's amount (the same values stored in `sales`)
# onto its row in `transactions`. Updating rather than inserting avoids
# creating a second copy of every transaction.
amount_by_transaction = (
    (1, 40),
    (2, 19),
    (3, 45),
)
for transaction_id, amount_spent in amount_by_transaction:
    try:
        cur.execute(
            "UPDATE transactions SET amount_spent = %s WHERE transaction_id = %s",
            (amount_spent, transaction_id),
        )
    except psycopg2.Error as e:
        print("Error: Updating Rows")
        print(e)

# QUERY
# ORDER BY keeps the printed rows in a stable order; without it the row order
# of a plain SELECT is not guaranteed, especially after UPDATEs.
try:
    cur.execute(
        "SELECT transaction_id, customer_name, amount_spent "
        "FROM transactions ORDER BY transaction_id"
    )
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Fetch only when the SELECT succeeded; fetching after a failed execute
    # would raise instead of printing rows.
    for row in cur:
        print(row)

If the output for the above cell is:  

`- (1, 'Amanda', 40)`  
`- (2, 'Toby', 19)`  
`- (3, 'Max', 45)`  

we can move on to Query 2.

# Query 2  
`SELECT cashier_name, SUM(amount_spent) FROM <min number of tables> GROUP BY cashier_name` 

To avoid JOINs, create a new table with just the information we need.

<img src="images/1_data_modeling_postgres_2_denormalization_3.jpg" width="25%">

In [None]:
# CREATE TABLE
# A purpose-built table holding only what Query 2 needs, so that the total
# per cashier can be computed without any JOIN.
try:
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS cashier_sales (
            transaction_id int,
            cashier_name varchar,
            cashier_id int,
            amount_spent int
        );
        """
    )
except psycopg2.Error as e:
    print("Error: Issue creating table")
    print(e)

# INSERT INTO
# Rows are inserted one at a time so that a failing row is reported without
# preventing the remaining rows from being attempted.
cashier_sales_insert = (
    "INSERT INTO cashier_sales (transaction_id, cashier_name, cashier_id, amount_spent) "
    "VALUES (%s, %s, %s, %s)"
)
cashier_sales_rows = (
    (1, "Sam", 1, 40),
    (2, "Sam", 1, 19),
    (3, "Bob", 2, 45),
)
for values in cashier_sales_rows:
    try:
        cur.execute(cashier_sales_insert, values)
    except psycopg2.Error as e:
        print("Error: Inserting Rows")
        print(e)

# QUERY
# Note: GROUP BY alone does not guarantee the order of the printed groups.
try:
    cur.execute("SELECT cashier_name, SUM(amount_spent) FROM cashier_sales GROUP BY cashier_name")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Fetch only when the SELECT succeeded; fetching after a failed execute
    # would raise instead of printing rows.
    for row in cur:
        print(row)

If the output for the above cell is:  

`- ('Sam', 59)`  
`- ('Bob', 45)`  

we're done for today. Drop everything and close down the cursor and connection.

In [None]:
# DROP TABLES
# Each table is listed exactly once (dropping `transactions` a second time
# could only ever fail). IF EXISTS keeps the cleanup quiet when a table was
# never created or has already been dropped.
# Table names come from this fixed tuple (not from user input), so formatting
# them into the statement is safe.
for table_name in ("transactions", "albums_sold", "employees", "sales", "cashier_sales"):
    try:
        cur.execute(f"DROP TABLE IF EXISTS {table_name}")
    except psycopg2.Error as e:
        print("Error: Dropping table")
        print(e)

In [None]:
# cursor, connection close
# The connection is closed in `finally` so that it is released even when
# closing the cursor fails.
try:
    cur.close()
except psycopg2.Error as e:
    print(e)
finally:
    try:
        conn.close()
    except psycopg2.Error as e:
        print(e)