# ETL the data from 3NF tables to Fact & Dimension Tables

In [23]:
%load_ext sql

DB_ENDPOINT = "127.0.0.1"
DB = 'pagila'
DB_USER = '*****'
DB_PASSWORD = '******'
DB_PORT = '5432'

# postgresql://username:password@host:port/database
conn_string = "postgresql://{}:{}@{}:{}/{}" \
                        .format(DB_USER, DB_PASSWORD, DB_ENDPOINT, DB_PORT, DB)

print(conn_string)

The sql extension is already loaded. To reload it, use:
  %reload_ext sql
postgresql://*****:******@127.0.0.1:5432/pagila


In [None]:
# Connect ipython-sql to the database described by conn_string (built in the
# configuration cell above). The %%sql cells below report this connection in
# their output, so they are expected to run against it.
%sql $conn_string

## `Extract` data from the normalized database, `transform` it, and `load` it into the new tables. 

In [13]:
%%sql
-- Populate dimDate with one row per calendar day on which a payment was made.
-- The inner query deduplicates on the calendar date itself. DISTINCT always
-- applies to the whole select list, so every derived column is computed from
-- that single date value to keep exactly one row per day.
-- date_key is the date encoded as a YYYYMMDD integer.
-- Note that EXTRACT(week ...) is the ISO 8601 week number, so days around New
-- Year can carry week 52, 53 or 1 next to the calendar year.
INSERT INTO dimDate (date_key, date, year, quarter, month, day, week, is_weekend)
SELECT TO_CHAR(d.day_date, 'YYYYMMDD')::integer   AS date_key,
       d.day_date                                 AS date,
       EXTRACT(year    FROM d.day_date)           AS year,
       EXTRACT(quarter FROM d.day_date)           AS quarter,
       EXTRACT(month   FROM d.day_date)           AS month,
       EXTRACT(day     FROM d.day_date)           AS day,
       EXTRACT(week    FROM d.day_date)           AS week,
       -- ISODOW numbers Monday as 1 through Sunday as 7
       EXTRACT(ISODOW  FROM d.day_date) IN (6, 7) AS is_weekend
FROM (SELECT DISTINCT payment_date::date AS day_date
      FROM payment) AS d;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
40 rows affected.


[]

In [14]:
%%sql
-- Populate dimCustomer with one row per customer, denormalising the customer
-- address by joining address, city and country into flat columns.
-- customer_key simply reuses the source customer_id.
-- NOTE(review): start_date and end_date are both set to the load time. If these
-- columns are meant to track a validity range, confirm that end_date should
-- not be left open instead.
INSERT INTO dimCustomer (customer_key, customer_id, first_name, last_name, email, address,
                         address2, district, city, country, postal_code, phone, active,
                         create_date, start_date, end_date)
SELECT c.customer_id AS customer_key,
       c.customer_id AS customer_id,
       c.first_name  AS first_name,
       c.last_name   AS last_name,
       c.email       AS email,
       a.address     AS address,
       a.address2    AS address2,
       a.district    AS district,
       ci.city       AS city,
       co.country    AS country,
       a.postal_code AS postal_code,
       a.phone       AS phone,
       c.active      AS active,
       c.create_date AS create_date,
       now()         AS start_date,
       now()         AS end_date
FROM customer c
JOIN address a  ON (c.address_id = a.address_id)
JOIN city ci    ON (a.city_id = ci.city_id)
JOIN country co ON (ci.country_id = co.country_id);

 * postgresql://postgres:***@127.0.0.1:5432/pagila
599 rows affected.


[]

In [16]:
%%sql
-- Populate dimMovie with one row per film.
-- The spoken language is required (inner join). The original language is
-- optional, so it is attached with an outer join and stays NULL when a film
-- has none recorded. movie_key reuses the source film_id.
INSERT INTO dimMovie (
    movie_key, film_id,
    title, description, release_year,
    language, original_language,
    rental_duration, length, rating, special_features
)
SELECT
    fm.film_id,
    fm.film_id,
    fm.title,
    fm.description,
    fm.release_year,
    lang.name,
    lang_orig.name,
    fm.rental_duration,
    fm.length,
    fm.rating,
    fm.special_features
FROM film AS fm
INNER JOIN language AS lang
        ON lang.language_id = fm.language_id
LEFT OUTER JOIN language AS lang_orig
        ON lang_orig.language_id = fm.original_language_id;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
1000 rows affected.


[]

In [21]:
%%sql
-- Populate dimStore with one row per store, flattening the store address
-- (address, city, country) and the name of the managing staff member into the
-- dimension. store_key reuses the source store_id. start_date and end_date
-- are both stamped with the load time.
INSERT INTO dimStore (
    store_key, store_id,
    address, address2, district, city, country, postal_code,
    manager_first_name, manager_last_name,
    start_date, end_date
)
SELECT
    sto.store_id,
    sto.store_id,
    addr.address,
    addr.address2,
    addr.district,
    cty.city,
    ctr.country,
    addr.postal_code,
    mgr.first_name,
    mgr.last_name,
    now(),
    now()
FROM store AS sto
INNER JOIN staff   AS mgr  ON mgr.staff_id    = sto.manager_staff_id
INNER JOIN address AS addr ON addr.address_id = sto.address_id
INNER JOIN city    AS cty  ON cty.city_id     = addr.city_id
INNER JOIN country AS ctr  ON ctr.country_id  = cty.country_id;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
2 rows affected.


[]

In [22]:
%%sql
-- Populate factSales with one row per payment that can be traced through its
-- rental to an inventory item. The inventory row supplies the film (movie_key)
-- and the store (store_key).
-- date_key uses the same YYYYMMDD integer encoding as the dimDate load, so the
-- two can be joined directly.
-- NOTE(review): this statement only appends. Presumably re-running the cell
-- would insert the same payments again, so confirm before executing it twice.
INSERT INTO factSales (date_key, customer_key, movie_key, store_key, sales_amount)
SELECT TO_CHAR(p.payment_date::date, 'YYYYMMDD')::integer AS date_key,
       p.customer_id AS customer_key,
       i.film_id     AS movie_key,
       i.store_id    AS store_key,
       p.amount      AS sales_amount
FROM payment p
JOIN rental r    ON (p.rental_id = r.rental_id)
JOIN inventory i ON (r.inventory_id = i.inventory_id);

 * postgresql://postgres:***@127.0.0.1:5432/pagila
16049 rows affected.


[]