# Data Transformation

In [0]:
-- customer_dim (preview)
-- Projects the customer attributes straight from customers, renaming
-- name -> customer_name and email -> email_address; customer_id and city
-- pass through unchanged. No filtering or de-duplication is applied.

select
  c.customer_id,
  c.name  as customer_name,
  c.email as email_address,
  c.city
from customers as c

In [0]:
-- orders_fact (preview), built at order line-item grain.
-- order_items drives the result; orders and products are attached with
-- left joins, so a line item with no matching order or product is kept
-- and its order_date / customer_id / unit_price / total_price come back NULL.
-- Assumes orders.order_id and products.product_id are unique; otherwise the
-- joins would repeat line items -- TODO confirm.
-- NOTE(review): unit_price is read from products.price, i.e. whatever price
-- is stored there now -- confirm that is the intended price for past orders.

select
  items.item_id as line_item_id,
  items.order_id,
  items.product_id,
  items.quantity,
  ord.order_date,
  ord.customer_id,
  prod.price as unit_price,
  -- line total, rounded to 2 decimal places
  round(prod.price * items.quantity, 2) as total_price
from order_items as items
left join orders as ord
  on items.order_id = ord.order_id
left join products as prod
  on items.product_id = prod.product_id

In [0]:
-- products_dim (preview)
-- Descriptive product attributes only: product_id and product_name pass
-- through, category is exposed as product_category. price is not selected
-- here.

select
  p.product_id,
  p.product_name,
  p.category as product_category
from products as p

In [0]:
-- date_dim (preview)
-- One row per distinct orders.order_date (the group by collapses repeated
-- dates), with calendar attributes derived from that date.
-- Text columns are aliased month_name / day_of_week_name so this preview
-- matches the column names of the persisted transformed_data.date_dim table.
-- Assumes order_date is a DATE; if it is a timestamp the grain is one row
-- per distinct timestamp -- TODO confirm.

select
  order_date as date,
  year(order_date) as year,
  month(order_date) as month,
  day(order_date) as day,
  weekofyear(order_date) as week_of_year,
  dayofweek(order_date) as day_of_week,
  -- Spark/Databricks SQL dayofweek: 1 = Sunday ... 7 = Saturday
  case
    when dayofweek(order_date) in (1, 7) then 'Weekend'
    else 'Weekday'
  end as day_type,
  date_format(order_date, 'MMMM') as month_name,        -- full month name
  date_format(order_date, 'EEEE') as day_of_week_name   -- full weekday name
from orders
group by order_date

In [0]:
-- Target schema for the dimension and fact tables persisted by the
-- statements that follow; a no-op when the schema already exists.
-- NOTE(review): no catalog is named, so this presumably resolves against the
-- session's current catalog -- confirm it is the intended one.
create schema if not exists transformed_data;

-- Persist the product dimension: product_id, product_name and category
-- (exposed as product_category) from workspace.default.products.
-- Because of "if not exists", an existing table is left untouched and is
-- NOT refreshed with current source data.
create table if not exists transformed_data.products_dim as
select
  p.product_id,
  p.product_name,
  p.category as product_category
from workspace.default.products as p;

-- Persist the calendar dimension: one row per distinct order_date found in
-- workspace.default.orders, with calendar attributes derived from that date.
-- Because of "if not exists", an existing table is left untouched and is
-- NOT refreshed with current source data.
create table if not exists transformed_data.date_dim as
select
  d.order_date as date,
  year(d.order_date) as year,
  month(d.order_date) as month,
  day(d.order_date) as day,
  weekofyear(d.order_date) as week_of_year,
  dayofweek(d.order_date) as day_of_week,
  -- Spark/Databricks SQL dayofweek: 1 = Sunday ... 7 = Saturday
  case
    when dayofweek(d.order_date) in (1, 7) then 'Weekend'
    else 'Weekday'
  end as day_type,
  date_format(d.order_date, 'MMMM') as month_name,
  date_format(d.order_date, 'EEEE') as day_of_week_name
from (
  -- collapse orders to its distinct order dates
  select distinct order_date
  from workspace.default.orders
) as d;


-- Persist the order fact table at order line-item grain.
-- workspace.default.order_items drives the result; orders and products are
-- attached with left joins, so a line item with no matching order or product
-- is kept and its order_date / customer_id / unit_price / total_price are NULL.
-- All three sources are fully qualified so the statement does not depend on
-- the session's current catalog/schema.
-- Assumes orders.order_id and products.product_id are unique; otherwise the
-- joins would repeat line items -- TODO confirm.
-- NOTE(review): unit_price is read from products.price, i.e. whatever price
-- is stored there now -- confirm that is the intended price for past orders.
-- Because of "if not exists", an existing table is left untouched and is
-- NOT refreshed with current source data.
create table if not exists transformed_data.orders_fact as
select
  oi.item_id as line_item_id,
  oi.order_id,
  oi.product_id,
  oi.quantity,
  o.order_date,
  o.customer_id,
  pr.price as unit_price,
  -- line total, rounded to 2 decimal places
  round(pr.price * oi.quantity, 2) as total_price
from workspace.default.order_items as oi
left join workspace.default.orders as o
  on oi.order_id = o.order_id
left join workspace.default.products as pr
  on oi.product_id = pr.product_id;


-- Persist the customer dimension from workspace.default.customers:
-- customer_id and city pass through, name -> customer_name and
-- email -> email_address. No filtering or de-duplication is applied.
-- Because of "if not exists", an existing table is left untouched and is
-- NOT refreshed with current source data.
-- Terminated with ";" like the other statements in this cell so a statement
-- appended after it does not run into this one.
create table if not exists transformed_data.customer_dim as
select
  customer_id,
  name as customer_name,
  email as email_address,
  city
from workspace.default.customers;
