In [4]:
!pip install faker polars spark

Collecting polars
  Downloading polars-1.32.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Collecting spark
  Downloading spark-0.2.1.tar.gz (41 kB)
  Preparing metadata (setup.py) ... [?25ldone
Downloading polars-1.32.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.4/38.4 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hBuilding wheels for collected packages: spark
[33m  DEPRECATION: Building 'spark' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'spark'. Discussion can be found at https://github.com/pypa/pip/issues/6334[0m[33m
[0m  Building wheel for s

In [1]:
# --- Persiapan Lingkungan dan Direktori ---
import pandas as pd
import os
import random # Untuk injeksi "raw" data
from datetime import datetime, timedelta
from faker import Faker # Untuk injeksi "raw" data
import polars as pl # Jika Anda masih ingin generate data dengan Polars
from pyspark.sql.functions import col, to_timestamp
from pyspark.sql.functions import col, to_date


# Faker instance (Indonesian locale) for generating data in this notebook;
# seeded so generated values are repeatable.
fake = Faker("id_ID")
Faker.seed(42)

# Local medallion folders (for temporary CSVs if they are generated here).
# The silver and gold folders may go unused when data is written straight to Spark databases.
base_output_dir = 'Data_Medalion_Architecture'
bronze_dir = os.path.join(base_output_dir, 'bronze')
silver_dir = os.path.join(base_output_dir, 'silver')
gold_dir = os.path.join(base_output_dir, 'gold')

for _layer_dir in (bronze_dir, silver_dir, gold_dir):
    os.makedirs(_layer_dir, exist_ok=True)

# --- Ensure the Spark databases exist ---
# Assumes an active SparkSession is already bound to `spark`.
# One database per medallion layer: Bronze, Silver, Gold.
for _layer_db in ("brz_coffeeshop_db", "slv_coffeeshop_db", "gld_coffeeshop_db"):
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {_layer_db};")

print("Directories and Spark Databases ensured.")

Directories and Spark Databases ensured.


In [2]:
# --- Lapisan Bronze: Load CSV ke Spark Iceberg Tables ---

# CSV source files, resolved relative to the notebook's working directory
stores_csv_path = 'stores.csv'
categories_csv_path = 'categories.csv'
products_csv_path = 'products.csv'
transactions_csv_path = 'transactions.csv'

# --- Load and store into the BRONZE Spark database ---
# Make the Bronze database the session's current database.
spark.sql("USE brz_coffeeshop_db;")
print("Using Spark database: brz_coffeeshop_db")

# Load a CSV file into Spark and persist it as an Iceberg table in the Bronze database
def load_csv_to_bronze_iceberg(file_path: str, table_name: str):
    """Load one CSV file into an Iceberg table in ``brz_coffeeshop_db``.

    The CSV is read with a header row and schema inference, its schema and first
    five rows are printed, and the result replaces any existing table of the same
    name. The ``transactions`` table is partitioned by its ``date`` column; every
    other table is written unpartitioned.

    Args:
        file_path: Path of the CSV file to read.
        table_name: Unqualified name of the table to (re)create in ``brz_coffeeshop_db``.
    """
    full_table_name = f"brz_coffeeshop_db.{table_name}"
    print(f"\n--- Loading {file_path} into {full_table_name} (Bronze Layer) ---")

    # Read and preview the CSV BEFORE touching the existing table: if the file is
    # missing or unreadable the call fails here and the current Bronze table survives.
    # inferSchema=True lets Spark detect the column types automatically.
    df_bronze = spark.read.csv(file_path, header=True, inferSchema=True)

    print(f"Schema for {table_name}:")
    df_bronze.printSchema()
    print(f"First 5 rows from {table_name}:")
    df_bronze.show(5)

    # Drop the existing Iceberg table to avoid schema conflicts,
    # especially when the partition layout has changed.
    spark.sql(f"DROP TABLE IF EXISTS {full_table_name};")

    writer = df_bronze.write.format("iceberg").mode("overwrite")
    if table_name == "transactions":
        # Only the transactions table is partitioned, by its 'date' column;
        # the dimension tables are written unpartitioned.
        writer = writer.partitionBy("date")
    writer.saveAsTable(full_table_name)

    print(f"Data from {file_path} loaded to {full_table_name}.")

# Load every source file: the three dimension files first, then transactions
for _csv_path, _bronze_table in (
    (stores_csv_path, "stores"),
    (categories_csv_path, "categories"),
    (products_csv_path, "products"),
    (transactions_csv_path, "transactions"),
):
    load_csv_to_bronze_iceberg(_csv_path, _bronze_table)

# Verify that all Bronze tables now exist
spark.sql("SHOW TABLES IN brz_coffeeshop_db;").show()

Using Spark database: brz_coffeeshop_db

--- Loading stores.csv into brz_coffeeshop_db.stores (Bronze Layer) ---
Schema for stores:
root
 |-- store_id: integer (nullable = true)
 |-- store_name: string (nullable = true)
 |-- city_name: string (nullable = true)

First 5 rows from stores:
+--------+--------------------+--------------------+
|store_id|          store_name|           city_name|
+--------+--------------------+--------------------+
|       1|Jue Coffee Kuning...|Kota Jakarta Selatan|
|       2|Jue Coffee Grand ...|  Kota Jakarta Pusat|
|       3|Jue Coffee Senaya...|  Kota Jakarta Pusat|
|       4|Jue Coffee Pondok...|Kota Jakarta Selatan|
|       5|Jue Coffee Gandar...|Kota Jakarta Selatan|
+--------+--------------------+--------------------+
only showing top 5 rows



                                                                                

Data from stores.csv loaded to brz_coffeeshop_db.stores.

--- Loading categories.csv into brz_coffeeshop_db.categories (Bronze Layer) ---
Schema for categories:
root
 |-- category_id: integer (nullable = true)
 |-- category_name: string (nullable = true)

First 5 rows from categories:
+-----------+----------------+
|category_id|   category_name|
+-----------+----------------+
|          1|          Coffee|
|          2|      Non-Coffee|
|          3|          Snacks|
|          4|Pastries & Cakes|
|          5|  Breakfast Menu|
+-----------+----------------+
only showing top 5 rows

Data from categories.csv loaded to brz_coffeeshop_db.categories.

--- Loading products.csv into brz_coffeeshop_db.products (Bronze Layer) ---
Schema for products:
root
 |-- product_id: integer (nullable = true)
 |-- product_name: string (nullable = true)
 |-- category_id: integer (nullable = true)
 |-- unit_price: integer (nullable = true)
 |-- base_price: integer (nullable = true)

First 5 rows from produc

                                                                                

Schema for transactions:
root
 |-- transaction_id: string (nullable = true)
 |-- date: date (nullable = true)
 |-- store_id: integer (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- product_id: integer (nullable = true)
 |-- quantity: integer (nullable = true)
 |-- payment_method: string (nullable = true)
 |-- price: integer (nullable = true)

First 5 rows from transactions:
+--------------------+----------+--------+-----------+----------+--------+--------------+-----+
|      transaction_id|      date|store_id|customer_id|product_id|quantity|payment_method|price|
+--------------------+----------+--------+-----------+----------+--------+--------------+-----+
|b2b9437a-28df-4ec...|2023-07-20|      88|       NULL|       206|       1|         Gopay|36000|
|18c26797-6142-4a7...|2023-07-16|      89|       NULL|       503|       1|          Cash|55000|
|ec1b8ca1-f91e-4d4...|2023-07-17|      21|       NULL|       411|       1|          DANA|33000|
|0bbb2599-11ce-4dd...|2023-07

                                                                                

Data from transactions.csv loaded to brz_coffeeshop_db.transactions.
+-----------------+------------+-----------+
|        namespace|   tableName|isTemporary|
+-----------------+------------+-----------+
|brz_coffeeshop_db|  categories|      false|
|brz_coffeeshop_db|    products|      false|
|brz_coffeeshop_db|      stores|      false|
|brz_coffeeshop_db|transactions|      false|
+-----------------+------------+-----------+



In [3]:
# --- Silver layer: transform Bronze tables and load them into the Silver Spark database ---
# Create the database if needed, then make it current so that the unqualified
# saveAsTable() calls in this cell resolve to it.
for _silver_stmt in (
    "CREATE DATABASE IF NOT EXISTS slv_coffeeshop_db;",
    "USE slv_coffeeshop_db;",
):
    spark.sql(_silver_stmt)
print("\nUsing Spark database: slv_coffeeshop_db")

from pyspark.sql.functions import col, to_date, regexp_replace, trim, upper, when, lit, coalesce, row_number
from pyspark.sql.types import IntegerType, DecimalType
from pyspark.sql.window import Window

# --- 1. slv_stores (store dimension) ---
print("\n--- Transforming stores data into slv_stores ---")
df_bronze_stores = spark.table("brz_coffeeshop_db.stores")

# Integer key; text columns stripped of surrounding whitespace
store_columns = [
    col("store_id").cast(IntegerType()).alias("store_id"),
    trim(col("store_name")).alias("store_name"),
    trim(col("city_name")).alias("city_name"),
]
df_slv_stores = df_bronze_stores.select(*store_columns)

(
    df_slv_stores.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("slv_stores")
)
print("slv_stores created.")

# --- 2. slv_categories (category dimension) ---
print("\n--- Transforming categories data into slv_categories ---")
df_bronze_categories = spark.table("brz_coffeeshop_db.categories")

# Integer key; category name stripped of surrounding whitespace
category_columns = [
    col("category_id").cast(IntegerType()).alias("category_id"),
    trim(col("category_name")).alias("category_name"),
]
df_slv_categories = df_bronze_categories.select(*category_columns)

(
    df_slv_categories.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("slv_categories")
)
print("slv_categories created.")

# --- 3. slv_products (product dimension) ---
print("\n--- Transforming products data into slv_products ---")
df_bronze_products = spark.table("brz_coffeeshop_db.products")

# Price columns: remove every non-digit character, then cast to integer
unit_price_clean = regexp_replace(col("unit_price"), '[^0-9]', '').cast(IntegerType())
base_price_clean = regexp_replace(col("base_price"), '[^0-9]', '').cast(IntegerType())

df_slv_products = df_bronze_products.select(
    col("product_id").cast(IntegerType()).alias("product_id"),
    trim(col("product_name")).alias("product_name"),
    col("category_id").cast(IntegerType()).alias("category_id"),
    unit_price_clean.alias("unit_price"),
    base_price_clean.alias("base_price"),
)

(
    df_slv_products.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("slv_products")
)
print("slv_products created.")

# --- 4. slv_transactions (transaction fact) ---
print("\n--- Transforming transactions data into slv_transactions ---")
df_bronze_transactions = spark.table("brz_coffeeshop_db.transactions")

# Step 1: deduplicate - keep one row per transaction_id, preferring the latest 'date'.
# row_number() breaks ties arbitrarily, so ordering by 'date' alone could keep a
# different duplicate on every run. The secondary sort keys below cover every other
# column that is carried into the Silver table, which makes the kept row deterministic.
# NOTE(review): this keeps a single row per transaction_id; if one transaction can
# contain several line items, the others are dropped here - confirm against the source data.
window_spec_transactions = Window.partitionBy("transaction_id").orderBy(
    col("date").desc(),
    col("store_id"),
    col("product_id"),
    col("quantity"),
    col("payment_method"),
    col("customer_id"),
)
df_transactions_dedup = (
    df_bronze_transactions
    .withColumn("row_num", row_number().over(window_spec_transactions))
    .filter(col("row_num") == 1)
    .drop("row_num")
)

# Step 2: recompute the line total from the cleaned product price and cast the other columns
df_slv_products_clean = spark.table("slv_coffeeshop_db.slv_products")

df_slv_transactions_final = df_transactions_dedup.alias("t").join(
    df_slv_products_clean.alias("p"),
    col("t.product_id").cast(IntegerType()) == col("p.product_id"),
    "left"
).select(
    col("t.transaction_id"),
    # The Bronze column is 'date'; it is exposed as 'transaction_date' in Silver
    col("t.date").alias("transaction_date"),
    col("t.store_id").cast(IntegerType()).alias("store_id"),
    col("t.customer_id"),
    col("t.product_id").cast(IntegerType()).alias("product_id"),
    col("t.quantity").cast(IntegerType()).alias("quantity"),
    trim(col("t.payment_method")).alias("payment_method"),
    # quantity * product unit_price; NULL when the left join finds no matching product
    (col("t.quantity").cast(IntegerType()) * col("p.unit_price").cast(IntegerType())).alias("total_item_price")
)

# Write the final cleaned transactions to the Silver table
df_slv_transactions_final.write.format("iceberg").mode("overwrite").saveAsTable("slv_transactions")
print("slv_transactions created.")

spark.sql("SHOW TABLES IN slv_coffeeshop_db;").show()


Using Spark database: slv_coffeeshop_db

--- Transforming stores data into slv_stores ---
slv_stores created.

--- Transforming categories data into slv_categories ---
slv_categories created.

--- Transforming products data into slv_products ---
slv_products created.

--- Transforming transactions data into slv_transactions ---


                                                                                

slv_transactions created.
+-----------------+----------------+-----------+
|        namespace|       tableName|isTemporary|
+-----------------+----------------+-----------+
|slv_coffeeshop_db|  slv_categories|      false|
|slv_coffeeshop_db|    slv_products|      false|
|slv_coffeeshop_db|      slv_stores|      false|
|slv_coffeeshop_db|slv_transactions|      false|
+-----------------+----------------+-----------+



In [5]:
from pyspark.sql.functions import col, current_date, lit, sum, countDistinct, avg, to_date, year, month, dayofmonth, expr, sha2, concat, lit
from pyspark.sql.types import DateType
from datetime import datetime

# --- Gold layer: transform Silver tables and load them into the Gold Spark database ---
# Create the database if needed, then make it current so that the unqualified
# saveAsTable() calls in this cell resolve to it.
for _gold_stmt in (
    "CREATE DATABASE IF NOT EXISTS gld_coffeeshop_db;",
    "USE gld_coffeeshop_db;",
):
    spark.sql(_gold_stmt)
print("\nUsing Spark database: gld_coffeeshop_db")

# --- 1. dim_store (store dimension) ---
print("\n--- Transforming slv_stores into dim_store (Gold Layer) ---")
df_slv_stores = spark.table("slv_coffeeshop_db.slv_stores")

# Surrogate key: SHA-256 of the store id concatenated with the load date
surrogate_key_sql_expression_store = (
    "SHA2(CONCAT(CAST(store_id AS STRING), CAST(loaded_date AS STRING)), 256)"
)

df_gld_dim_store = (
    df_slv_stores
    .select(
        col("store_id"),
        col("store_name"),
        col("city_name"),
        current_date().alias("loaded_date"),
    )
    .withColumn("dim_store_key", expr(surrogate_key_sql_expression_store))
)

(
    df_gld_dim_store.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("dim_store")
)
print("dim_store created.")

# --- 2. dim_category (category dimension) ---
print("\n--- Transforming slv_categories into dim_category (Gold Layer) ---")
df_slv_categories = spark.table("slv_coffeeshop_db.slv_categories")

# Surrogate key: SHA-256 of the category id concatenated with the load date
surrogate_key_sql_expression_category = (
    "SHA2(CONCAT(CAST(category_id AS STRING), CAST(loaded_date AS STRING)), 256)"
)

df_gld_dim_category = (
    df_slv_categories
    .select(
        col("category_id"),
        col("category_name"),
        current_date().alias("loaded_date"),
    )
    .withColumn("dim_category_key", expr(surrogate_key_sql_expression_category))
)

(
    df_gld_dim_category.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("dim_category")
)
print("dim_category created.")

# --- 3. dim_product (product dimension) ---
print("\n--- Transforming slv_products into dim_product (Gold Layer) ---")
df_slv_products = spark.table("slv_coffeeshop_db.slv_products")

# Surrogate key: SHA-256 of the product id concatenated with the load date
surrogate_key_sql_expression_product = (
    "SHA2(CONCAT(CAST(product_id AS STRING), CAST(loaded_date AS STRING)), 256)"
)

df_gld_dim_product = (
    df_slv_products
    .select(
        col("product_id"),
        col("product_name"),
        col("category_id"),
        col("unit_price"),
        col("base_price"),
        current_date().alias("loaded_date"),
    )
    .withColumn("dim_product_key", expr(surrogate_key_sql_expression_product))
)

(
    df_gld_dim_product.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("dim_product")
)
print("dim_product created.")


# --- 4. fact_sales (sales fact) ---
print("\n--- Transforming slv_transactions into fact_sales (Gold Layer) ---")
df_slv_transactions = spark.table("slv_coffeeshop_db.slv_transactions")

# Re-read the dimensions from the Gold tables written above
df_gld_dim_store = spark.table("gld_coffeeshop_db.dim_store")
df_gld_dim_product = spark.table("gld_coffeeshop_db.dim_product")
df_gld_dim_category = spark.table("gld_coffeeshop_db.dim_category")

# Left joins keep every transaction even when a dimension row is missing:
# transaction -> store, transaction -> product, product -> category.
transactions_with_dims = (
    df_slv_transactions.alias("t")
    .join(df_gld_dim_store.alias("ds"), col("t.store_id") == col("ds.store_id"), "left")
    .join(df_gld_dim_product.alias("dp"), col("t.product_id") == col("dp.product_id"), "left")
    .join(df_gld_dim_category.alias("dc"), col("dp.category_id") == col("dc.category_id"), "left")
)

# Natural store/product ids are replaced by the dimensions' surrogate keys
fact_sales_columns = [
    col("t.transaction_id"),
    col("t.transaction_date"),
    col("ds.dim_store_key"),
    col("t.customer_id"),
    col("dp.dim_product_key"),
    col("dc.dim_category_key"),
    col("t.quantity"),
    col("t.total_item_price"),
    col("t.payment_method"),
    current_date().alias("loaded_date"),
]
df_gld_fact_sales = transactions_with_dims.select(*fact_sales_columns)

(
    df_gld_fact_sales.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("fact_sales")
)
print("fact_sales created.")


# --- 5. gold_daily_sales_summary (daily aggregate fact) ---
print("\n--- Aggregating fact_sales into gold_daily_sales_summary (Gold Layer) ---")

# Aggregate from the persisted fact table rather than the lazy join DataFrame:
# the latter is not cached, so using it would re-run the whole three-way join
# that was just written out. This also matches how the product-performance
# section of this cell reads fact_sales.
df_gld_fact_sales = spark.table("gld_coffeeshop_db.fact_sales")

# `sum` here is pyspark.sql.functions.sum (imported at the top of this cell),
# not the Python builtin.
df_gold_daily_sales_summary = (
    df_gld_fact_sales
    .groupBy(col("transaction_date").alias("sale_date"))
    .agg(
        sum("total_item_price").alias("total_revenue"),
        countDistinct("transaction_id").alias("total_transactions"),
        sum("quantity").alias("total_quantity_sold"),
    )
    # loaded_date is a load-time stamp, not an aggregate, so it is added after agg()
    .withColumn("loaded_date", current_date())
)

df_gold_daily_sales_summary.write.format("iceberg").mode("overwrite").saveAsTable("gold_daily_sales_summary")
print("gold_daily_sales_summary created.")

# --- 6. gold_product_performance (per-product aggregate fact) ---
print("\n--- Aggregating fact_sales into gold_product_performance (Gold Layer) ---")

# Work from the persisted Gold tables
df_gld_fact_sales = spark.table("gld_coffeeshop_db.fact_sales")
df_gld_dim_product = spark.table("gld_coffeeshop_db.dim_product")
df_gld_dim_category = spark.table("gld_coffeeshop_db.dim_category")

# fact -> product via the surrogate key, product -> category via the natural category id
sales_with_product_dims = (
    df_gld_fact_sales.alias("fs")
    .join(
        df_gld_dim_product.alias("dp"),
        col("fs.dim_product_key") == col("dp.dim_product_key"),
        "left",
    )
    .join(
        df_gld_dim_category.alias("dc"),
        col("dp.category_id") == col("dc.category_id"),
        "left",
    )
)

# One output row per (product_id, product_name, category_name)
df_gold_product_performance = (
    sales_with_product_dims
    .groupBy(col("dp.product_id"), col("dp.product_name"), col("dc.category_name"))
    .agg(
        sum("fs.quantity").alias("total_quantity_sold"),
        sum("fs.total_item_price").alias("total_revenue_from_product"),
        avg("dp.unit_price").alias("average_unit_price"),
    )
)

(
    df_gold_product_performance.write
    .format("iceberg")
    .mode("overwrite")
    .saveAsTable("gold_product_performance")
)
print("gold_product_performance created.")

spark.sql("SHOW TABLES IN gld_coffeeshop_db;").show()


Using Spark database: gld_coffeeshop_db

--- Transforming slv_stores into dim_store (Gold Layer) ---
dim_store created.

--- Transforming slv_categories into dim_category (Gold Layer) ---
dim_category created.

--- Transforming slv_products into dim_product (Gold Layer) ---
dim_product created.

--- Transforming slv_transactions into fact_sales (Gold Layer) ---


                                                                                

fact_sales created.

--- Aggregating fact_sales into gold_daily_sales_summary (Gold Layer) ---


                                                                                

gold_daily_sales_summary created.

--- Aggregating fact_sales into gold_product_performance (Gold Layer) ---


                                                                                

gold_product_performance created.
+-----------------+--------------------+-----------+
|        namespace|           tableName|isTemporary|
+-----------------+--------------------+-----------+
|gld_coffeeshop_db|        dim_category|      false|
|gld_coffeeshop_db|         dim_product|      false|
|gld_coffeeshop_db|           dim_store|      false|
|gld_coffeeshop_db|          fact_sales|      false|
|gld_coffeeshop_db|gold_daily_sales_...|      false|
|gld_coffeeshop_db|gold_product_perf...|      false|
+-----------------+--------------------+-----------+



In [6]:
%%sql

SHOW DATABASES

namespace
brz_coffeeshop_db
brz_hospital_db
coffeeshop
gld_coffeeshop_db
gld_hospital_db
slv_coffeeshop_db
slv_hospital_db
coffeeshop_medalion


In [7]:
%%sql

USE gld_coffeeshop_db


In [8]:
%%sql

SHOW TABLES

namespace,tableName,isTemporary
gld_coffeeshop_db,dim_category,False
gld_coffeeshop_db,dim_product,False
gld_coffeeshop_db,dim_store,False
gld_coffeeshop_db,fact_sales,False
gld_coffeeshop_db,gold_daily_sales_summary,False
gld_coffeeshop_db,gold_product_performance,False


In [9]:
%%sql

SELECT * FROM dim_category

category_id,category_name,loaded_date,dim_category_key
1,Coffee,2025-08-24,c171c6b2916db1f219a64ef9169fa0a8bb4d6ffed5358373d6f6ca17845d32f4
2,Non-Coffee,2025-08-24,d7284a34c0c6c6eb571897ec50a1b4d368fa56584d9761a05640f8a8dd34f98d
3,Snacks,2025-08-24,e8acf12538a4b8632be1dd51f97941d901955ad16f51ceade5d90fdcd0cb51bd
4,Pastries & Cakes,2025-08-24,08d147bc2ff77439989df091a3806ff3698e6831fb5f45812762f945bc47c176
5,Breakfast Menu,2025-08-24,afe068f7bc3fd0dd48105515b3438f3ba31a202b4087c12948e50ec74db66667
6,Lunch & Dinner,2025-08-24,3ce55d9f690943bbe7801f04ff7eacfcd91d18cb54ae40b6cab7290dda523777
7,Desserts,2025-08-24,f04f66e60bec4e7006540ab225ea35e773bcc79a36dd68ea9ecb67e433b6e43b
8,Merchandise,2025-08-24,fd234f917e6795a426d85a0d8b32fa797f8767ac0a56bae1dfc544985760ea5c
9,Brewing Equipment,2025-08-24,b0152885697ea4add316970a11f97852a201f9611caaeea206f405b3508be08f
10,Packaged Beans,2025-08-24,a1841529a13b41b1e76613c2e8c1e11bdf48c50cdeb9c2cc46bd2e09c24a64f4


In [10]:
%%sql

SELECT * FROM dim_product

product_id,product_name,category_id,unit_price,base_price,loaded_date,dim_product_key
101,Kopi Telur Tradisional,1,18000,13320,2025-08-24,ace309ba10a5b90e6d83555c6ab8ed349e0e8d736ba1b042a6756262cbe99f69
102,Kopi Kelapa Khas Vietnam,1,22000,14960,2025-08-24,6475a141de3c438dc7112e3b2c34eb87937240d5a8db76ae0784d07f46877cc4
103,Kopi Vietnam Drip Original,1,20000,13600,2025-08-24,669cab0f52fd2fd4ecb8339bbba93ab5259cc32f77461f004e38055ec3306eaf
104,Kopi Butter Gurih,1,19000,12350,2025-08-24,b1f55fbd40b20d8eda049dcdda98dd2d64ae5cfbc0709016fcd21c13893452e2
105,Kopi Susu Kampung Kental Manis,1,15000,10800,2025-08-24,3982c838bcffdd975a44358cd695ed95b5162f23ec3d0cafb5edf56030cd1ead
106,Kopi Coklat Spesial,1,21000,15750,2025-08-24,d631ad5e0d0198ea22672257612289b273aef66e7a57122f2e93cbdcf75ef4ca
107,Es Kopi Susu Aren,1,23000,16100,2025-08-24,0f884f990dcd800d2621ff831b0a2c67760246debaecbddc673df8e2158ca397
108,Es Kopi Hitam Dingin,1,16000,11040,2025-08-24,4938ed72c650f1d43f415d38b8492a1fc59e87648005d6079bc1b4db8ff31c31
109,Es Kopi Hitam Lemon Segar,1,18000,12600,2025-08-24,30c6f3d24e39926f6694e7762b7b40da33714515fb6f2c1314fe0eeb9fda2c68
110,Drip Bag Coffee Lokal Blend,1,25000,17250,2025-08-24,c2f257efd9d3bd351f3bec6b30109f70204f9fa39e225cec292e8cf9eeb5de7c


In [11]:
%%sql

SELECT * FROM dim_store

store_id,store_name,city_name,loaded_date,dim_store_key
1,Jue Coffee Kuningan City,Kota Jakarta Selatan,2025-08-24,c171c6b2916db1f219a64ef9169fa0a8bb4d6ffed5358373d6f6ca17845d32f4
2,Jue Coffee Grand Indonesia,Kota Jakarta Pusat,2025-08-24,d7284a34c0c6c6eb571897ec50a1b4d368fa56584d9761a05640f8a8dd34f98d
3,Jue Coffee Senayan City,Kota Jakarta Pusat,2025-08-24,e8acf12538a4b8632be1dd51f97941d901955ad16f51ceade5d90fdcd0cb51bd
4,Jue Coffee Pondok Indah Mall,Kota Jakarta Selatan,2025-08-24,08d147bc2ff77439989df091a3806ff3698e6831fb5f45812762f945bc47c176
5,Jue Coffee Gandaria City,Kota Jakarta Selatan,2025-08-24,afe068f7bc3fd0dd48105515b3438f3ba31a202b4087c12948e50ec74db66667
6,Jue Coffee Pacific Place,Kota Jakarta Selatan,2025-08-24,3ce55d9f690943bbe7801f04ff7eacfcd91d18cb54ae40b6cab7290dda523777
7,Jue Coffee Kota Kasablanka,Kota Jakarta Selatan,2025-08-24,f04f66e60bec4e7006540ab225ea35e773bcc79a36dd68ea9ecb67e433b6e43b
8,Jue Coffee Lotte Avenue,Kota Jakarta Selatan,2025-08-24,fd234f917e6795a426d85a0d8b32fa797f8767ac0a56bae1dfc544985760ea5c
9,Jue Coffee Plaza Senayan,Kota Jakarta Pusat,2025-08-24,b0152885697ea4add316970a11f97852a201f9611caaeea206f405b3508be08f
10,Jue Coffee Sarinah Thamrin,Kota Jakarta Pusat,2025-08-24,a1841529a13b41b1e76613c2e8c1e11bdf48c50cdeb9c2cc46bd2e09c24a64f4


In [12]:
%%sql

SELECT * FROM fact_sales

transaction_id,transaction_date,dim_store_key,customer_id,dim_product_key,dim_category_key,quantity,total_item_price,payment_method,loaded_date
000107a3-d998-457b-94aa-edca3cda4a48,2023-07-11,d06caa12e32a792f9348232c19002e16e31285fe1942f8d2656fa6007c3429f2,,4b09fe46783da237bc37ce7c424f8ed49483345a5c92afafd49713efbd8cd3ef,c171c6b2916db1f219a64ef9169fa0a8bb4d6ffed5358373d6f6ca17845d32f4,2,64000,Ovo,2025-08-24
00034772-5f44-42c6-a2fc-51371f5aa398,2023-07-11,cbf824220ec3d2938a92005b51926c9ce4f2036ad248843786860e8ef5d793a3,,c515dcfca7c8aed1839f58e30c133c706002881d08d47a2c77dfef2010afdd98,d7284a34c0c6c6eb571897ec50a1b4d368fa56584d9761a05640f8a8dd34f98d,1,29000,Credit card,2025-08-24
00037c55-1777-48f5-a584-74e34eadc78b,2023-07-14,bc33f3e306e87f4df47a9cab29745d2d1a62d2b4c3c798777f58bcde2b6bf087,,91e291d3ce171a2167d0be0b46c66253b053d4e75adfc7796b8da9f41e5bbe3d,3ce55d9f690943bbe7801f04ff7eacfcd91d18cb54ae40b6cab7290dda523777,3,165000,ShopeePay,2025-08-24
00059b2c-d939-42b6-b762-3a832728d222,2023-07-04,2d443bb2ba39cdf786f365dc9724472126001f31ad213040dcd8bd4da53304ef,,a06948db0b8eedc3d434d5fb7e43da69b87bb6776eb3dcc2265d5158e3f1d732,a1841529a13b41b1e76613c2e8c1e11bdf48c50cdeb9c2cc46bd2e09c24a64f4,2,300000,QRIS,2025-08-24
0007e0ff-ba64-44d3-ad47-13e2e20ea00e,2023-07-21,f9cd7837ff1244b3c4f79b7ed91f5bd6a8a69ffc6f502f36f689111068aaffa5,,520762ba9b53a995f311606bf16d0a76a3fe4efb90f70d64bad7212d676509c0,d7284a34c0c6c6eb571897ec50a1b4d368fa56584d9761a05640f8a8dd34f98d,1,15000,Debit card,2025-08-24
0009d30c-9f1e-4017-a550-64bc4743d771,2023-07-24,864944560c10eeee2b29ad2ff4d75c3d10ad723963cd1894d440c913a4e0661a,,5dc4f92087b417a0a28a6f86123a5467b7aca141c9223c7b2bb0fd4f5d3ff8c7,e8acf12538a4b8632be1dd51f97941d901955ad16f51ceade5d90fdcd0cb51bd,3,66000,ShopeePay,2025-08-24
000d90b6-9944-4adc-83b7-4b6fbb2ecc9a,2023-07-05,864944560c10eeee2b29ad2ff4d75c3d10ad723963cd1894d440c913a4e0661a,,44bdb3a4acf71c7dfabf1f848252fabddc33d6b3a627b3df84c53a7e50019041,d7284a34c0c6c6eb571897ec50a1b4d368fa56584d9761a05640f8a8dd34f98d,2,70000,Credit card,2025-08-24
000e28eb-eed0-406f-959e-bad99b23a620,2023-07-03,a1841529a13b41b1e76613c2e8c1e11bdf48c50cdeb9c2cc46bd2e09c24a64f4,,c2f257efd9d3bd351f3bec6b30109f70204f9fa39e225cec292e8cf9eeb5de7c,c171c6b2916db1f219a64ef9169fa0a8bb4d6ffed5358373d6f6ca17845d32f4,2,50000,Debit card,2025-08-24
000f4c5c-351f-48a9-aa12-e7a215d2061a,2023-07-26,669cab0f52fd2fd4ecb8339bbba93ab5259cc32f77461f004e38055ec3306eaf,,91e291d3ce171a2167d0be0b46c66253b053d4e75adfc7796b8da9f41e5bbe3d,3ce55d9f690943bbe7801f04ff7eacfcd91d18cb54ae40b6cab7290dda523777,2,110000,Credit card,2025-08-24
000fc3c1-ce18-40fb-8bc0-791ab696c50a,2023-07-08,909a6db46a46330ec7136a804e1dc17df6a8f81b0bc6cdd0c2fa6bb69f392f69,,50b510bae41e4dcf71a7c3825cef381ee5ab1ac0120f09206da403066739e53d,3ce55d9f690943bbe7801f04ff7eacfcd91d18cb54ae40b6cab7290dda523777,1,95000,Cash,2025-08-24


In [13]:
%%sql

SELECT * FROM gold_daily_sales_summary

sale_date,total_revenue,total_transactions,total_quantity_sold,loaded_date
2023-07-15,6131339000,73609,122884,2025-08-24
2023-07-29,3094671000,37189,61962,2025-08-24
2023-07-18,2059619000,24573,41053,2025-08-24
2023-07-04,2137552000,25245,42123,2025-08-24
2023-07-19,2059623000,24673,41416,2025-08-24
2023-07-07,2066095000,24711,41210,2025-08-24
2023-07-22,3091339000,37114,61588,2025-08-24
2023-07-12,4068564000,49133,81783,2025-08-24
2023-07-10,2014267000,24433,40675,2025-08-24
2023-07-02,3102719000,37302,62161,2025-08-24


In [14]:
%%sql

SELECT * FROM gold_product_performance

product_id,product_name,category_name,total_quantity_sold,total_revenue_from_product,average_unit_price
1004,Decaf Blend (250g),Packaged Beans,14526,1307340000,90000.0
802,Jue Coffee Tumbler (Large),Merchandise,14301,1716120000,120000.0
411,Scones with Jam & Cream,Pastries & Cakes,14486,478038000,33000.0
403,Almond Croissant,Pastries & Cakes,14595,510825000,35000.0
307,Onion Rings,Snacks,14611,394497000,27000.0
113,Latte Panas,Coffee,14553,407484000,28000.0
1003,Single Origin Robusta Lampung (250g),Packaged Beans,14495,1087125000,75000.0
120,Cold Brew Black,Coffee,14772,413616000,28000.0
123,Manual Brew V60,Coffee,14801,518035000,35000.0
414,Donat Gula,Pastries & Cakes,14299,257382000,18000.0
