### Setup instructions
1. Create the catalog
2. Create the schemas (gold, silver, and bronze)
3. Create the tables for the parent company

In [0]:
from pyspark.sql import functions as f
from pyspark.sql import types as t
import logging
from pyspark.sql import SparkSession
# Reuse the active SparkSession if there is one; otherwise create one with app name 'fmcg'.
spark = (
    SparkSession.builder
    .appName('fmcg')
    .getOrCreate()
)

# TODO: set up loggers (`logging` is imported above but not configured yet)

In [0]:
%sql
-- Create the catalog (if missing) and make it the current catalog
CREATE CATALOG IF NOT EXISTS fmcg;
USE CATALOG fmcg;

-- One schema per layer: bronze, silver, gold
CREATE SCHEMA IF NOT EXISTS fmcg.bronze;
CREATE SCHEMA IF NOT EXISTS fmcg.silver;
CREATE SCHEMA IF NOT EXISTS fmcg.gold;

In [0]:
# build the date fact table

def build_dates_fact(start_date, end_date, intrval):
    """Build the dates table: one row per step in a date range, plus calendar attributes.

    Args:
        start_date: First date of the range (inclusive). A 'yyyy-MM-dd' string, or
            any object whose ``str()`` is such a string (e.g. ``datetime.date``).
        end_date: Upper bound of the range; it is included when a step lands on it.
        intrval: Step unit between consecutive rows. One of 'day', 'week', 'month'
            or 'year' (case-insensitive, plural forms accepted).

    Returns:
        A Spark DataFrame with a ``date`` column and the derived columns
        ``DateKey``, ``Year``, ``Month``, ``Day``, ``MonthName``, ``MonthShortName``,
        ``Quarter``, ``Week``, ``DayOfWeek`` and ``DayOfWeekName``.

    Raises:
        ValueError: If ``intrval`` is not one of the supported units.
    """
    sub_month_units = {'day', 'days', 'week', 'weeks'}
    month_or_longer_units = {'month', 'months', 'year', 'years'}

    # The unit is the only value that has to be spliced into SQL text, so it is
    # validated against a fixed whitelist before anything touches Spark.
    unit = str(intrval).strip().lower()
    if unit not in sub_month_units | month_or_longer_units:
        raise ValueError(
            f"Unsupported interval unit {intrval!r}; expected one of: day, week, month, year"
        )

    # Dates are passed as literals (not interpolated into a query string), so
    # quoting inside the values cannot alter the statement.
    date_steps = f.sequence(
        f.to_date(f.lit(str(start_date))),
        f.to_date(f.lit(str(end_date))),
        f.expr(f"interval 1 {unit}"),
    )
    df_dates = spark.range(1).select(f.explode(date_steps).alias('date'))

    # A month-grain key repeats within a month when stepping by day or week, so
    # sub-month grains get a day-grain key. Month/year grains keep 'yyyyMM'.
    date_key_format = 'yyyyMMdd' if unit in sub_month_units else 'yyyyMM'

    # create other date facts columns
    return (
        df_dates.withColumns({
            'DateKey': f.date_format('date', date_key_format),
            'Year': f.year('date'),
            'Month': f.month('date'),
            # dayofmonth is equivalent to f.day but also exists before Spark 3.5
            'Day': f.dayofmonth('date'),
            'MonthName': f.date_format('date', 'MMMM'),
            'MonthShortName': f.date_format('date', 'MMM'),
            'Quarter': f.quarter('date'),
            # ISO-8601 week number; can belong to the adjacent calendar year
            # around New Year, so it does not always pair with 'Year'.
            'Week': f.weekofyear('date'),
            # 1 = Sunday ... 7 = Saturday
            'DayOfWeek': f.dayofweek('date'),
            'DayOfWeekName': f.date_format('date', 'EEEE')
        })
    )
    

# Build the monthly calendar from Jan 2024 through Dec 2025 and publish it,
# replacing any existing contents of the target table.
df_dates = build_dates_fact(
    start_date='2024-01-01',
    end_date='2025-12-01',
    intrval='month',
)
# df_dates.limit(5).display()  # uncomment to preview the first rows
(
    df_dates.write
    .mode('overwrite')
    .saveAsTable('fmcg.gold.fact_dates')
)
