**Author: Miguel Félix**

Date: November 2024

###### Parameter definitions: calendar start date, calendar end date, and the name of the table to create in the lakehouse

In [57]:
# Notebook parameters consumed by the cells below.
# beginDate / endDate are 'yyyy-MM-dd' strings interpolated into the
# sequence(to_date(...), to_date(...)) query that builds the `dates` view.
# NOTE(review): both dates are currently identical, so the generated calendar
# holds a single day; the commented-out values in the date-sequence cell
# ('2012-01-01' to '2024-12-31') look like the intended range — confirm.
beginDate = '2024-12-31'
endDate = '2024-12-31'
# Table name interpolated into the parameterised CREATE OR REPLACE TABLE cell
# and into the final verification query.
LakehouseTableName = 'Calendar'

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 59, Finished, Available, Finished)

###### Creation of Calendar date sequence

In [51]:
from pyspark.sql.functions import explode, sequence, to_date
from pyspark.sql.functions import to_date, year, quarter, month, dayofmonth, date_format, weekofyear

#beginDate = '2012-01-01'
#endDate = '2024-12-31'

# Build one row per day between beginDate and endDate and expose the result
# as the temporary view `dates`, which the SQL cells below read from.
date_sequence_sql = (
    f"select explode(sequence(to_date('{beginDate}'), to_date('{endDate}'), interval 1 day))"
    " as calendarDate"
)
spark.sql(date_sequence_sql).createOrReplaceTempView('dates')

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 53, Finished, Available, Finished)

###### SQL code to confirm the date sequence created

In [52]:
%%sql
-- Sanity check: list the generated dates in chronological order.
SELECT *
FROM dates
ORDER BY calendarDate

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 54, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 1 fields>

###### SQL Code Sample using date sequence from previous cell

In [53]:
%%sql
-- Preview of the calendar attributes derived from the `dates` temp view.
-- NOTE(review): Week is the ISO-8601 week number while Year is the calendar
-- year, so a date such as 2024-12-31 gets Year 2024 with Week 1 — confirm
-- this is the intended pairing.
SELECT
    calendarDate                             AS Date,
    year(calendarDate)                       AS Year,
    concat('Q', quarter(calendarDate))       AS Quarter,       -- 'Q1' .. 'Q4'
    month(calendarDate)                      AS Month_Number,
    date_format(calendarDate, 'MMMM')        AS Month,         -- full month name
    extract(week FROM calendarDate)          AS Week,
    date_format(calendarDate, 'EEEE')        AS CalendarDay,   -- full day name
    dayofweek(calendarDate)                  AS WeekDay,       -- 1 = Sunday .. 7 = Saturday
    concat(year(calendarDate), '-', date_format(calendarDate, 'MM')) AS Period
FROM dates
ORDER BY calendarDate

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 55, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 9 fields>

###### SQL Code example for table generation (hard coded table name)

In [56]:
%%sql
-- Materialise the calendar as table `calendar` (name hard-coded in this cell),
-- replacing any existing table of that name. Rows come from the `dates` view.
-- NOTE(review): Week is the ISO-8601 week number while Year is the calendar
-- year, so a date such as 2024-12-31 gets Year 2024 with Week 1 — confirm
-- this is the intended pairing.
CREATE OR REPLACE TABLE calendar AS
SELECT
    calendarDate                             AS Date,
    year(calendarDate)                       AS Year,
    concat('Q', quarter(calendarDate))       AS Quarter,       -- 'Q1' .. 'Q4'
    month(calendarDate)                      AS Month_Number,
    date_format(calendarDate, 'MMMM')        AS Month,         -- full month name
    extract(week FROM calendarDate)          AS Week,
    date_format(calendarDate, 'EEEE')        AS CalendarDay,   -- full day name
    dayofweek(calendarDate)                  AS WeekDay,       -- 1 = Sunday .. 7 = Saturday
    concat(year(calendarDate), '-', date_format(calendarDate, 'MM')) AS Period
FROM dates
ORDER BY calendarDate
;

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 58, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

###### SQL code example for table generation (table name taken from the `LakehouseTableName` parameter)

In [47]:
# Same calendar build as the hard-coded %%sql cell, but the target table name
# comes from the LakehouseTableName parameter. The statement is assembled first
# so it can be printed or inspected before it runs.
# NOTE(review): LakehouseTableName is interpolated into the statement without
# quoting or validation — it must be a valid table identifier.
create_calendar_sql = f"""

CREATE OR REPLACE TABLE {LakehouseTableName}

select
    CalendarDate as Date,
    year(calendarDate) as Year,
    case when
        month(calendarDate) <4 then "Q1"
    when 
        month(calendarDate) <7 then "Q2"
    when 
        month(calendarDate) <10 then "Q3"
    else
        "Q4" end

         as Quarter,
    month(calendarDate) as Month_Number,
    date_format(calendarDate, 'MMMM') as Month,
    extract(week FROM calendarDate) Week,
    date_format(calendarDate, 'EEEE') as CalendarDay,
    dayofweek(calendarDate) AS WeekDay,
    concat(year(calendarDate) ,"-",  date_format(calendarDate, 'MM')) as Period
    
from
    dates
order by
    calendarDate """

# `df` holds whatever spark.sql returns for the DDL statement; it is not used
# further in this cell.
df = spark.sql(create_calendar_sql)


StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 49, Finished, Available, Finished)

###### SQL Code to check data in table (hard coded table name)

In [25]:
# Row-count check on the hard-coded table name.
# NOTE(review): the query qualifies the table with `LH_Fabric`, whereas the
# create cells use an unqualified name — confirm both resolve to the same
# lakehouse.
row_count_sql = "SELECT COUNT(*) FROM LH_Fabric.calendar"
df = spark.sql(row_count_sql)
display(df)

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 27, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, de8543ec-ab73-49a3-b689-e21486e21495)

###### SQL code to check data in table (table name taken from the `LakehouseTableName` parameter)

In [49]:
# Show up to 1000 rows, newest date first, from the table named by the
# LakehouseTableName parameter.
# NOTE(review): the `LH_Fabric` prefix is hard-coded here, whereas the create
# cell uses the unqualified parameter — confirm both resolve to the same
# lakehouse.
latest_rows_sql = f"""SELECT * FROM LH_Fabric.{LakehouseTableName} order by Date desc LIMIT 1000 """
df = spark.sql(latest_rows_sql)
display(df)

StatementMeta(, 39b87a04-f300-4621-add5-05c2e21de50f, 51, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, ced51ece-aca4-40ef-b9d8-e622370329f9)