In [0]:
# Install the `holidays` library, used further down to flag Brazilian and US holidays.
# NOTE(review): consider `%pip install holidays==<pinned version>` so the package is
# installed into the notebook kernel's environment and runs stay reproducible -
# confirm this matches the runtime in use.
!pip install holidays

In [0]:
#Import python
import holidays
import datetime as dt
import locale

In [0]:
#Import pyspark
from pyspark.sql.functions import when, col, lit, concat
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType

In [0]:
# Calendar bounds: `start` is the first day generated, `end` is the exclusive upper limit.
start = dt.datetime(2020, 1, 1)
end = dt.datetime(2100, 1, 1)

# One datetime (at midnight) per day, from `start` up to the day before `end`.
total_days = (end - start).days
date_generated = [start + dt.timedelta(days=offset) for offset in range(total_days)]

In [0]:
# Month number (1-12) -> month name in Portuguese.
# NOTE(review): 'Marco' is written without the cedilla, while `week_br` uses
# 'Terça-feira' with one - confirm whether 'Março' was intended.
month_br = dict(enumerate(
    ['Janeiro', 'Fevereiro', 'Marco', 'Abril', 'Maio', 'Junho',
     'Julho', 'Agosto', 'Setembro', 'Outubro', 'Novembro', 'Dezembro'],
    start=1,
))

In [0]:
# English weekday name -> weekday name in Portuguese.
# NOTE(review): 'Sabado' is written without the accent ('Sábado') - confirm this is intended.
week_br = dict(zip(
    ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'),
    ('Segunda-feira', 'Terça-feira', 'Quarta-feira', 'Quinta-feira',
     'Sexta-feira', 'Sabado', 'Domingo'),
))

In [0]:
# Build one record (tuple) per calendar day in `date_generated`.
#
# The holiday calendars are created once, up front. Creating them inside the loop
# rebuilt both calendars for every single day; a shared instance fills in a year's
# holidays the first time a date from that year is looked up and then reuses them.
br_holidays = holidays.Brazil()
us_holidays = holidays.US()

# NOTE: "%A" and "%B" follow the process LC_TIME locale. The translation step that
# follows looks the weekday name up in `week_br`, whose keys are English, so this
# relies on the locale producing English names.
list_dates = [
    (
        day.strftime("%Y%m%d"),            # 0: date key as yyyymmdd text
        day.date(),                        # 1: the date itself
        day.strftime("%d-%m-%Y"),          # 2: date as dd-mm-yyyy text
        day.year,                          # 3: year
        day.month,                         # 4: month number (later used as key into month_br)
        day.strftime("%d"),                # 5: zero-padded day of month, as text
        day.strftime("%A"),                # 6: weekday name (later used as key into week_br)
        day.strftime("%B"),                # 7: month name
        day.date().isocalendar()[1],       # 8: ISO week number
        int(day.date() in br_holidays),    # 9: 1 if a Brazilian holiday, else 0
        int(day.date() in us_holidays),    # 10: 1 if a US holiday, else 0
    )
    for day in date_generated
]

In [0]:
# Extend every record with the Portuguese month name and weekday name.
# Position 4 holds the month number and position 6 the English weekday name.
dates = []
for record in list_dates:
    month_number, weekday_en = record[4], record[6]
    dates.append((*record, month_br[month_number], week_br[weekday_en]))

In [0]:
# Schema of the calendar DataFrame. Fields are listed in the same order as the
# values inside each `dates` tuple; every field is nullable.
dcalendar_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ('SK_DATETIME', StringType()),
        ('DATE', DateType()),
        ('BR_DATE', StringType()),
        ('YEAR', IntegerType()),
        ('MONTH', IntegerType()),
        ('DAY', StringType()),
        ('USA_DAY_OF_WEEK_NAME', StringType()),
        ('USA_MONTH_NAME', StringType()),
        ('WEEK_NUMBER', IntegerType()),
        ('BR_HOLIDAY', IntegerType()),
        ('USA_HOLIDAY', IntegerType()),
        ('BR_MONTH_NAME', StringType()),
        ('BR_DAY_OF_WEEK_NAME', StringType()),
    )
])

In [0]:
# Materialise the calendar records as a Spark DataFrame using the explicit schema.
dcalendar_df = spark.createDataFrame(dates, dcalendar_schema)

In [0]:
# Put the columns in their final order and convert DAY from text to integer.
day_as_int = col('DAY').cast(IntegerType())
dcalendar_df = dcalendar_df.select(
    'SK_DATETIME', 'YEAR', 'MONTH', day_as_int, 'WEEK_NUMBER', 'DATE',
    'USA_DAY_OF_WEEK_NAME', 'USA_MONTH_NAME', 'USA_HOLIDAY',
    'BR_DATE', 'BR_DAY_OF_WEEK_NAME', 'BR_MONTH_NAME', 'BR_HOLIDAY',
)

In [0]:
# Preview: show at most 10 rows of the calendar DataFrame.
display(dcalendar_df.limit(10))

SK_DATETIME,YEAR,MONTH,DAY,WEEK_NUMBER,DATE,USA_DAY_OF_WEEK_NAME,USA_MONTH_NAME,USA_HOLIDAY,BR_DATE,BR_DAY_OF_WEEK_NAME,BR_MONTH_NAME,BR_HOLIDAY
20200101,2020,1,1,1,2020-01-01,Wednesday,January,1,01-01-2020,Quarta-feira,Janeiro,1
20200102,2020,1,2,1,2020-01-02,Thursday,January,0,02-01-2020,Quinta-feira,Janeiro,0
20200103,2020,1,3,1,2020-01-03,Friday,January,0,03-01-2020,Sexta-feira,Janeiro,0
20200104,2020,1,4,1,2020-01-04,Saturday,January,0,04-01-2020,Sabado,Janeiro,0
20200105,2020,1,5,1,2020-01-05,Sunday,January,0,05-01-2020,Domingo,Janeiro,0
20200106,2020,1,6,2,2020-01-06,Monday,January,0,06-01-2020,Segunda-feira,Janeiro,0
20200107,2020,1,7,2,2020-01-07,Tuesday,January,0,07-01-2020,Terça-feira,Janeiro,0
20200108,2020,1,8,2,2020-01-08,Wednesday,January,0,08-01-2020,Quarta-feira,Janeiro,0
20200109,2020,1,9,2,2020-01-09,Thursday,January,0,09-01-2020,Quinta-feira,Janeiro,0
20200110,2020,1,10,2,2020-01-10,Friday,January,0,10-01-2020,Sexta-feira,Janeiro,0
