In [0]:
#Instalando Biblioteca holidays
!pip install holidays

In [0]:
#Import python
import holidays
import datetime as dt

In [0]:
#Import pyspark
from pyspark.sql.functions import when, col, lit, concat
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType

In [0]:
#Data de inicio e fim
start = dt.datetime.strptime("2021-01-01", "%Y-%m-%d")
end = dt.datetime.strptime("2100-01-01", "%Y-%m-%d")

#Gerando uma lista com um range de data do inicio até o fim
date_generated = [start + dt.timedelta(days=x) for x in range(0, (end-start).days)]

In [0]:
#Estruturando os dados do calendario
list_dates = []
for data in date_generated:
    list_dates.append((data.strftime("%Y%m%d"),
                  data.date(),
                  data.strftime("%d-%m-%Y"),
                  data.year,
                  data.month,
                  data.strftime("%d"),
                  data.strftime("%B %d, %Y"),
                  data.strftime("%A"),
                  data.strftime("%B"),
                  data.date().isocalendar()[1],
                  int(data.date() in holidays.Brazil()),
                  int(data.date() in holidays.US())))
    

In [0]:
#Criando o schema do dataframe
dcalendar_schema = StructType([
  StructField('SK_DATETIME', StringType(), True),
  StructField('USA_DATE', DateType(), True),
  StructField('BR_DATE', StringType(), True),
  StructField('YEAR', IntegerType(), True),
  StructField('MONTH', IntegerType(), True),
  StructField('DAY', StringType(), True),
  StructField('USA_DATE_NAME', StringType(), True),
  StructField('USA_DAY_OF_WEEK_NAME', StringType(), True),
  StructField('USA_MONTH_NAME', StringType(), True),
  StructField('WEEK_NUMBER', IntegerType(), True),
  StructField('BR_HOLYDAY', StringType(), True),
  StructField('USA_HOLYDAY', StringType(), True)])

In [0]:
#Criando o dataframe
dcalendar_df = spark.createDataFrame(data = list_dates, schema = dcalendar_schema)

In [0]:
#Gerando um nova coluna chamada 'BR_MONTH_NAME'que traduz os nomes dos meses para o portugues do Brasil
dcalendar_df = dcalendar_df.withColumn(
    'BR_MONTH_NAME', 
    when(col('MONTH')=='01','Janeiro').
    when(col('MONTH')=='02','Fevereiro').
    when(col('MONTH')=='03','Marco').
    when(col('MONTH')=='04','Abril').
    when(col('MONTH')=='05','Maio').
    when(col('MONTH')=='06','Junho').
    when(col('MONTH')=='07','Julho').
    when(col('MONTH')=='08','Agosto').
    when(col('MONTH')=='09','Setembro').
    when(col('MONTH')=='10','Outubro').
    when(col('MONTH')=='11','Novembro').
    when(col('MONTH')=='12','Dezembro').otherwise(col('MONTH')))

In [0]:
#Gerando um nova coluna chamada 'BR_DAY_OF_WEEK_NAME' que traduz os nomes dos dias para o portugues do Brasil
dcalendar_df = dcalendar_df.withColumn(
    'BR_DAY_OF_WEEK_NAME', 
    when(col('USA_DAY_OF_WEEK_NAME')=='Sunday','Segunda-feira').
    when(col('USA_DAY_OF_WEEK_NAME')=='Tuesday','Terca-Feira').
    when(col('USA_DAY_OF_WEEK_NAME')=='Wednesday','Quarta-feira').
    when(col('USA_DAY_OF_WEEK_NAME')=='Thursday','Quinta-feira').
    when(col('USA_DAY_OF_WEEK_NAME')=='Friday','Sexta-feira').
    when(col('USA_DAY_OF_WEEK_NAME')=='Saturday','Sabado').
    when(col('USA_DAY_OF_WEEK_NAME')=='Monday','Domingo').otherwise(col('USA_DAY_OF_WEEK_NAME')))

In [0]:
#Gerando um nova coluna chamada 'BR_DATE_NAME' que traduz os nomes das datas para o portugues do Brasil
dcalendar_df = dcalendar_df.withColumn('BR_DATE_NAME',
    concat(col('DAY'), lit(' '),col('BR_MONTH_NAME'),lit(' de '),col('YEAR')))

In [0]:
#Ordenando as colunas 
dcalendar_df = dcalendar_df.select(
    col('SK_DATETIME'),col('YEAR'),col('MONTH'),col('DAY').cast(IntegerType()),
    col('USA_DATE'),col('USA_DATE_NAME'),
    col('USA_DAY_OF_WEEK_NAME'),col('USA_MONTH_NAME'),col('USA_HOLYDAY'),
    col('BR_DATE'),col('BR_MONTH_NAME'),col('BR_DAY_OF_WEEK_NAME'),
    col('BR_DATE_NAME'),col('BR_HOLYDAY')
)

In [0]:
#Dataframe finalizado
display(dcalendar_df.limit(10))

SK_DATETIME,YEAR,MONTH,DAY,USA_DATE,USA_DATE_NAME,USA_DAY_OF_WEEK_NAME,USA_MONTH_NAME,USA_HOLYDAY,BR_DATE,BR_MONTH_NAME,BR_DAY_OF_WEEK_NAME,BR_DATE_NAME,BR_HOLYDAY
20210101,2021,1,1,2021-01-01,"January 01, 2021",Friday,January,1,01-01-2021,Janeiro,Sexta-feira,01 Janeiro de 2021,1
20210102,2021,1,2,2021-01-02,"January 02, 2021",Saturday,January,0,02-01-2021,Janeiro,Sabado,02 Janeiro de 2021,0
20210103,2021,1,3,2021-01-03,"January 03, 2021",Sunday,January,0,03-01-2021,Janeiro,Segunda-feira,03 Janeiro de 2021,0
20210104,2021,1,4,2021-01-04,"January 04, 2021",Monday,January,0,04-01-2021,Janeiro,Domingo,04 Janeiro de 2021,0
20210105,2021,1,5,2021-01-05,"January 05, 2021",Tuesday,January,0,05-01-2021,Janeiro,Terca-Feira,05 Janeiro de 2021,0
20210106,2021,1,6,2021-01-06,"January 06, 2021",Wednesday,January,0,06-01-2021,Janeiro,Quarta-feira,06 Janeiro de 2021,0
20210107,2021,1,7,2021-01-07,"January 07, 2021",Thursday,January,0,07-01-2021,Janeiro,Quinta-feira,07 Janeiro de 2021,0
20210108,2021,1,8,2021-01-08,"January 08, 2021",Friday,January,0,08-01-2021,Janeiro,Sexta-feira,08 Janeiro de 2021,0
20210109,2021,1,9,2021-01-09,"January 09, 2021",Saturday,January,0,09-01-2021,Janeiro,Sabado,09 Janeiro de 2021,0
20210110,2021,1,10,2021-01-10,"January 10, 2021",Sunday,January,0,10-01-2021,Janeiro,Segunda-feira,10 Janeiro de 2021,0
