# dim_daten luonti
- Oma jobi -> päivittymään päivittäin

In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable
from datetime import datetime, date
from pyspark.sql import Window

In [0]:
# Pipeline parameters live in a small key/value table; each row has a
# param_name and a param_value.
parameter_table = spark.table('vr_hopea.pipeline_parameters')

# Fetch the row holding the "first run" flag for the dim_date job.
# first() returns None when no row matches, so check before indexing
# instead of failing with "'NoneType' object is not subscriptable".
dim_date_flag_row = (
    parameter_table
    .filter(F.col('param_name') == 'dim_date_is_first_run')
    .select('param_value')
    .first()
)

if dim_date_flag_row is None:
    raise ValueError(
        "Parameter 'dim_date_is_first_run' not found in "
        "vr_hopea.pipeline_parameters"
    )

# 1 -> build the whole dimension from scratch, anything else -> extend it.
dim_date_is_first_run = dim_date_flag_row[0]

print(f'Parametri: {dim_date_is_first_run = }')

Parametri: dim_date_is_first_run = 1


In [0]:
# The initial build needs a fixed first day for the dimension; later runs
# do not use min_date, so it is only defined on the first run.
if dim_date_is_first_run == 1:
    first_day = datetime.strptime('2025-01-01', '%Y-%m-%d')
    min_date = first_day.date()
    print(min_date)

2025-01-01


In [0]:
# Last day the dimension should cover: the current local date of the driver.
# NOTE(review): date.today() follows the driver's timezone — confirm it
# matches the calendar the business expects (e.g. Europe/Helsinki vs UTC).
max_date = date.today()

In [0]:
# Length of the initial date range in whole days (min_date .. max_date).
# Only computed on the first run, because min_date exists only then.
if dim_date_is_first_run == 1:
    date_span = max_date - min_date
    days = date_span.days


In [0]:
def _add_calendar_columns(dates):
    """Add the calendar attribute columns of vr_hopea.dim_date.

    `dates` must be a DataFrame with a `date` column. The columns are added
    in the order the table stores them (year ... updated_at); the caller
    adds `date_id` afterwards.
    """
    return (
        dates
        .withColumn('year', F.year('date'))
        .withColumn('month', F.month('date'))
        .withColumn('day', F.dayofmonth('date'))
        # dayofweek numbering: 1 = Sunday ... 7 = Saturday
        .withColumn('weekday', F.dayofweek('date'))
        .withColumn('weekday_name', F.date_format('date', 'EEEE'))
        .withColumn('is_weekend', F.col('weekday').isin(1, 7))
        .withColumn('week_number', F.weekofyear('date'))
        .withColumn('quarter', F.quarter('date'))
        .withColumn('updated_at', F.current_timestamp())
    )


if dim_date_is_first_run == 1:
    # Full build: one row per day from min_date through max_date (inclusive).
    initial_dates = (
        _add_calendar_columns(
            spark.range(0, days + 1)
            .withColumn('date', F.expr(f'date_add("{min_date}", cast(id as int))'))
        )
        # `date` grows with `id`, so id + 1 equals row_number() ordered by
        # date, without pulling all rows into a single window partition.
        .withColumn('date_id', (F.col('id') + 1).cast('int'))
        .drop('id')
    )

    initial_dates.write.option('overwriteSchema', 'true').mode('overwrite').saveAsTable('vr_hopea.dim_date')

    # Flip the flag only after the table was written, so a failed build is
    # retried as a first run.
    # NOTE(review): the parameter row's updated_at is not touched here —
    # confirm whether it should be stamped as well.
    delta = DeltaTable.forName(spark, 'vr_hopea.pipeline_parameters')
    delta.update(
        condition="param_name = 'dim_date_is_first_run'",
        set={'param_value': F.lit(0)},
    )

else:
    # Incremental run: append every day after the latest stored date up to
    # max_date. This makes re-runs on the same day a no-op and back-fills
    # days on which the job did not run.
    latest = (
        spark.table('vr_hopea.dim_date')
        .agg(
            F.max('date').alias('last_date'),
            F.max('date_id').alias('last_date_id'),
        )
        .first()
    )
    last_date = latest['last_date']
    last_date_id = latest['last_date_id']

    if last_date is None or last_date_id is None:
        raise RuntimeError(
            "vr_hopea.dim_date is empty; set 'dim_date_is_first_run' to 1 "
            "to rebuild it"
        )

    days_behind = (max_date - last_date).days

    if days_behind > 0:
        # ids 1..days_behind map to last_date + 1 day .. max_date.
        # Assumes date_id increases with date, as the initial build creates it.
        new_dates = (
            _add_calendar_columns(
                spark.range(1, days_behind + 1)
                .withColumn('date', F.expr(f'date_add("{last_date}", cast(id as int))'))
            )
            .withColumn('date_id', (F.col('id') + last_date_id).cast('int'))
            .drop('id')
        )

        new_dates.write.mode('append').saveAsTable('vr_hopea.dim_date')
    else:
        print(f'dim_date already covers {last_date}; nothing to append')

# Expose the persisted table under the same name on every run so that
# later cells can use `dim_date` regardless of which branch executed.
dim_date = spark.table('vr_hopea.dim_date')




In [0]:
# Preview the ten newest rows. Read them back from the table so the preview
# shows what was actually persisted and works whichever branch ran above.
display(
    spark.table('vr_hopea.dim_date')
    .orderBy(F.desc('date_id'))
    .limit(10)
)



date,year,month,day,weekday,weekday_name,is_weekend,week_number,quarter,updated_at,date_id
2026-02-24,2026,2,24,3,Tuesday,False,9,1,2026-02-24T08:14:13.884Z,420
2026-02-23,2026,2,23,2,Monday,False,9,1,2026-02-24T08:14:13.884Z,419
2026-02-22,2026,2,22,1,Sunday,True,8,1,2026-02-24T08:14:13.884Z,418
2026-02-21,2026,2,21,7,Saturday,True,8,1,2026-02-24T08:14:13.884Z,417
2026-02-20,2026,2,20,6,Friday,False,8,1,2026-02-24T08:14:13.884Z,416
2026-02-19,2026,2,19,5,Thursday,False,8,1,2026-02-24T08:14:13.884Z,415
2026-02-18,2026,2,18,4,Wednesday,False,8,1,2026-02-24T08:14:13.884Z,414
2026-02-17,2026,2,17,3,Tuesday,False,8,1,2026-02-24T08:14:13.884Z,413
2026-02-16,2026,2,16,2,Monday,False,8,1,2026-02-24T08:14:13.884Z,412
2026-02-15,2026,2,15,1,Sunday,True,7,1,2026-02-24T08:14:13.884Z,411


In [0]:
# Show the pipeline parameter table (loaded earlier from
# vr_hopea.pipeline_parameters) to check the dim_date_is_first_run flag.
display(parameter_table)

id,param_name,param_value,updated_at
2,dim_date_is_first_run,0,2026-02-22T12:39:41.454Z
1,is_first_run,1,2026-02-19T09:58:38.443Z
