# Identify the first available weekday of each week and save the result as a JSON-format table.

In [0]:
%run "../utils/mount_configuration"

In [0]:
# Load the "tickers_price" Delta dataset from the processed folder.
# NOTE(review): `spark` and `processed_folder_path` are not defined in this
# notebook; presumably they come from the runtime and the %run'd
# mount_configuration notebook respectively — confirm.
tickets_price_df = (
    spark.read
    .format("delta")
    .load(f"{processed_folder_path}/tickers_price")
)

In [0]:
from pyspark.sql.functions import date_format, year, weekofyear, when, col, row_number
from pyspark.sql.window import Window

# Position of each weekday inside a Monday-first week (Mon -> 1 ... Sun -> 7).
# Any other 'day_of_week' value (e.g. from a null date) yields a null priority.
weekday_priority = (
    when(col("day_of_week") == "Mon", 1)
    .when(col("day_of_week") == "Tue", 2)
    .when(col("day_of_week") == "Wed", 3)
    .when(col("day_of_week") == "Thu", 4)
    .when(col("day_of_week") == "Fri", 5)
    .when(col("day_of_week") == "Sat", 6)
    .when(col("day_of_week") == "Sun", 7)
)

# weekofyear() returns the ISO-8601 week number, but year() returns the
# calendar year. Around New Year the two disagree: 2024-12-30 is ISO week 1
# of 2025, and 2021-01-01 is ISO week 53 of 2020. Partitioning on the
# calendar year would therefore merge days of different weeks (late December
# ends up in the same "week 1" as the previous January) and drop a week.
# 'week_year' is the ISO week-numbering year that matches 'week_of_year'.
month_of_date = date_format("date", "MM")
iso_week_year = (
    when((weekofyear("date") == 1) & (month_of_date == "12"), year("date") + 1)
    .when((weekofyear("date") >= 52) & (month_of_date == "01"), year("date") - 1)
    .otherwise(year("date"))
)

# Helper columns; 'year' stays the calendar year so any later use of
# tickets_price_df keeps seeing the same columns as before, plus 'week_year'.
tickets_price_df = (
    tickets_price_df
    .withColumn("day_of_week", date_format("date", "E"))
    .withColumn("year", year("date"))
    .withColumn("week_of_year", weekofyear("date"))
    .withColumn("week_year", iso_week_year)
    .withColumn("priority", weekday_priority)
)

# One partition per real ISO week; the earliest weekday (then the earliest
# date as tie-breaker) comes first.
window_spec = (
    Window
    .partitionBy("week_year", "week_of_year")
    .orderBy("priority", "date")
)

# Keep the first available day of every week. Rows without a date are
# excluded because they cannot be assigned to a week and would otherwise
# surface as a (null, null, null) result row. The published 'year' is the
# ISO week-numbering year, so (year, week_of_year) identifies a single week.
final_result = (
    tickets_price_df
    .filter(col("date").isNotNull())
    .withColumn("rank", row_number().over(window_spec))
    .filter(col("rank") == 1)
    .select(col("week_year").alias("year"), "week_of_year", "date")
)

# display(final_result)

In [0]:
# Persist the per-week first-day rows as a JSON-format table, replacing any
# previous contents of that table.
(
    final_result.write
    .format("json")
    .mode("overwrite")
    .saveAsTable("engineering_presentation.ticker_details_dates")
)

In [0]:
%sql
-- Show every row of the table written by the preceding write cell.
SELECT * FROM engineering_presentation.ticker_details_dates;