In [0]:
query = f"""
SELECT 
    t.emp_no,
    e.full_name,
    t.from_date AS title_start_date,
    t.to_date AS title_end_date,
    LAG(t.title) OVER (PARTITION BY t.emp_no ORDER BY t.from_date) AS previous_title,
    t.title as current_title,
    CASE 
        WHEN LAG(t.title) OVER (PARTITION BY t.emp_no ORDER BY t.from_date) IS NULL THEN 'Initial Title'
        WHEN t.title <> LAG(t.title) OVER (PARTITION BY t.emp_no ORDER BY t.from_date) THEN 'Promoted'
        ELSE 'No Change'
    END AS promotion_status,
    CASE 
        WHEN t.to_date = '9999-01-01' 
        THEN 'Current'
        ELSE 'Exited'
    END AS employment_status
FROM employee_catalog.silver.titles t
LEFT JOIN employee_catalog.silver.employees e 
ON t.emp_no = e.emp_no
ORDER BY
    1

"""


In [0]:
promotion_history_df = spark.sql(query)
display(promotion_history_df)

emp_no,full_name,title_start_date,title_end_date,previous_title,current_title,promotion_status,employment_status
100000,Hiroyasu Emden,1991-07-02,9999-01-01,,Senior Staff,Initial Title,Current
100000,Hiroyasu Emden,1991-07-02,9999-01-01,Senior Staff,Senior Staff,No Change,Current
100001,Jasminko Antonakopoulos,1999-07-17,9999-01-01,,Engineer,Initial Title,Current
100001,Jasminko Antonakopoulos,1999-07-17,9999-01-01,Engineer,Engineer,No Change,Current
100002,Claudi Kolinko,1988-02-20,9999-01-01,,Senior Staff,Initial Title,Current
100002,Claudi Kolinko,1988-02-20,9999-01-01,Senior Staff,Senior Staff,No Change,Current
100003,Marsja Trogemann,1987-08-26,1995-08-26,,Engineer,Initial Title,Exited
100003,Marsja Trogemann,1987-08-26,1995-08-26,Engineer,Engineer,No Change,Exited
100003,Marsja Trogemann,1995-08-26,1997-06-08,Engineer,Senior Engineer,Promoted,Exited
100003,Marsja Trogemann,1995-08-26,1997-06-08,Senior Engineer,Senior Engineer,No Change,Exited


In [0]:
spark.sql("CREATE SCHEMA IF NOT EXISTS employee_catalog.gold")

DataFrame[]

In [0]:
promotion_history_df.write \
    .option("mergeSchema", True) \
    .mode("overwrite") \
    .saveAsTable("employee_catalog.gold.promotion_history")

In [0]:
promotion_history_df.write \
    .format("parquet") \
    .mode("overwrite") \
    .save("mnt/gold/promotion_history")