In [0]:
# Spark SQL that assembles one wide "employee details" result from the silver
# layer tables (employees, salaries, titles, employee_departmemnt, departments).
#
# Shape of the result:
#   * Each history CTE numbers an employee's records with ROW_NUMBER() ordered by
#     to_date DESC, so rn = 1 is the record with the latest end date.
#   * The final SELECT does not filter on rn, so the LEFT JOINs on emp_no yield
#     every salary x title x department combination per employee; the rn values
#     are exposed as title_rn / salary_rn / department_rn for downstream filtering.
#
# employment_status:
#   The latest end date is taken across ALL of an employee's joined rows
#   (MAX ... OVER (PARTITION BY e.emp_no)), not just the current row. Evaluating
#   it per row would label a row that pairs only historical salary/title/department
#   records as 'Exited' even though the same employee still has an open-ended
#   ('9999-01-01') record on another row.
#
# Plain string (no f-prefix): the SQL contains no placeholders, and a literal
# brace added later would otherwise be interpreted as a format field.
query = """
WITH employees_cte AS (
    SELECT 
        emp_no, 
        full_name, 
        gender, 
        hire_date
    FROM employee_catalog.silver.employees e
),

salaries_cte AS (
    SELECT 
        emp_no, 
        salary, 
        title, 
        is_current_salary, 
        salary_tenure_years,
        from_date AS salary_start_date, 
        to_date AS salary_end_date,
        ROW_NUMBER() OVER (PARTITION BY emp_no ORDER BY to_date DESC) AS rn
    FROM employee_catalog.silver.salaries s
),

titles_cte AS (
    SELECT 
        emp_no, 
        title, 
        is_current_title, 
        title_tenure_Years, 
        from_date AS title_start_date, 
        to_date AS title_end_date,
        ROW_NUMBER() OVER (PARTITION BY emp_no ORDER BY to_date DESC) AS rn
    FROM employee_catalog.silver.titles t
),

dept_emp_cte AS (
    SELECT 
        emp_no, 
        dept_no AS employee_dept_no, 
        is_current_department, 
        department_tenure_Years, 
        from_date AS employee_dept_start_date, 
        to_date AS employee_dept_end_date,
        ROW_NUMBER() OVER (PARTITION BY emp_no ORDER BY to_date DESC) AS rn
    FROM employee_catalog.silver.employee_departmemnt de
),

departments_cte AS (
    SELECT 
        dept_no, 
        dept_name 
    FROM employee_catalog.silver.departments d
)

SELECT DISTINCT
    e.emp_no, 
    e.full_name,
    e.gender, 
    e.hire_date, 
    t.title,
    t.is_current_title,
    t.title_tenure_Years, 
    t.title_start_date, 
    t.title_end_date,
    t.rn as title_rn,
    s.salary, 
    s.salary_start_date, 
    s.salary_end_date,
    s.is_current_salary,
    s.salary_tenure_years,
    s.rn as salary_rn,
    de.employee_dept_no,
    de.is_current_department,
    de.department_tenure_Years,
    de.employee_dept_start_date,
    de.employee_dept_end_date,
    de.rn as department_rn,
    d.dept_no,
    d.dept_name AS employee_dept_name,
    CASE WHEN MAX(GREATEST(
            COALESCE(s.salary_end_date, '1900-01-01'), 
            COALESCE(t.title_end_date, '1900-01-01'), 
            COALESCE(de.employee_dept_end_date, '1900-01-01')
        )) OVER (PARTITION BY e.emp_no) = '9999-01-01' 
        THEN 'Current'
        ELSE 'Exited'
    END AS employment_status
FROM employees_cte e
LEFT JOIN salaries_cte s ON e.emp_no = s.emp_no 
LEFT JOIN titles_cte t ON e.emp_no = t.emp_no 
LEFT JOIN dept_emp_cte de ON e.emp_no = de.emp_no 
LEFT JOIN departments_cte d ON de.employee_dept_no = d.dept_no
ORDER BY e.emp_no;

"""


In [0]:
# Execute the SQL held in `query` and keep the resulting DataFrame for the
# write cells further down, then render it in the notebook.
# `spark` and `display` are not defined in this cell; presumably they are
# supplied by the notebook runtime (e.g. Databricks) — TODO confirm.
employee_details_df = spark.sql(query)
display(employee_details_df)

emp_no,full_name,gender,hire_date,title,is_current_title,title_tenure_Years,title_start_date,title_end_date,title_rn,salary,salary_start_date,salary_end_date,is_current_salary,salary_tenure_years,salary_rn,employee_dept_no,is_current_department,department_tenure_Years,employee_dept_start_date,employee_dept_end_date,department_rn,dept_no,employee_dept_name,employment_status
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,64961,2002-06-29,9999-01-01,Y,22.64,1,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,61633,2001-06-29,2002-06-29,N,1.0,2,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,59534,2000-06-29,2001-06-29,N,1.0,3,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,55956,1999-06-30,2000-06-29,N,1.0,4,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,54693,1998-06-30,1999-06-30,N,1.0,5,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,52930,1997-06-30,1998-06-30,N,1.0,6,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,51503,1996-06-30,1997-06-30,N,1.0,7,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,52001,1995-07-01,1996-06-30,N,1.0,8,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,48751,1994-07-01,1995-07-01,N,1.0,9,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current
100000,Hiroyasu Emden,M,1991-07-02,Senior Staff,Y,33.64,1991-07-02,9999-01-01,1,46991,1993-07-01,1994-07-01,N,1.0,10,d008,Y,33.64,1991-07-02,9999-01-01,1,d008,Research,Current


In [0]:
# Create the `gold` schema in `employee_catalog` if it does not already exist,
# so this cell is safe to re-run.
spark.sql("CREATE SCHEMA IF NOT EXISTS employee_catalog.gold")

DataFrame[]

In [0]:
# Persist the DataFrame as the table employee_catalog.gold.employee_details,
# replacing existing contents on every run (mode "overwrite").
# NOTE(review): "mergeSchema" is combined with overwrite here. If the table is
# Delta and the intent is to replace the table schema when columns change,
# "overwriteSchema" may be the relevant option — confirm against the table format.
employee_details_df.write \
    .option("mergeSchema", True) \
    .mode("overwrite") \
    .saveAsTable("employee_catalog.gold.employee_details")

In [0]:
# Also write the same DataFrame as Parquet files, replacing whatever is
# already at the target path (mode "overwrite").
# NOTE(review): the path "mnt/gold/employee_details" is relative (no leading
# "/"), so it resolves against the filesystem's default working directory.
# If a mount point such as "/mnt/gold" is intended, confirm where the files
# actually land before changing it; downstream readers may depend on the
# current location.
employee_details_df.write \
    .format("parquet") \
    .mode("overwrite") \
    .save("mnt/gold/employee_details")