### Importing the Needed Modules

In [0]:
import sys
import os

# Workspace location of the repository checkout (absolute path).
PROJECT_ROOT = "/Workspace/Users/thiruvengadamk16@gmail.com/Retail-And-Ecommerce-Analytics-Platform"

# Put the repo root at the front of the import search path so the `src`
# package can be imported; the count check keeps re-runs of this cell from
# inserting the same entry twice.
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.insert(0, PROJECT_ROOT)

# Diagnostic output -- can be dropped once the setup has been confirmed.
print("Current working directory:", os.getcwd())
print("Repo root added to path:", PROJECT_ROOT in sys.path)
from src.paths import *

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

import sys

# Register the repo root on the import path only when it is missing.
# An unconditional append adds one more duplicate entry to sys.path every
# time this cell is re-run.
_repo_root = '/Workspace/Users/thiruvengadamk16@gmail.com/Retail-And-Ecommerce-Analytics-Platform'
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

from src.paths import SILVER_EMPLOYEES_PATH, DIM_EMPLOYEES_PATH
from src.schema_definitions import DIM_EMPLOYEES_SCHEMA
from src.utils import add_gold_metadata
from delta.tables import DeltaTable

### Querying Silver Employees Table

In [0]:
# Load the silver-layer employees table named by SILVER_EMPLOYEES_PATH.
emp_silver_df = spark.read.table(SILVER_EMPLOYEES_PATH)

# Render only a five-row sample instead of the whole table.
emp_silver_preview = emp_silver_df.limit(5)
emp_silver_preview.display()

### Dim_employees Schema Reference

In [0]:
# Evaluate the imported schema object as the cell's last expression so the
# notebook renders it for reference.
DIM_EMPLOYEES_SCHEMA

### Selecting the Needed columns for dim_employees

In [0]:
# Columns carried from the silver table into the employee dimension.
dim_employee_columns = ["employee_id", "store_id", "name", "position"]

# Project the silver frame down to those columns (keeps the same variable name
# that the following cell reads).
emp_silver_df = emp_silver_df.select(*dim_employee_columns)

### Creating metadata columns: `_created_at` and `_updated_at`

In [0]:
# Pass the trimmed employee frame through the shared project helper.
# NOTE(review): per this notebook's section heading the helper is expected to
# add `_created_at` / `_updated_at`; its implementation lives in src.utils --
# confirm the exact columns and values there.
dim_emp_df = add_gold_metadata(emp_silver_df)

### Creating Dim_employees Table with surrogate key

In [0]:
# DDL for the employee dimension. `employee_sk` is declared as an identity
# column, so it is generated by the table rather than supplied by the writer.
# IF NOT EXISTS makes the statement a no-op when the table is already there.
create_dim_employees_ddl = f"""
    CREATE TABLE IF NOT EXISTS {DIM_EMPLOYEES_PATH} (
        employee_sk LONG GENERATED ALWAYS AS IDENTITY,
        employee_id INT,
        store_id INT,
        name STRING,
        position STRING,
        _created_at TIMESTAMP,
        _updated_at TIMESTAMP
    )
    USING DELTA
"""

spark.sql(create_dim_employees_ddl)

### Updating the Dim_employees Table

In [0]:
# Upsert the prepared employee rows into the dimension table, matching on the
# business key `employee_id`.
dim_employee_tbl = DeltaTable.forName(spark, DIM_EMPLOYEES_PATH)

# Attributes whose change should trigger an update of an existing row.
tracked_attributes = ["store_id", "name", "position"]

# Only touch a matched row when at least one tracked attribute differs.
# `<=>` is null-safe equality, so NULL -> value (and value -> NULL) transitions
# are detected as changes. Without this condition every run rewrites every
# matched row and overwrites `_updated_at` even when nothing changed.
row_changed = " OR ".join(
    f"NOT (tgt.{attr} <=> src.{attr})" for attr in tracked_attributes
)

(
    dim_employee_tbl.alias("tgt")
    .merge(
        dim_emp_df.alias("src"),
        "tgt.employee_id = src.employee_id",
    )
    # Existing employee with changed attributes: refresh the attributes and
    # `_updated_at`; `_created_at` and `employee_sk` are left untouched.
    .whenMatchedUpdate(
        condition=row_changed,
        set={
            "tgt.store_id": "src.store_id",
            "tgt.name": "src.name",
            "tgt.position": "src.position",
            "tgt._updated_at": "src._updated_at",
        },
    )
    # New employee: insert all columns except `employee_sk`, which the table
    # generates as an identity value.
    .whenNotMatchedInsert(
        values={
            "tgt.employee_id": "src.employee_id",
            "tgt.store_id": "src.store_id",
            "tgt.name": "src.name",
            "tgt.position": "src.position",
            "tgt._created_at": "src._created_at",
            "tgt._updated_at": "src._updated_at",
        }
    )
    .execute()
)

In [0]:
# Re-read the dimension table after the merge and show five rows as a spot check.
dim_employees_sample = spark.read.table(DIM_EMPLOYEES_PATH).limit(5)
dim_employees_sample.display()

In [0]:
# Total number of rows currently in the dimension table; left as the cell's
# last expression so the notebook displays it.
dim_employees_row_count = spark.read.table(DIM_EMPLOYEES_PATH).count()
dim_employees_row_count