In [0]:
import dlt
from pyspark.sql.functions import current_timestamp, col, lit

# ========================================
# BRONZE LAYER - RAW IMDB DATA INGESTION (PARQUET)
# ========================================

# Base path of the volume holding the IMDB parquet files; every table
# function in this file loads "<VOLUME_PATH>/<file name>.parquet" from it.
VOLUME_PATH = "/Volumes/workspace/damg7370_imdb/volume_final_project"

# ========================================
# 1. TITLE BASICS
# ========================================
@dlt.table(
    name="raw_title_basics",
    comment="Raw title basics data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_basics():
    """Load title.basics.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.basics.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("title.basics.parquet"))

# ========================================
# 2. TITLE RATINGS
# ========================================
@dlt.table(
    name="raw_title_ratings",
    comment="Raw title ratings data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_ratings():
    """Load title.ratings.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.ratings.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("title.ratings.parquet"))

# ========================================
# 3. NAME BASICS
# ========================================
@dlt.table(
    name="raw_name_basics",
    comment="Raw name basics data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_name_basics():
    """Load name.basics.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/name.basics.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("name.basics.parquet"))

# ========================================
# 4. TITLE CREW
# ========================================
@dlt.table(
    name="raw_title_crew",
    comment="Raw title crew data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_crew():
    """Load title.crew.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.crew.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("title.crew.parquet"))

# ========================================
# 5. TITLE PRINCIPALS
# ========================================
@dlt.table(
    name="raw_title_principals",
    comment="Raw title principals data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_principals():
    """Load title.principals.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.principals.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn(
        "source_file_name", lit("title.principals.parquet")
    )

# ========================================
# 6. TITLE AKAS
# ========================================
@dlt.table(
    name="raw_title_akas",
    comment="Raw title akas (alternate titles) data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_akas():
    """Load title.akas.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.akas.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("title.akas.parquet"))

# ========================================
# 7. TITLE EPISODE
# ========================================
@dlt.table(
    name="raw_title_episode",
    comment="Raw title episode data from IMDB",
    table_properties={"delta.columnMapping.mode": "name", "quality": "bronze"},
)
def raw_title_episode():
    """Load title.episode.parquet from the volume and tag it with audit columns.

    Returns:
        The parquet contents with three columns set on top: ``load_dt``
        (the current timestamp), ``loaded_by`` and ``source_file_name``
        (both constant literals).
    """
    parquet_path = f"{VOLUME_PATH}/title.episode.parquet"
    raw_df = spark.read.format("parquet").load(parquet_path)

    # Audit columns are set in this order: load_dt, loaded_by, source_file_name.
    stamped_df = raw_df.withColumn("load_dt", current_timestamp())
    stamped_df = stamped_df.withColumn("loaded_by", lit("Julio Sepulveda"))
    return stamped_df.withColumn("source_file_name", lit("title.episode.parquet"))