In [0]:
import dlt
from pyspark.sql.functions import col

In [0]:
@dlt.table(
    name="bronze_members",
    comment="Ingested bulk members data.",
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
    },
)
def load_members():
    """Batch-load the members CSV file for the ``bronze_members`` table.

    The file is read with its first row treated as the header and with
    column types inferred by the CSV reader.

    Relies on the ambient ``spark`` session, which is not defined in the
    visible part of this file.

    Returns:
        The DataFrame produced by the CSV reader.
    """
    members_reader = spark.read.format("csv").options(
        header=True,
        inferSchema=True,
    )
    return members_reader.load("/Volumes/capstone/medisure/volume/members.csv")

In [0]:
@dlt.table(
    name="bronze_diagnosis_ref",
    comment="Ingested bulk diagnosis data.",
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
    },
)
def load_diagnosis_ref():
    """Batch-load the diagnosis reference CSV for ``bronze_diagnosis_ref``.

    The first row is used as the header and column types are inferred by
    the CSV reader.

    Relies on the ambient ``spark`` session, which is not defined in the
    visible part of this file.

    Returns:
        The DataFrame produced by the CSV reader.
    """
    source_path = "/Volumes/capstone/medisure/volume/diagnosis_ref.csv"
    csv_reader = spark.read.format("csv")
    csv_reader = csv_reader.options(header=True, inferSchema=True)
    return csv_reader.load(source_path)

In [0]:
@dlt.table(
    name="bronze_claims_batch",
    comment="Ingested bulk claims data.",
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
    },
)
def load_claims_batch():
    """Batch-load the claims CSV file for the ``bronze_claims_batch`` table.

    The first row is used as the header and column types are inferred by
    the CSV reader.

    Relies on the ambient ``spark`` session, which is not defined in the
    visible part of this file.

    Returns:
        The DataFrame produced by the CSV reader.
    """
    reader_settings = {"header": True, "inferSchema": True}
    claims_df = (
        spark.read.format("csv")
        .options(**reader_settings)
        .load("/Volumes/capstone/medisure/volume/claims_batch.csv")
    )
    return claims_df

In [0]:
@dlt.table(
    name="bronze_claims_stream",
    comment="Ingested new claims data using Autoloader as streaming source.",
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
    },
)
def ingest_claims_stream():
    """Stream JSON claim files into ``bronze_claims_stream`` via Auto Loader.

    Files under the ``claims_stream/`` directory are read incrementally
    with the ``cloudFiles`` source in JSON format.

    Relies on the ambient ``spark`` session, which is not defined in the
    visible part of this file.

    Returns:
        The streaming DataFrame produced by the ``cloudFiles`` reader.
    """
    return (
        spark.readStream
            .format("cloudFiles")
            .option("cloudFiles.format", "json")
            # Auto Loader does not use the batch-reader ``inferSchema``
            # option for type inference; without
            # ``cloudFiles.inferColumnTypes`` every JSON column is inferred
            # as a string.
            # NOTE(review): this changes the inferred column types, so an
            # already-materialized table may need a full refresh -- confirm.
            .option("cloudFiles.inferColumnTypes", True)
            .load("/Volumes/capstone/medisure/volume/claims_stream/")
    )

In [0]:
@dlt.table(
    name="bronze_providers",
    comment="Ingested new providers data using Autoloader as streaming source.",
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
    },
)
def ingest_providers():
    """Stream JSON provider files into ``bronze_providers`` via Auto Loader.

    Files under the ``providers/`` directory are read incrementally with
    the ``cloudFiles`` source in JSON format.

    Relies on the ambient ``spark`` session, which is not defined in the
    visible part of this file.

    Returns:
        The streaming DataFrame produced by the ``cloudFiles`` reader.
    """
    return (
        spark.readStream
            .format("cloudFiles")
            .option("cloudFiles.format", "json")
            # ``inferSchema`` is a batch-reader option and does not drive
            # Auto Loader type inference; ``cloudFiles.inferColumnTypes`` is
            # the documented switch. Without it JSON columns are inferred as
            # strings.
            # NOTE(review): this changes the inferred column types, so an
            # already-materialized table may need a full refresh -- confirm.
            .option("cloudFiles.inferColumnTypes", True)
            .load("/Volumes/capstone/medisure/volume/providers/")
    )