In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable
from datetime import datetime

In [0]:
# Tables ingested as a stream (read with Auto Loader / cloudFiles further down).
incremental_load = [
    "orders",
    "payments",
]

# Tables that are re-read in full and overwritten on every run.
fullbatch_load = [
    "customers",
    "products",
    "categories",
]

### Storage paths and bronze target

In [0]:
# NOTE: the directory paths below are concatenated directly with a table name
# (f'{base_path}{table}'), so each one must keep its trailing slash.

# Raw CSV landing area; each table is read from its own sub-directory.
base_path = "/Volumes/main/volume/task/auto_loader_1/raw/"

# Per-table streaming checkpoint directories are created under this path.
checkpoint_path = "/Volumes/main/volume/task/auto_loader_1/checkpoint/"

# Per-table Auto Loader schema-tracking directories are created under this path.
schema_path = "/Volumes/main/volume/task/auto_loader_1/schema/"

# Prefix for target tables; tables are written as '<bronze_table>.<table>'.
bronze_table = "main.bronze_al"

### Full batch load (overwrite)


In [0]:
# Full refresh: for every table in `fullbatch_load`, read all CSV files under
# its raw directory and overwrite the matching bronze Delta table.
for table in fullbatch_load:
    df = (
        spark.read
        .format('csv')
        # First row is the header; column types are inferred from the data.
        .options(header=True, inferSchema=True)
        .load(f'{base_path}{table}')
        # Audit columns: load timestamp and the file each row was read from.
        .withColumn('ingest_time', current_timestamp())
        .withColumn('source_file', col('_metadata.file_path'))
    )

    (
        df.write
        .format('delta')
        .mode('overwrite')
        # Allow the overwrite to replace the table schema as well as its data.
        .option('overwriteSchema', True)
        .saveAsTable(f'{bronze_table}.{table}')
    )

### Incremental load (Auto Loader)

In [0]:
# Incremental ingest: start one Auto Loader (cloudFiles) stream per table in
# `incremental_load`, appending into the matching bronze Delta table.
#
# Each stream uses the availableNow trigger, so it processes the files that
# are currently available and then stops. The returned StreamingQuery handles
# are kept and awaited below; without that the cell would return while the
# streams are still running and any stream failure would go unnoticed.
stream_queries = []

for table in incremental_load:
    df = (
        spark.readStream
        .format('cloudFiles')
        .option('cloudFiles.format', 'csv')
        .option('header', True)
        # Infer real column types instead of reading every column as string.
        .option('cloudFiles.inferColumnTypes', True)
        # Auto Loader persists the inferred schema per table at this location.
        .option('cloudFiles.schemaLocation', f'{schema_path}{table}')
        .load(f'{base_path}{table}')
        # Audit columns: load timestamp and the file each row was read from.
        .withColumn('ingest_time', current_timestamp())
        .withColumn('source_file', col('_metadata.file_path'))
    )

    query = (
        df.writeStream
        .format('delta')
        .outputMode('append')
        # The checkpoint records the stream's progress between runs.
        .option('checkpointLocation', f'{checkpoint_path}{table}')
        # Let new source columns be added to the target table.
        .option('mergeSchema', True)
        .trigger(availableNow=True)
        # toTable() is the documented DataStreamWriter method that starts the
        # stream into a table and returns the StreamingQuery.
        .toTable(f'{bronze_table}.{table}')
    )
    stream_queries.append(query)

# All streams are started first (so they run concurrently), then awaited.
# awaitTermination() re-raises the error of a query that failed.
for query in stream_queries:
    query.awaitTermination()