# Auto Loader: incrementally ingest newly arrived files from the landing zone into bronze

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number
from pyspark.sql.functions import current_timestamp, from_utc_timestamp

# Incrementally ingest JSON files from the landing container into per-dataset
# Delta locations in the bronze container, using Auto Loader ("cloudFiles").
#
# Each dataset name is both the landing folder that is read and the bronze
# folder that is written.
datasets = [
    "fingrid_solar_power_generation_forecast_updated_every_15_minutes",
    "fingrid_wind_power_generation_forecast_updated_every_15_minutes",
    "Electricity_consumption_by_customer_type",
]

landing_root = "abfss://landing@fingridtest.dfs.core.windows.net"
bronze_root = "abfss://bronze@fingridtest.dfs.core.windows.net"

for dataset in datasets:
    load_path = f"{landing_root}/{dataset}/"
    write_path = f"{bronze_root}/{dataset}/"

    # Every stream needs its OWN schema/checkpoint directory. Sharing a single
    # directory across datasets makes the queries overwrite each other's
    # inferred schema and progress state (or fail to start).
    # NOTE(review): state previously written to the shared
    # ".../autoloader_checkpoint" root is not picked up from this new path, so
    # the first run after this change will presumably re-discover existing
    # landing files — confirm whether a backfill/dedup step is needed.
    state_path = f"{bronze_root}/autoloader_checkpoint/{dataset}/"

    # read data from landing zone
    df = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "json")
        .option("cloudFiles.schemaLocation", state_path)
        .option("cloudFiles.schemaEvolutionMode", "rescue")
        .load(load_path)
    )

    # add ingestion timestamp
    df = df.withColumn("ingestion_timestamp", current_timestamp())

    # availableNow: process everything currently pending, then stop the query.
    (
        df.writeStream.trigger(availableNow=True)
        .format("delta")
        .outputMode("append")
        .option("checkpointLocation", state_path)
        .start(write_path)
    )