### Example of Auto Loader with PySpark

In [0]:
# Make `catalog` the current catalog and `schema` the current schema for this
# session; the table name used later in the notebook (`autoloader2`) is
# unqualified. The result of the last statement is what the cell displays.
spark.sql("USE CATALOG catalog")
spark.sql("USE schema")

DataFrame[]

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
# Schema for the incoming records: one nullable field per (name, type) pair,
# declared in the order listed here.
schema = StructType([
    StructField(field_name, field_type(), True)
    for field_name, field_type in (
        ("Id", IntegerType),
        ("name", StringType),
        ("age", IntegerType),
        ("money", IntegerType),
        ("sales", IntegerType),
        ("units", IntegerType),
    )
])

In [0]:
# Directory that Auto Loader monitors for newly arriving JSON files.
path = "/Volumes/autoloader/json2/"

# Checkpoint directory where the stream records its progress.
# NOTE(review): this directory lives inside `path`, so the source listing may
# also pick up the checkpoint's own files as input. Consider moving the
# checkpoint outside the monitored directory — TODO confirm a suitable location.
checkpoint = "/Volumes/autoloader/json2/checkpoint/"

# Reader options: the files are JSON, and a single record may span several lines.
autoLoaderOptions = {
    "cloudFiles.format": "json",
    "multiline": "true",
}

# Streaming DataFrame backed by Auto Loader; the explicit schema is supplied
# through `.schema(schema)`.
streamingDF = (
    spark.readStream
    .format("cloudFiles")
    .options(**autoLoaderOptions)
    .schema(schema)
    .load(path)
)

# Example processing logic: keep three columns and drop rows in which any of
# them is null.
processedDF = streamingDF.select("Id", "name", "age").na.drop()

# Append the processed rows to the Delta table `autoloader2`.
# `availableNow=True` makes the query process the files that are available
# when it starts and then stop by itself, instead of running until it is
# stopped manually.
query = (
    processedDF.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", checkpoint)
    .trigger(availableNow=True)
    .toTable("autoloader2")
)

# Block until the run has finished so that cells executed afterwards
# (including under "Run All") see the ingested rows.
query.awaitTermination()


In [0]:
# Stop the streaming query referenced by `query`.
query.stop()

In [0]:
%sql
-- Show every row and column of the autoloader2 table.
SELECT * FROM autoloader2

Id,name,age
10,Julia,26
11,Kevin,39
12,Laura,31
7,George,30
8,Hannah,28
9,Ian,36
1,Alice,25
2,Bob,32
3,Charlie,29
4,Diana,41
