### Reading data from the device-events container


In [0]:
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import StructType, StringType, StructField, IntegerType, DateType, TimestampType

# Load every Avro file found beneath the device-events container.
# `recursiveFileLookup` descends into nested folders; `ignoreExtension`
# asks the Avro reader to accept files regardless of their extension.
df = (
    spark.read.format('avro')
    .options(ignoreExtension='true', recursiveFileLookup='true')
    .load('abfss://device-events@telcostoragelayer.dfs.core.windows.net/')
)

# Keep only the `body` column, cast to a string so it can be parsed as JSON.
decoded_df = df.select(col("body").cast(StringType()).alias("body_decoded"))
# Schema used to turn each JSON payload into typed DataFrame columns.
json_schema = (
    StructType()
    .add("device_id", StringType(), False)        # unique device id
    .add("customer", IntegerType(), False)        # linked customer id
    .add("imei_number", StringType(), True)       # IMEI of the device
    .add("devicebrand", StringType(), True)       # e.g. Samsung, Apple, OnePlus
    .add("model", StringType(), True)             # model name
    .add("os", StringType(), True)                # OS version
    .add("sim_number", StringType(), True)        # SIM card number
    .add("imsi", StringType(), True)              # international mobile subscriber identity
    .add("network_type", StringType(), True)      # 3G, 4G, 5G
    .add("activation_date", DateType(), True)     # when device/SIM was activated
    .add("status", StringType(), True)            # Active, Suspended, Deactivated
    .add("last_synced", TimestampType(), True)    # last sync timestamp
)
# Parse each decoded body against `json_schema`, flatten the resulting struct
# into top-level columns, and expose `customer` under the name `customer_id`.
# `coalesce(1)` reduces the frame to a single partition before it is written.
data = (
    decoded_df
    .select(from_json(col("body_decoded"), json_schema).alias("data"))
    .select("data.*")
    .withColumnRenamed('customer', 'customer_id')
    .coalesce(1)
)

# Replace whatever is currently stored at the bronze device path with this result.
data.write.format('parquet').mode('overwrite').save(
    'abfss://bronze@telcostoragelayer.dfs.core.windows.net/device/'
)

In [0]:
# Show the parsed device records (the `data` frame built in the previous cell)
# in the notebook output.
# NOTE(review): `.display()` appears to be a notebook-runtime helper rather than
# a core PySpark DataFrame method — confirm the target runtime provides it.
data.display()
