### Reading data from the usage events container


In [0]:
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import StructType, StringType, StructField, IntegerType, DateType, TimestampType

# Load every Avro file under the usage-events container into one DataFrame.
# recursiveFileLookup walks nested folders; ignoreExtension also picks up
# files that do not end in ".avro".
df = (
    spark.read
    .format("avro")
    .options(ignoreExtension="true", recursiveFileLookup="true")
    .load("abfss://usage-events@telcostoragelayer.dfs.core.windows.net/")
)

# Keep only the event payload: cast the `body` column to a string and expose
# it as `body_decoded` for JSON parsing.
decoded_df = df.select(df["body"].cast("string").alias("body_decoded"))
# Schema used to parse the JSON payload into typed columns.
# Every field is declared nullable.
json_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("UsageID", StringType()),
        ("CustomerID", StringType()),
        ("DataUsedMB", IntegerType()),
        # Kept as a string at this stage; it can be cast to DateType later.
        ("UsageDate", StringType()),
        ("CallMinutes", IntegerType()),
        ("SMSCount", IntegerType()),
        ("RoamingMinutes", IntegerType()),
    )
])
# Parse each decoded body with json_schema, then project the parsed struct's
# fields under snake_case column names (same fields, same order).
data = (
    decoded_df
    .select(from_json(col("body_decoded"), json_schema).alias("data"))
    .select(
        col("data.UsageID").alias("usage_id"),
        col("data.CustomerID").alias("customer_id"),
        col("data.DataUsedMB").alias("data_used_mb"),
        col("data.UsageDate").alias("usage_date"),
        col("data.CallMinutes").alias("call_minutes"),
        col("data.SMSCount").alias("sms_count"),
        col("data.RoamingMinutes").alias("roaming_minutes"),
    )
    # Collapse to a single partition before writing.
    # NOTE(review): presumably intended to produce one output file; confirm
    # this is acceptable for the expected data volume.
    .coalesce(1)
)

# Replace whatever is currently stored at the bronze usage path with the
# freshly parsed data in Parquet format.
(
    data.write
    .format("parquet")
    .mode("overwrite")
    .save("abfss://bronze@telcostoragelayer.dfs.core.windows.net/usage/")
)

In [0]:
# Show the parsed usage DataFrame in the notebook output for a visual check.
# NOTE(review): `.display()` is presumably provided by the notebook runtime
# rather than core PySpark - confirm before running outside this environment.
data.display()
