In [0]:
%python
# Connect to Azure Data Lake Storage Gen2 and load the CRM Delta table.

# All connection settings arrive as notebook widgets and are read in this order.
# NOTE(review): the storage account key is passed as a plain widget value;
# consider sourcing it from a Databricks secret scope instead — confirm with the owner.
access_key, storage_account, input_container_name, crm_input_path = (
    dbutils.widgets.get(widget_name)
    for widget_name in (
        "access_key",
        "storage_account",
        "input_container_name",
        "crm_input_path",
    )
)

# Register the account key on the Spark session configuration for this storage account.
spark.conf.set(
    f"fs.azure.account.key.{storage_account}.dfs.core.windows.net",
    access_key,
)

# abfss:// URI of the CRM input inside the input container.
file_path_crm = f"abfss://{input_container_name}@{storage_account}.dfs.core.windows.net/{crm_input_path}"

# Load the CRM data, which is stored in Delta format.
crm_df = spark.read.format("delta").load(file_path_crm)

In [0]:
# Revenue-segment summary: for each revenue_segment, sum total_revenue
# (rounded to 2 decimals) and count msisdn.
# NOTE: this import shadows Python's built-in round() for the rest of the
# notebook; the iot_usage_segment aggregation in a later cell relies on it.
from pyspark.sql.functions import round

# The dict-style agg() yields columns named "sum(total_revenue)" and
# "count(msisdn)", which are renamed to friendlier names below.
# NOTE(review): "no_of_Customers" is capitalised differently from the
# "no_of_customers" column of the IoT usage summary — confirm whether
# downstream readers depend on this exact casing before aligning the two.
revenue_segments = (
    crm_df.groupBy("revenue_segment")
    .agg({"total_revenue" : "sum", "msisdn" : "count"})
    .withColumnRenamed("sum(total_revenue)", "total_revenue")
    .withColumnRenamed("count(msisdn)", "no_of_Customers")
    .withColumn("total_revenue", round("total_revenue", 2))
)

In [0]:
# IoT usage summary: for each iot_usage_segment, sum total_revenue
# (rounded to 2 decimals) and count msisdn.
# `round` here is pyspark.sql.functions.round, imported in an earlier cell
# (it shadows the Python built-in).
iot_usage_tier = (
    crm_df.groupBy("iot_usage_segment")
    .agg({"total_revenue":"sum","msisdn" : "count"})
    .withColumnRenamed("sum(total_revenue)", "total_revenue")
    .withColumnRenamed("count(msisdn)", "no_of_customers")
    .withColumn("total_revenue", round("total_revenue", 2))
)


iot_usage_segment,total_revenue,no_of_customers
Heavy Usage,15986592.69,103287
Minimal Usage,313356.42,6712
Moderate Usage,1086826.21,12022


In [0]:
# Persist both summaries as Delta tables in the output container.
output_container_name = dbutils.widgets.get("output_container_name")

# Each aggregate is paired with the Delta location it overwrites;
# the writes run in the listed order.
delta_targets = [
    (
        revenue_segments,
        f"abfss://{output_container_name}@{storage_account}.dfs.core.windows.net/telecom_iot/revenue_segments",
    ),
    (
        iot_usage_tier,
        f"abfss://{output_container_name}@{storage_account}.dfs.core.windows.net/telecom_iot/iot_usage_tier",
    ),
]

for aggregate_df, target_path in delta_targets:
    # mode("overwrite") replaces whatever data already exists at the target path.
    aggregate_df.write.format("delta").mode("overwrite").save(target_path)