In [0]:
%sql
-- Switch the session to the `sales` catalog and echo the active catalog back.
USE CATALOG sales;
SELECT current_catalog();

-- Create the `bronze` schema only when it is not already present.
CREATE SCHEMA IF NOT EXISTS bronze;

In [0]:
from pyspark.sql.functions import * 
from pyspark.sql.types import *

In [0]:
# Load the raw Parquet files from the bronze container.
# Parquet files carry their own schema, so no inference option is required:
# `inferSchema` is a CSV/JSON reader option and has no effect on Parquet reads.
bronze_df = (
    spark.read
    .format('parquet')
    .load('abfss://bronze@dlsendtoendproject001.dfs.core.windows.net/raw_data')
)

# Preview the loaded data.
display(bronze_df)

In [0]:
# Practice only: preview Units_Sold cast to a string type.
# The result is displayed but never assigned, so bronze_df is left unchanged.
units_sold_as_string = col('Units_Sold').cast('String')
bronze_df.withColumn('Units_Sold', units_sold_as_string).display()

In [0]:
# Derive model_category: the first piece of Model_ID when split on '-'.
model_id_parts = split(col("Model_ID"), '-')
bronze_df = bronze_df.withColumn('model_category', model_id_parts[0])

In [0]:
# Arithmetic ops: add RevenuePerUnit = Revenue / Units_Sold.
# NOTE(review): rows with Units_Sold = 0 are not guarded here; depending on the
# cluster's ANSI setting Spark presumably yields null or raises — confirm.
revenue_per_unit = col('Revenue') / col('Units_Sold')
bronze_df = bronze_df.withColumn('RevenuePerUnit', revenue_per_unit)

In [0]:
# Ad-hoc analysis
# Q: How many units were sold by each branch in each year?

# `sum` here is the Spark column aggregate brought in by the star import from
# pyspark.sql.functions, not Python's built-in sum.
units_per_branch_year = bronze_df.groupBy('BranchName', 'Year').agg(
    sum('Units_Sold').alias('TotalUnitsSoldPerYearPerBranch')
)

# Order by Year ascending, then by total units sold descending within a year.
units_per_branch_year.sort(
    'Year',
    'TotalUnitsSoldPerYearPerBranch',
    ascending=[True, False],
).display()

Databricks visualization. Run in Databricks to view.

In [0]:
# Show bronze_df with the columns derived in the cells above.
bronze_df.display()

## Write the bronze data to the silver container

In [0]:
# Persist the enriched DataFrame as Parquet in the silver container,
# replacing whatever was previously stored at that path.
silver_writer = bronze_df.write.format('parquet').mode('overwrite')
(
    silver_writer
    .option('path', 'abfss://silver@dlsendtoendproject001.dfs.core.windows.net/silver_data')
    .save()
)

Querying silver data

In [0]:
# Read back the Parquet data stored in the silver container.
# Parquet files carry their own schema, so no inference option is required:
# `inferSchema` is a CSV/JSON reader option and has no effect on Parquet reads.
silver_df = (
    spark.read
    .format('parquet')
    .load('abfss://silver@dlsendtoendproject001.dfs.core.windows.net/silver_data')
)

# Preview the silver data.
display(silver_df)