In [0]:
dbutils.fs.ls('mnt/silver/Sales/')      # List the contents of the Sales folder under the silver mount

[FileInfo(path='dbfs:/mnt/silver/Sales/CountryRegionCurrency/', name='CountryRegionCurrency/', size=0, modificationTime=1712187216000),
 FileInfo(path='dbfs:/mnt/silver/Sales/CreditCard/', name='CreditCard/', size=0, modificationTime=1712187234000),
 FileInfo(path='dbfs:/mnt/silver/Sales/Currency/', name='Currency/', size=0, modificationTime=1712187237000),
 FileInfo(path='dbfs:/mnt/silver/Sales/CurrencyRate/', name='CurrencyRate/', size=0, modificationTime=1712187240000),
 FileInfo(path='dbfs:/mnt/silver/Sales/Customer/', name='Customer/', size=0, modificationTime=1712187243000),
 FileInfo(path='dbfs:/mnt/silver/Sales/PersonCreditCard/', name='PersonCreditCard/', size=0, modificationTime=1712187246000),
 FileInfo(path='dbfs:/mnt/silver/Sales/SalesOrderDetail/', name='SalesOrderDetail/', size=0, modificationTime=1712187248000),
 FileInfo(path='dbfs:/mnt/silver/Sales/SalesOrderHeader/', name='SalesOrderHeader/', size=0, modificationTime=1712187251000),
 FileInfo(path='dbfs:/mnt/silver/S

In [0]:
dbutils.fs.ls('mnt/gold/')        # List the contents of the gold mount. It is empty on a fresh container; once the gold write step has run it contains Sales/.

[FileInfo(path='dbfs:/mnt/gold/Sales/', name='Sales/', size=0, modificationTime=1712212590000)]

In [0]:
input_path = '/mnt/silver/Sales/Customer'       # Silver-layer path of the Customer table, used as the sample input

In [0]:
df = spark.read.format('delta').load(input_path)      # Load the Delta table at input_path into a Spark DataFrame

In [0]:
# Preview the first 5 rows with the original column names
df.show(5)

+----------+--------+-------+-----------+-------------+--------------------+------------+
|CustomerID|PersonID|StoreID|TerritoryID|AccountNumber|             rowguid|ModifiedDate|
+----------+--------+-------+-----------+-------------+--------------------+------------+
|         1|    NULL|    934|          1|   AW00000001|3f5ae95e-b87d-4ae...|  2014-09-12|
|         2|    NULL|   1028|          1|   AW00000002|e552f657-a9af-4a7...|  2014-09-12|
|         3|    NULL|    642|          4|   AW00000003|130774b1-db21-4ef...|  2014-09-12|
|         4|    NULL|    932|          4|   AW00000004|ff862851-1daa-404...|  2014-09-12|
|         5|    NULL|   1026|          4|   AW00000005|83905bdc-6f5e-4f7...|  2014-09-12|
+----------+--------+-------+-----------+-------------+--------------------+------------+
only showing top 5 rows



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, regexp_replace

def to_underscored_name(raw_name):
    """Convert a ``ColumnName``-style identifier to ``Column_Name`` form.

    An underscore is inserted before an uppercase character only when that
    character starts a new word, i.e. the preceding character exists and is
    neither uppercase nor already an underscore. As a result:

    * runs of capitals stay together (``CustomerID`` -> ``Customer_ID``);
    * names without capitals are returned unchanged (``rowguid``);
    * existing underscores are respected, so already-converted names come
      back as-is (``Customer_ID`` -> ``Customer_ID``) and a leading
      underscore is preserved.

    Args:
        raw_name: Original column name.

    Returns:
        The column name with word boundaries marked by underscores.
    """
    parts = []
    previous = ""
    for char in raw_name:
        # `previous` is empty for the first character, so a leading capital
        # never receives an underscore (no index wrap-around, no lstrip).
        starts_word = (
            char.isupper()
            and previous not in ("", "_")
            and not previous.isupper()
        )
        parts.append("_" + char if starts_word else char)
        previous = char
    return "".join(parts)


# Obtaining list of columns
column_name = df.columns

# Rename all columns positionally in a single projection. Because the
# conversion leaves converted names untouched, re-running this cell on the
# already renamed `df` is a no-op instead of producing `Customer__ID`.
df = df.toDF(*[to_underscored_name(old_col_name) for old_col_name in column_name])

In [0]:
# Preview the first 5 rows again to check the renamed column headers
df.show(5)

+-----------+---------+--------+------------+--------------+--------------------+-------------+
|Customer_ID|Person_ID|Store_ID|Territory_ID|Account_Number|             rowguid|Modified_Date|
+-----------+---------+--------+------------+--------------+--------------------+-------------+
|          1|     NULL|     934|           1|    AW00000001|3f5ae95e-b87d-4ae...|   2014-09-12|
|          2|     NULL|    1028|           1|    AW00000002|e552f657-a9af-4a7...|   2014-09-12|
|          3|     NULL|     642|           4|    AW00000003|130774b1-db21-4ef...|   2014-09-12|
|          4|     NULL|     932|           4|    AW00000004|ff862851-1daa-404...|   2014-09-12|
|          5|     NULL|    1026|           4|    AW00000005|83905bdc-6f5e-4f7...|   2014-09-12|
+-----------+---------+--------+------------+--------------+--------------------+-------------+
only showing top 5 rows



# Applying the column-name transformation to all tables

In [0]:
# Build the list of table names from the silver Sales folder listing.
# Each listed entry name ends with '/', so only the part before the first
# slash is kept.
table_name = [
    entry.name.split('/')[0]
    for entry in dbutils.fs.ls('mnt/silver/Sales/')
]

In [0]:
# Display the collected table names
table_name

['CountryRegionCurrency',
 'CreditCard',
 'Currency',
 'CurrencyRate',
 'Customer',
 'PersonCreditCard',
 'SalesOrderDetail',
 'SalesOrderHeader',
 'SalesOrderHeaderSalesReason',
 'SalesPerson',
 'SalesPersonQuotaHistory',
 'SalesReason',
 'SalesTaxRate',
 'SalesTerritory',
 'SalesTerritoryHistory',
 'ShoppingCartItem',
 'SpecialOffer',
 'SpecialOfferProduct',
 'Store']

## Level 2 Transformation: Silver to Gold

In [0]:
def to_underscored_name(raw_name):
    """Convert a ``ColumnName``-style identifier to ``Column_Name`` form.

    An underscore is inserted before an uppercase character only when that
    character starts a new word, i.e. the preceding character exists and is
    neither uppercase nor already an underscore. As a result:

    * runs of capitals stay together (``CustomerID`` -> ``Customer_ID``);
    * names without capitals are returned unchanged (``rowguid``);
    * existing underscores are respected, so already-converted names come
      back as-is (``Customer_ID`` -> ``Customer_ID``) and a leading
      underscore is preserved.

    Args:
        raw_name: Original column name.

    Returns:
        The column name with word boundaries marked by underscores.
    """
    parts = []
    previous = ""
    for char in raw_name:
        # `previous` is empty for the first character, so a leading capital
        # never receives an underscore (no index wrap-around, no lstrip).
        starts_word = (
            char.isupper()
            and previous not in ("", "_")
            and not previous.isupper()
        )
        parts.append("_" + char if starts_word else char)
        previous = char
    return "".join(parts)


# For every silver Sales table: load it, rename its columns to Column_Name
# form, and write the result to the matching folder in the gold container.
for name in table_name:
    path = '/mnt/silver/Sales/' + name
    print(path)
    df = spark.read.format('delta').load(path)

    # Obtaining list of columns
    column_name = df.columns

    # Rename all columns positionally in a single projection rather than
    # stacking one withColumnRenamed projection per column.
    df = df.toDF(*[to_underscored_name(old_col_name) for old_col_name in column_name])

    # Overwrite mode replaces any previous gold output for this table.
    output_path = '/mnt/gold/Sales/' +name +'/'
    df.write.format('delta').mode("overwrite").save(output_path)

/mnt/silver/Sales/CountryRegionCurrency
/mnt/silver/Sales/CreditCard
/mnt/silver/Sales/Currency
/mnt/silver/Sales/CurrencyRate
/mnt/silver/Sales/Customer
/mnt/silver/Sales/PersonCreditCard
/mnt/silver/Sales/SalesOrderDetail
/mnt/silver/Sales/SalesOrderHeader
/mnt/silver/Sales/SalesOrderHeaderSalesReason
/mnt/silver/Sales/SalesPerson
/mnt/silver/Sales/SalesPersonQuotaHistory
/mnt/silver/Sales/SalesReason
/mnt/silver/Sales/SalesTaxRate
/mnt/silver/Sales/SalesTerritory
/mnt/silver/Sales/SalesTerritoryHistory
/mnt/silver/Sales/ShoppingCartItem
/mnt/silver/Sales/SpecialOffer
/mnt/silver/Sales/SpecialOfferProduct
/mnt/silver/Sales/Store


In [0]:
# After the loop, df still refers to the last table processed, so this previews that table's first 5 rows
df.show(5)

+------------------+--------------------+---------------+--------------------+--------------------+-------------+
|Business_Entity_ID|                Name|Sales_Person_ID|        Demographics|             rowguid|Modified_Date|
+------------------+--------------------+---------------+--------------------+--------------------+-------------+
|               292|Next-Door Bike Store|            279|<StoreSurvey xmln...|a22517e3-848d-4eb...|   2014-09-12|
|               294|Professional Sale...|            276|<StoreSurvey xmln...|b50ca50b-c601-4a1...|   2014-09-12|
|               296|      Riders Company|            277|<StoreSurvey xmln...|337c3688-1339-4e1...|   2014-09-12|
|               298|  The Bike Mechanics|            275|<StoreSurvey xmln...|7894f278-f0c8-4d1...|   2014-09-12|
|               300|   Nationwide Supply|            286|<StoreSurvey xmln...|c3fc9705-a8c4-4f3...|   2014-09-12|
+------------------+--------------------+---------------+--------------------+----------