In [0]:
# Load the data with default column names (_c0, _c1, etc.)
df = spark.table("default.top_1000_wealthiest_people_3_csv")

In [0]:
# Rename columns to meaningful names
df = df.withColumnRenamed("_c0", "Name") \
       .withColumnRenamed("_c1", "Country") \
       .withColumnRenamed("_c2", "Industry") \
       .withColumnRenamed("_c3", "Net_Worth") \
       .withColumnRenamed("_c4", "Company")

In [0]:
# Save the Data Source to Delta Table with schema overwrite
df.write.format("delta").option("overwriteSchema", "true").mode("overwrite").save("/mnt/delta/source_table_wealth")


In [0]:
# Read from the Data Source Delta Table
source_df = spark.read.format("delta").load("/mnt/delta/source_table_wealth")

In [0]:
# Create a temporary SQL view for Spark SQL queries
source_df.createOrReplaceTempView("wealth_data")

In [0]:
# Use Spark SQL to filter and transform the data
transformed_df = spark.sql("""
    SELECT Name, Country, Industry, Net_Worth, Company
    FROM wealth_data
    WHERE Industry = 'Technology' AND Net_Worth > 100
""")

In [0]:
# Write Transformed Data to Delta Lake Table as Data Sink
transformed_df.write.format("delta").mode("overwrite").save("/mnt/delta/sink_table_wealth")

In [0]:
# Verify: Read and Display Both Source and Sink Tables
source = spark.read.format("delta").load("/mnt/delta/source_table_wealth")
sink = spark.read.format("delta").load("/mnt/delta/sink_table_wealth")

# Show the data for verification
print("Source Data:")
source.show()

Source Data:
+-----------------+-------+-------------+--------------------+-------------------+
|             Name|Country|     Industry|           Net_Worth|            Company|
+-----------------+-------+-------------+--------------------+-------------------+
|             Name|Country|     Industry|Net Worth (in bil...|            Company|
|       Rob Walton| Mexico|      Finance|                 8.5|            Walmart|
|      Sergey Brin|    USA|   Automotive|               44.76|             Google|
|    Steve Ballmer|    USA|Manufacturing|               13.43|    Koch Industries|
|    Mukesh Ambani|    USA|   Technology|              120.44|             Google|
|       Jim Walton|    USA|      Fashion|              122.39|            Walmart|
|      Sergey Brin|    USA|   Technology|               93.19|            Walmart|
|Michael Bloomberg|    USA|    Cosmetics|              117.96|Reliance Industries|
|   Warren Buffett| France|       Retail|               36.62|          Mi

In [0]:
print("Transformed (Sink) Data:")
sink.show()

Transformed (Sink) Data:
+--------------------+-------+----------+---------+-------------------+
|                Name|Country|  Industry|Net_Worth|            Company|
+--------------------+-------+----------+---------+-------------------+
|       Mukesh Ambani|    USA|Technology|   120.44|             Google|
|        Alice Walton|    USA|Technology|   167.09|               Zara|
|        Alice Walton|    USA|Technology|   192.96|             Oracle|
|          Larry Page|    USA|Technology|    184.8| Berkshire Hathaway|
|     Mark Zuckerberg|    USA|Technology|   175.15|             Google|
|     Mark Zuckerberg|    USA|Technology|   171.94|          Microsoft|
|          Jeff Bezos|    USA|Technology|   126.21| Berkshire Hathaway|
|          Bill Gates|    USA|Technology|   104.75|          Microsoft|
|          Jim Walton|    USA|Technology|   196.19|            Walmart|
|          Jeff Bezos| Mexico|Technology|   167.46|             Oracle|
|          David Koch|    USA|Technolog