In [0]:
### Extract layer ###

# Announce the start of the extract step, followed by a 70-character rule.
print("\n  extract step start ", "-" * 70, sep="\n")

# Helper that renames a source table's columns and persists it as a bronze table
def create_bronze_table(src_table, new_cols, target_table):
    """
    Copy a source table into a target table under clean column names.

    Args:
        src_table: Name of the table to read via ``spark.table``.
        new_cols: Replacement column names, applied positionally by
            ``DataFrame.toDF`` (one name per source column).
        target_table: Name of the table to write; an existing table is
            overwritten, including its schema.

    Returns:
        A DataFrame backed by the freshly written ``target_table``.
    """
    renamed_df = spark.table(src_table).toDF(*new_cols)
    (
        renamed_df.write
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(target_table)
    )

    # Read back what was actually persisted. Counting the pre-write DataFrame
    # would re-execute the source read and report the source's row count
    # rather than the row count of the table that was just written.
    bronze_df = spark.table(target_table)
    print(f" {target_table} ready with {bronze_df.count():,} rows")
    return bronze_df

# Load each raw source table into its bronze counterpart. The column lists are
# applied positionally, so their order must match the source table's columns.

# Payments
print("\nLoading payments into bronze...")
bronze_payments_df = create_bronze_table(
    src_table="payments",
    new_cols=[
        "payment_id",
        "payment_date",
        "amount",
        "rider_id",
    ],
    target_table="bronze_payments",
)

# Riders
print("\nLoading riders into bronze...")
bronze_riders_df = create_bronze_table(
    src_table="riders",
    new_cols=[
        "rider_id",
        "first_name",
        "last_name",
        "address",
        "birthday",
        "account_start_date",
        "is_member",
    ],
    target_table="bronze_riders",
)

# Stations
print("\nLoading stations into bronze...")
bronze_stations_df = create_bronze_table(
    src_table="stations",
    new_cols=[
        "station_id",
        "station_name",
        "latitude",
        "longitude",
    ],
    target_table="bronze_stations",
)

# Trips
print("\nLoading trips into bronze...")
bronze_trips_df = create_bronze_table(
    src_table="trips",
    new_cols=[
        "trip_id",
        "bike_type",
        "start_time",
        "end_time",
        "start_station_id",
        "end_station_id",
        "rider_id",
    ],
    target_table="bronze_trips",
)

# Completion banner (message previously misspelled "laye").
print("\n bronze layer complete ")
print("=" * 70)

## Verify ##
# Re-read each bronze table and print its row count, column count,
# column names with types, and a two-row preview.

print("\n Verify extract tables ", "=" * 70, sep="\n")

for table_name in ("bronze_payments", "bronze_riders", "bronze_stations", "bronze_trips"):
    bronze_df = spark.table(table_name)

    # Per-table header followed by a 40-character rule.
    print(f"\nTable: {table_name}", "-" * 40, sep="\n")

    print(f" Row count   : {bronze_df.count():,}")
    print(f" Column count: {len(bronze_df.columns)}")

    print(" Schema:")
    for col_name, col_type in bronze_df.dtypes:
        print(f"   {col_name} ({col_type})")

    print(" Preview:")
    bronze_df.show(2, truncate=True)


  extract step start 
----------------------------------------------------------------------

Loading payments into bronze...
 bronze_payments ready with 1,946,606 rows

Loading riders into bronze...
 bronze_riders ready with 74,999 rows

Loading stations into bronze...
 bronze_stations ready with 837 rows

Loading trips into bronze...
 bronze_trips ready with 4,584,920 rows

 bronze laye complete 

 Verify extract tables 

Table: bronze_payments
----------------------------------------
 Row count   : 1,946,606
 Column count: 4
 Schema:
   payment_id (bigint)
   payment_date (date)
   amount (double)
   rider_id (bigint)
 Preview:
+----------+------------+------+--------+
|payment_id|payment_date|amount|rider_id|
+----------+------------+------+--------+
|   1012544|  2021-11-01|   9.0|   40136|
|   1012545|  2021-12-01|   9.0|   40136|
+----------+------------+------+--------+
only showing top 2 rows

Table: bronze_riders
----------------------------------------
 Row count   : 74,999