In [0]:
##fact_trip###
#
# Builds the trip fact table from the bronze trips/riders tables, persists it
# as `gold_fact_trip` (partitioned by date_key), then prints a row count and a
# small sample read back from the persisted table.

GOLD_FACT_TRIP_TABLE = "gold_fact_trip"

print(" CREATE - FACT TABLES")
print("="*70)

# FACT_TRIP - Primary fact table for time/ride analysis
print("\n Create fact_trip...")

# NOTE: the INNER JOIN drops trips whose rider_id has no match in
# bronze_riders, and the WHERE clause drops trips with missing or
# non-positive durations.
fact_trip = spark.sql("""
    SELECT
        -- Primary Key
        t.trip_id,

        -- Foreign Keys to Dimensions
        t.rider_id as rider_key,
        t.start_station_id as start_station_key,
        t.end_station_id as end_station_key,
        DATE_FORMAT(DATE(t.start_time), 'yyyyMMdd') as date_key,
        -- Hour-level key: hour * 100 (e.g. 1400 for any 14:xx start).
        -- Presumably matches an HHMM-style key in the time dimension - confirm.
        HOUR(t.start_time) * 100 as time_key,

        -- FACTS - Core Business Metrics
        ROUND((UNIX_TIMESTAMP(t.end_time) - UNIX_TIMESTAMP(t.start_time)) / 60.0, 2) as trip_duration_minutes,
        -- Age in completed calendar years. DATEDIFF / 365.25 is off by one on
        -- or near birthdays (365 days / 365.25 floors to 0 for a 1-year-old),
        -- so count whole months between the two dates instead.
        FLOOR(MONTHS_BETWEEN(DATE(t.start_time), TO_DATE(r.birthday)) / 12) as rider_age_at_trip,
        r.is_member as is_member_trip,

        -- Additional Attributes
        t.bike_type,
        t.start_time,
        t.end_time,

        -- Derived Fields for Analysis
        -- Ranges are non-overlapping: hours 9 and 17 belong to the rush
        -- periods, so 'Business Hours' covers 10-16 only.
        CASE
            WHEN HOUR(t.start_time) BETWEEN 7 AND 9 THEN 'Morning Rush'
            WHEN HOUR(t.start_time) BETWEEN 17 AND 19 THEN 'Evening Rush'
            WHEN HOUR(t.start_time) BETWEEN 10 AND 16 THEN 'Business Hours'
            ELSE 'Off Hours'
        END as trip_time_category

    FROM bronze_trips t
    INNER JOIN bronze_riders r ON t.rider_id = r.rider_id
    WHERE t.start_time IS NOT NULL
      AND t.end_time IS NOT NULL
      AND t.end_time > t.start_time  -- Data quality check
""")

# Write with partitioning by date for performance.
# "overwriteSchema" lets the overwrite replace the existing table schema
# (assumes a Delta table - TODO confirm the table format).
fact_trip.write \
    .mode("overwrite") \
    .option("overwriteSchema", "true") \
    .partitionBy("date_key") \
    .saveAsTable(GOLD_FACT_TRIP_TABLE)

# `fact_trip` is a lazy, uncached query: calling count()/show() on it would
# re-run the whole bronze join each time. Report from the table that was just
# written instead, which also reflects what was actually persisted.
gold_fact_trip = spark.table(GOLD_FACT_TRIP_TABLE)

trip_count = gold_fact_trip.count()
print(f" fact_trip created: {trip_count:,} rows")

# Show sample with business metrics
print("\n Sample Trip Facts with business metric:")
gold_fact_trip.select("trip_id", "rider_key", "trip_duration_minutes", "rider_age_at_trip",
                      "is_member_trip", "trip_time_category").show(5)

 CREATE - FACT TABLES

 Create fact_trip...
 fact_trip created: 4,584,706 rows

 Sample Trip Facts with business metric:
+----------------+---------+---------------------+-----------------+--------------+------------------+
|         trip_id|rider_key|trip_duration_minutes|rider_age_at_trip|is_member_trip|trip_time_category|
+----------------+---------+---------------------+-----------------+--------------+------------------+
|DF22914D785D9CE3|    27676|                17.27|               63|         false|    Business Hours|
|191FD832FAB509E6|    55684|                 0.05|               19|         false|         Off Hours|
|A69A7D200767C66D|    23828|               114.77|               21|          true|    Business Hours|
|17CE1AFAADB4E5EC|    40707|                71.20|               15|          true|    Business Hours|
|9D4345488835D46B|    26602|                15.83|               17|          true|    Business Hours|
+----------------+---------+---------------------+-----