In [0]:
%pip install gtfs-realtime-bindings --quiet
# Restart the Python process right after the install (presumably so the newly
# installed package becomes importable in this notebook session — TODO confirm
# against the Databricks library docs).
# NOTE(review): the install above is unpinned; consider pinning a version so
# re-runs of this notebook are reproducible.
dbutils.library.restartPython()


In [0]:
# NOTE(review): this repeats the restart already issued at the end of the
# install cell; it looks redundant — confirm before removing.
dbutils.library.restartPython()


In [0]:
import datetime as dt
from google.transit import gtfs_realtime_pb2
import requests

# Ingestion date that names the Bronze landing directory; replace it with the
# actual ingestion date when running for another day.
TODAY = "2025-05-21"
# Bronze location for the GTFS-Realtime data: base directory + ingestion date.
BRONZE_RT_PATH = "dbfs:/bronze/gtfs_rt/" + TODAY


In [0]:
# Download one snapshot of the GTFS-Realtime vehicle-positions feed and decode
# it into a FeedMessage.

# Alternative feed, currently disabled:
# URL = "https://svc.metrotransit.org/gtfs/vehiclepositions.pb"
URL = "https://s3.amazonaws.com/kcm-alerts-realtime-prod/vehiclepositions.pb"

# requests applies no timeout by default; without one a stalled connection
# would hang this cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

response = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail fast on HTTP errors instead of handing an error body to the protobuf parser.
response.raise_for_status()

feed = gtfs_realtime_pb2.FeedMessage()
feed.ParseFromString(response.content)

print(f"# of entities: {len(feed.entity)}")
if feed.entity:
    # Show the first entity as a quick sanity check of the decoded payload.
    print(feed.entity[0])
else:
    print('No vehicle updates available.')


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, LongType, TimestampType
from datetime import datetime
import time

# Flatten the decoded feed into one row per vehicle position and write the
# batch to the Bronze Delta location.

# Explicit schema: keeps column types stable and lets the DataFrame be created
# even when the batch has no rows.
schema = StructType([
    StructField("vehicle_id", StringType(), True),
    StructField("trip_id", StringType(), True),
    StructField("route_id", StringType(), True),
    StructField("latitude", DoubleType(), True),
    StructField("longitude", DoubleType(), True),
    StructField("timestamp", LongType(), True),      # feed-reported value, stored as-is
    StructField("ingestion_ts", TimestampType(), True)
])

# One timezone-aware timestamp for the whole batch, so every row of a run
# carries the same ingestion_ts. Uses the `dt` alias imported in the
# configuration cell; replaces the deprecated, naive datetime.utcnow().
ingestion_ts = dt.datetime.now(dt.timezone.utc)

rows = []
for entity in feed.entity:
    # A feed entity may carry a payload other than a vehicle position; reading
    # entity.vehicle on such an entity would yield an all-default message and
    # produce a bogus row, so skip it.
    if not entity.HasField("vehicle"):
        continue

    vehicle_position = entity.vehicle
    trip = vehicle_position.trip
    has_position = vehicle_position.HasField("position")

    rows.append((
        # The vehicle identifier lives in the nested vehicle descriptor;
        # entity.id is the feed entity id and is used only as a fallback.
        vehicle_position.vehicle.id or entity.id,
        trip.trip_id,
        trip.route_id,
        # Store null rather than a default 0.0/0.0 when no position was sent.
        vehicle_position.position.latitude if has_position else None,
        vehicle_position.position.longitude if has_position else None,
        vehicle_position.timestamp,
        ingestion_ts,
    ))

if not rows:
    print("No vehicle updates available. Creating an empty DataFrame.")

# Create Spark DataFrame
df_rt = spark.createDataFrame(rows, schema)

# Write to Bronze Delta Lake path.
# NOTE(review): mode("overwrite") replaces whatever a previous run wrote to
# this path — confirm that keeping only the latest snapshot is intended.
df_rt.write.format("delta").mode("overwrite").save(BRONZE_RT_PATH)
print("✓ GTFS-RT Bronze ingest complete")


In [0]:
# Read the Bronze Delta data back and preview the first rows to confirm the write.
df_check = spark.read.load(BRONZE_RT_PATH, format="delta")
df_check.show(n=5)
