In [0]:
from pyspark.sql.functions import col

# Look up the TfL Unified API credentials row in the config table.
# `.first()` fetches at most one row and returns None when nothing matches,
# so a missing row produces a clear error instead of a bare IndexError.
creds = (spark.table("tfl.config.api_credentials")
             .filter(col("api_name") == "tfl_unified_api")
             .first())

if creds is None:
    raise LookupError(
        "No row with api_name = 'tfl_unified_api' found in tfl.config.api_credentials"
    )

# Expose the individual fields as notebook-level constants.
APP_ID  = creds["app_id"]
APP_KEY = creds["app_key"]
BASE_URL = creds["base_url"]



In [0]:
# Confirm which credentials were loaded. The API key is a secret and cell
# output is stored with the notebook, so only report whether a key is present.
print(f"Loaded TfL creds: {APP_ID=}  {BASE_URL=}  APP_KEY={'<set>' if APP_KEY else '<missing>'}")

In [0]:
# %pip install requests   # if needed

import requests, json, datetime
from pyspark.sql import Row

# TfL StopPoint modes metadata (no key needed for a quick test; add ?app_key=... if you registered)
url = "https://api.tfl.gov.uk/StopPoint/Meta/Modes"
response = requests.get(url, timeout=20)
# Fail fast on HTTP errors so an error body is never written over the bronze table.
response.raise_for_status()
data = response.json()

# The rows below are built by iterating the payload, so it must be a non-empty
# JSON array; a dict would be iterated key-by-key, and an empty list gives
# Spark nothing to infer a schema from.
if not isinstance(data, list) or not data:
    raise ValueError(
        f"Unexpected response from {url}: expected a non-empty JSON array, "
        f"got {type(data).__name__}"
    )

# Naive UTC timestamp in ISO-8601 form; same string format as the deprecated
# datetime.utcnow().isoformat().
ingest_ts = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
# One bronze row per returned object, keeping the raw JSON text untouched.
rows = [Row(ingest_time=ingest_ts, json_text=json.dumps(obj)) for obj in data]

(
  spark.createDataFrame(rows)
  .write.mode("overwrite")
  .format("delta")
  .saveAsTable("tfl.bronze.modes")   # <-- UC table the Warehouse can see
)


In [0]:
%sql
-- Show every row and column of the bronze modes table.
SELECT *
FROM tfl.bronze.modes;

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StructType, StructField, StringType, BooleanType

# Shape of each JSON object stored in `json_text`; every field is nullable.
schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("$type", StringType()),
        ("isTflService", BooleanType()),
        ("isFarePaying", BooleanType()),
        ("isScheduledService", BooleanType()),
        ("modeName", StringType()),
    )
])

# Parse the raw JSON text into a struct column, then flatten the struct's
# fields into top-level columns alongside the ingest timestamp.
df = spark.read.table("tfl.bronze.modes")
parsed_df = df.select("ingest_time", F.from_json("json_text", schema).alias("data"))
unpacked_df = parsed_df.select("ingest_time", "data.*")

# Replace the contents of the unpacked table with the flattened rows.
(
    unpacked_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("tfl.bronze.modes_unpacked")
)