In [0]:
import requests
import json
from pyspark.sql.functions import current_timestamp, lit

In [0]:
%sql
-- Create the bronze schema that the raw flights table is written into.
-- IF NOT EXISTS makes this cell safe to re-run.
-- NOTE(review): the schema name is unqualified here, while the write cell targets
-- workspace.flights_bronze.raw_flights -- confirm the session's current catalog
-- is `workspace`, otherwise this creates the schema somewhere else.
CREATE SCHEMA IF NOT EXISTS flights_bronze
COMMENT 'This schema holds raw flight data from the Israeli Airport Authority';

In [0]:
# data.gov.il datastore resource to ingest; passed to fetch_flights_data below.
resource_id = "e83f763b-b7d7-479e-b172-ae981ddc6de5"

def fetch_flights_data(rid, timeout=30):
    """Fetch the records of a data.gov.il datastore resource.

    Args:
        rid: Resource ID sent as the ``resource_id`` query parameter of the
            ``datastore_search`` action.
        timeout: Seconds to wait for the HTTP response before requests gives up.

    Returns:
        The value found at ``result -> records`` in the JSON response.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    # NOTE(review): a single request is made with no limit/offset, so only the
    # first page is returned (CKAN's datastore_search presumably defaults to
    # 100 rows) -- confirm whether paging is needed for a complete ingest.
    url = "https://data.gov.il/api/3/action/datastore_search"

    # Use the function argument rather than the notebook-level global, and let
    # requests URL-encode it. The timeout keeps a stalled connection from
    # hanging the cell forever.
    response = requests.get(url, params={"resource_id": rid}, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"API error: {response.status_code}")

    return response.json()['result']['records']

# Pull the records once; the next cell builds the Spark DataFrame from this list.
records = fetch_flights_data(resource_id)

In [0]:
# Build a Spark DataFrame from the records pulled from the API.
df_raw = spark.createDataFrame(records)

# Tag every row with ingestion metadata: when it was loaded and where it came from.
df_bronze = (
    df_raw
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit("data_gov_il_api"))
)

In [0]:
# Append this batch to the bronze Delta table. mergeSchema is enabled so that
# columns present in the batch but not yet in the table can be merged in on write.
(
    df_bronze.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .saveAsTable("workspace.flights_bronze.raw_flights")
)

# Report the batch size; count() is evaluated on df_bronze after the write.
print(f"ingested {df_bronze.count()} records")