In [9]:
from pyspark.sql import SparkSession
import requests
import json

# Load the OpenSky API client credentials from a local JSON file.
# The file must provide non-empty "clientId" and "clientSecret" entries.
credentials_path = '/home/gesser/air-traffic-data-pipeline/credentials.json'

with open(credentials_path, encoding="utf-8") as f:
    creds = json.load(f)

client_id = creds.get("clientId")
client_secret = creds.get("clientSecret")

# Fail early, and name the real source of the credentials (the JSON file,
# not environment variables) so the error is actionable.
if not client_id or not client_secret:
    raise ValueError(
        f"Missing 'clientId' or 'clientSecret' in {credentials_path}."
    )

# Endpoint that issues access tokens for the OpenSky API.
token_url = "https://auth.opensky-network.org/auth/realms/opensky-network/protocol/openid-connect/token"

# Form fields for the "client_credentials" grant, using the client id and
# secret loaded earlier in this script.
payload = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret
}

headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}

# requests has no default timeout; without one an unresponsive server would
# block this script forever.
response = requests.post(token_url, data=payload, headers=headers, timeout=30)
response.raise_for_status()

access_token = response.json().get("access_token")

# Stop here rather than sending "Bearer None" in the next request.
if not access_token:
    raise RuntimeError("Token endpoint response did not contain an access_token.")

#print(f"Access token: {access_token}")

# Request the current state vectors, restricted to a latitude/longitude
# bounding box (min/max latitude and longitude in decimal degrees).
# NOTE(review): these bounds look like the Switzerland example from the
# OpenSky REST API docs - confirm this is the intended region.
url = "https://opensky-network.org/api/states/all"
params = {
    "lamin": 45.8389,
    "lomin": 5.9962,
    "lamax": 47.8229,
    "lomax": 10.5226
}

# Authenticate with the bearer token obtained from the token endpoint.
headers = {
    "Authorization": f"Bearer {access_token}"
}

# requests has no default timeout; set one so a stalled connection cannot
# hang the script indefinitely.
response = requests.get(url, headers=headers, params=params, timeout=30)
response.raise_for_status()  # raise error if request failed

# Parsed form of the response body.
data = response.json()

# Load the raw JSON response into a Spark DataFrame and print it.
if response.status_code == 200:
    data_json = response.text

    spark = SparkSession.builder.appName("flight-data-pipeline").getOrCreate()

    try:
        # The whole response body is passed as a single RDD element, so it
        # is parsed as one JSON document.
        rdd = spark.sparkContext.parallelize([data_json])

        # Read JSON from RDD
        df = spark.read.json(rdd)

        df.show()
    finally:
        # Always release the Spark session, even if parsing or display fails.
        spark.stop()
else:
    # Reached only for non-200 responses that raise_for_status() did not
    # reject earlier.
    print(f"Error: {response.status_code} - {response.text}")


+--------------------+----------+
|              states|      time|
+--------------------+----------+
|[[39de41, TVF73KZ...|1750334097|
+--------------------+----------+

