In [0]:
import json
import re

folder_path = "/Volumes/swi_audience_prd/swi_posts/payloads/"

# Collect the path of every ".txt" payload file listed directly in the folder.
files = []
for entry in dbutils.fs.ls(folder_path):
    if entry.path.endswith(".txt"):
        files.append(entry.path)

# Matches `"upperLimit": <digits>`; group 1 keeps the key and colon so that
# only the number itself is replaced.
upper_limit_pattern = re.compile(r'("upperLimit"\s*:\s*)\d+')

payloads = []
for path in files:
    # Read the file line by line through Spark and stitch the lines back together.
    text_rows = spark.read.text(path).collect()
    raw = "\n".join(row.value for row in text_rows)
    # Force every upperLimit in the raw JSON text to 5000000 before parsing.
    raw = upper_limit_pattern.sub(r'\g<1>5000000', raw)
    payloads.append(json.loads(raw))

# (Optional) Pretty-print the parsed payloads for a visual check.
for payload in payloads:
    print(json.dumps(payload, ensure_ascii=False, indent=4))

In [0]:
# Build one target table name per payload file, in the same order as `files`:
# fixed prefix + file name (without directory and ".txt" extension), with any
# remaining dots replaced by underscores.
# The file name is taken as the last path segment instead of stripping a
# hard-coded "dbfs:/Volumes/..." prefix, so the result does not depend on the
# URI scheme of the listed paths, and only the trailing ".txt" is removed.
table_names = []
for f in files:
    file_name = f.rstrip("/").rsplit("/", 1)[-1]
    if file_name.endswith(".txt"):
        file_name = file_name[:-len(".txt")]
    table_names.append(
        "swi_audience_prd.swi_posts.mapp_api_query_" + file_name.replace(".", "_")
    )
table_names

In [0]:
import requests
import os
import json
import time  # Add this import

# Credentials are read from the Databricks secret scope "swi-secret-scope".
# For help: Keller, Pascal (SRF), or see
# https://github.com/mmz-srf/swi-analytics-databricks/blob/main/intelligence.eu.mapp.com_analysis-query_7455/Secret%20management%20in%20databricks.ipynb
SECRET_SCOPE = "swi-secret-scope"
user   = dbutils.secrets.get(SECRET_SCOPE, "mapp-user")
secret = dbutils.secrets.get(SECRET_SCOPE, "mapp-secret")
try:
    baseurl = dbutils.secrets.get(SECRET_SCOPE, "mapp-baseurl")
except Exception:
    # The base URL secret is optional: fall back to the default host when the
    # lookup fails. `Exception` (not a bare `except`) is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    baseurl = "https://intelligence.eu.mapp.com"

# File used by get_token() to cache the access token between calls.
token_file = 'mapp_token.json'

def get_token():
    """Return an OAuth access token, reusing the cached one while it is valid.

    The token is cached as JSON in ``token_file`` under the keys
    ``access_token`` and ``expires_at`` (a ``time.time()`` timestamp). A cache
    file that is unreadable, not valid JSON, not a JSON object, or whose
    ``expires_at`` is not a number is ignored and a fresh token is requested
    from ``{baseurl}/analytics/api/oauth/token`` using ``user``/``secret``.

    Returns:
        The access token from the cache or from the token endpoint.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
        KeyError: if the token response contains no ``access_token``.
    """
    cached = None
    if os.path.exists(token_file):
        try:
            with open(token_file, 'r') as f:
                cached = json.load(f)
        except (OSError, ValueError):
            # Unreadable or corrupt cache (json.JSONDecodeError is a ValueError):
            # treat it as missing and request a fresh token below.
            cached = None

    if isinstance(cached, dict):
        token = cached.get('access_token')
        expires_at = cached.get('expires_at')
        if token and isinstance(expires_at, (int, float)) and time.time() < expires_at:
            return token  # cached token is still valid

    # Request a new token with the client-credentials grant.
    auth_url = f"{baseurl}/analytics/api/oauth/token"
    querystring = {"grant_type": "client_credentials", "scope": "mapp.intelligence-api"}
    # A timeout keeps the notebook from hanging forever on an unresponsive endpoint.
    response = requests.post(auth_url, auth=(user, secret), params=querystring, timeout=30)
    response.raise_for_status()
    result = response.json()
    token = result['access_token']
    expires_in = result.get('expires_in', 3600)  # default lifetime: 3600 seconds
    expires_at = time.time() + expires_in - 60   # expire 60 s early as a safety margin

    # Persist the token so later calls can reuse it.
    with open(token_file, 'w') as f:
        json.dump({'access_token': token, 'expires_at': expires_at}, f)

    return token

# Obtain the access token (taken from the cache file or freshly requested by get_token)
token = get_token()

In [0]:
import requests

# Polling configuration for queries whose result is not available immediately.
MAX_STATUS_POLLS = 10        # give up after this many status checks
POLL_INTERVAL_SECONDS = 10   # pause between two status checks

# The endpoint is the same for every payload, so build it once.
url = f"{baseurl}/analytics/api/analysis-query"

# Submit every payload, wait for its result and append it to the matching table
# (`table_names[i]` belongs to `payloads[i]`).
for i, payload in enumerate(payloads):
    # Ask get_token() per payload instead of reusing one token for the whole run:
    # it returns the cached token while valid and requests a new one once expired.
    headers = {
        'Authorization': f'Bearer {get_token()}',
        'Content-Type': 'application/json'
    }

    response = requests.post(url, headers=headers, json=payload)
    # Fail loudly on HTTP errors instead of continuing with an error body.
    response.raise_for_status()
    result = response.json()

    resultUrl = result.get("resultUrl")
    statusUrl = result.get("statusUrl")

    # Without a status URL there is nothing to poll; requests.get(None) would raise.
    if not resultUrl and not statusUrl:
        print(f"❌ Weder resultUrl noch statusUrl für Payload {i} erhalten.")
        continue

    # Poll the status endpoint until a result URL shows up or the budget is used up.
    tries = 0
    while not resultUrl and tries < MAX_STATUS_POLLS:
        time.sleep(POLL_INTERVAL_SECONDS)
        status_response = requests.get(statusUrl, headers=headers)
        result = status_response.json()
        resultUrl = result.get("resultUrl")
        tries += 1

    if not resultUrl:
        print(f"❌ Kein Ergebnis für Payload {i} nach mehreren Versuchen.")
        continue

    result_response = requests.get(resultUrl, headers=headers)
    result_response.raise_for_status()
    result_data = result_response.json()

    # Column names come from the "headers" entries, the data from "rows".
    headers_out = [col["name"] for col in result_data["headers"]]
    rows = result_data["rows"]

    df = spark.createDataFrame(rows, headers_out)

    # Drop one row from the end of the result.
    # NOTE(review): presumably the last row is a totals/summary row — confirm.
    # NOTE(review): limit() keeps the first row_count - 1 rows, which relies on
    # Spark preserving the order of `rows` — confirm.
    row_count = df.count()
    if row_count > 1:
        df = df.limit(row_count - 1)
    else:
        print(f"⚠️ Zu wenige Zeilen zum Kürzen für Payload {i}.")

    display(df)
    # Append to the Delta table named after the payload file; mergeSchema lets
    # new columns be added to an existing table.
    df.write.format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable(table_names[i])