### Documentazione del Servizio Apitube
- News REST API service:  https://apitube.io/
- Documentation:          https://docs.apitube.io/guides/user-guide/what-is-apitube
- Response structure:     https://docs.apitube.io/platform/news-api/response-structure
- Dashboard (API key):    https://dashboard.apitube.io/
- Cookbook:               https://apitube.io/cookbook

In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import ExternalFunctionRequestHttpMethod

# Create the Databricks workspace client with default arguments; the next
# cell uses it (`w.serving_endpoints`) to call the external news service.
# NOTE(review): credentials are presumably picked up from the notebook
# environment since none are passed here — confirm.
w = WorkspaceClient()

In [0]:
# Query filters sent to the news API as URL parameters.
# Additional filters can be added to this dict, for example:
#   "location.name": "Milan,Rome",
#   "ignore.location.name": "New York",
search_filters = {
    "title": "Teamsystem",
    "language.code": "it",
}

# Issue a GET through the connection named "Apitube"; no extra headers are
# supplied by this cell.
response = w.serving_endpoints.http_request(
    conn="Apitube",
    method=ExternalFunctionRequestHttpMethod.GET,
    path="",
    params=search_filters,
    headers={},
)

# Equivalent to the print(response.text) of the plain `requests` version.
print(response.text)

In [0]:
import os

import requests

# News REST API service:  https://apitube.io/
# Documentation:          https://docs.apitube.io/guides/user-guide/what-is-apitube
# Response structure:     https://docs.apitube.io/platform/news-api/response-structure
# Dashboard (API key):    https://dashboard.apitube.io/
# Cookbook:               https://apitube.io/cookbook

# SECURITY: the API key must never live in the notebook source. It is read
# from the APITUBE_API_KEY environment variable instead (on Databricks this
# variable can be populated from a secret scope in the cluster configuration).
# The key that used to be hard-coded in this cell has been exposed and should
# be revoked and regenerated from the dashboard.
api_key = os.environ.get("APITUBE_API_KEY")
if not api_key:
    raise RuntimeError(
        "APITUBE_API_KEY is not set; configure it (e.g. from a secret scope) "
        "before running this cell."
    )

querystring = {
    "title": "Teamsystem",
    # "location.name": "Milan,Rome",
    "language.code": "it",
    # "ignore.location.name": "New York",
    "api_key": api_key,
}

url = "https://api.apitube.io/v1/news/everything"

# A timeout prevents the cell from hanging forever on a stalled connection.
response = requests.get(url, params=querystring, timeout=30)

# Print first so the server's error body stays visible even when the status
# check below raises.
print(response.text)

# Stop here on HTTP errors (4xx/5xx) so the following cells never parse an
# error payload as if it were a list of articles.
response.raise_for_status()

In [0]:
# Parse the JSON body of `response`. Both request cells above assign this
# name, so the payload comes from whichever of them ran most recently.
data = response.json()
# Render the parsed payload in the notebook output for inspection.
display(data)

In [0]:
import pandas as pd


def _flatten_article(article):
    """Return one flat row (a dict) for a single article of the API payload.

    Top-level fields are copied as-is. The nested ``author`` and ``source``
    objects are expanded into ``author_*`` / ``source_*`` columns; when the
    nested object is missing or empty, those columns are ``None``.
    """
    # Resolve the nested objects once instead of re-reading them per field.
    author = article.get("author") or {}
    source = article.get("source") or {}
    return {
        "id": article.get("id"),
        "href": article.get("href"),
        "published_at": article.get("published_at"),
        "title": article.get("title"),
        "description": article.get("description"),
        "body": article.get("body"),
        "excerpt": article.get("excerpt"),
        "language": article.get("language"),
        "sentiment": article.get("sentiment"),
        "categories": article.get("categories"),
        "topics": article.get("topics"),
        "industries": article.get("industries"),
        "entities": article.get("entities"),
        "persons": article.get("persons"),
        "locations": article.get("locations"),
        "organizations": article.get("organizations"),
        "brands": article.get("brands"),
        "author_id": author.get("id"),
        "author_name": author.get("name"),
        "author_href": author.get("href"),
        "image": article.get("image"),
        "images": article.get("images"),  # image collection
        "media": article.get("media"),
        "is_duplicate": article.get("is_duplicate"),
        "is_paywall": article.get("is_paywall"),
        "is_breaking": article.get("is_breaking"),
        "source_id": source.get("id"),
        "source_name": source.get("name"),
        "source_href": source.get("href"),
        "source_rank": source.get("rank"),
        "source_categories": source.get("categories"),
        "source_location": source.get("location"),
        "keywords": article.get("keywords"),
        "paywall_url": article.get("paywall_url"),
        "canonical_url": article.get("canonical_url"),
        # Add here any further field listed in the response-structure docs.
    }


# Extract the main information from every article. `or []` also covers a
# payload where "results" is present but null, which `.get("results", [])`
# would try to iterate and crash on.
articles = [_flatten_article(article) for article in data.get("results") or []]

# An empty list yields a pandas frame with no columns, which gives Spark
# nothing to build a schema from; fail with an explicit message instead.
if not articles:
    raise ValueError(
        "The API response contains no articles in 'results'; "
        "nothing to convert to a DataFrame."
    )

# pandas -> Spark DataFrame (Databricks)
pdf = pd.DataFrame(articles)
df = spark.createDataFrame(pdf)

# Show a sample
display(df)

In [0]:
# Collect the columns whose Spark type name is "void" and remove them before
# persisting.
# NOTE(review): presumably these are columns that were null for every
# article and cannot be stored by the table format — confirm.
void_fields = [
    field.name
    for field in df.schema.fields
    if field.dataType.typeName() == "void"
]
df = df.drop(*void_fields)

# Replace the table contents and allow its schema to change, since the set of
# surviving columns can differ from one run to the next.
table_writer = df.write.mode("overwrite").option("overwriteSchema", "true")
table_writer.saveAsTable("dad_open_data.news.apitube_news")