## inspect api metadata
---


### Inspect GDELT metadata

In [28]:
import requests
import json

GDELT_DOC_API = "https://api.gdeltproject.org/api/v2/doc/doc"

params = {
    "query": "(economy OR business OR politics)",
    "mode": "artlist",
    "format": "json",
    "timespan": "24h",
    "maxrecords": 5,
}

r = requests.get(GDELT_DOC_API, params=params, timeout=30)
data = r.json()

print("Status code:", r.status_code)
articles = data.get("articles", [])[:2]
print(json.dumps(articles, indent=2))


Status code: 200
[
  {
    "url": "https://vneconomy.vn/thu-tuong-lay-hieu-qua-thuc-te-lam-thuoc-do-phat-trien-kinh-te-tu-nhan.htm",
    "url_mobile": "",
    "title": "Th\u1ee7 t\u01b0\u1edbng Ph\u1ea1m Minh Ch\u00ednh : \u0110\u00e1nh gi\u00e1 hi\u1ec7u qu\u1ea3 ph\u00e1t tri\u1ec3n kinh t\u1ebf t\u01b0 nh\u00e2n",
    "seendate": "20251222T044500Z",
    "socialimage": "https://premedia.vneconomy.vn/files/uploads/2025/12/21/fb340ae1db014b95b5e1cf1ffa30d78a-58353.jpg?&mode=crop",
    "domain": "vneconomy.vn",
    "language": "Vietnamese",
    "sourcecountry": "Vietnam"
  },
  {
    "url": "https://www.shuud.mn/a/561159",
    "url_mobile": "",
    "title": "\u041d\u0438\u0439\u0433\u043c\u0438\u0439\u043d \u0445\u04af\u043b\u044d\u044d\u043b\u0442\u044d\u0434 \u043d\u0438\u0439\u0446\u044d\u044d\u0433\u04af\u0439 \u0434\u044d\u0434 \u0441\u0430\u0439\u0434 \u043d\u0430\u0440 ... ",
    "seendate": "20251222T014500Z",
    "socialimage": "https://www.shuud.mn/resource/shuud/image/2025/12

### Inspect EventRegistry metadata

In [29]:
import os
import json
from eventregistry import EventRegistry, QueryArticlesIter

API_KEY = os.getenv("EVENTREGISTRY_API_KEY")

er = EventRegistry(apiKey=API_KEY)

q = QueryArticlesIter(
    keywords="economy OR business OR politics",
    lang=["eng", "deu", "spa"]
)

articles = []
for art in q.execQuery(er):
    articles.append(art)
    if len(articles) >= 3:
        break

print(json.dumps(articles, indent=2)[:3000])


[
  {
    "uri": "8980251686",
    "lang": "eng",
    "isDuplicate": false,
    "date": "2025-12-01",
    "time": "08:50:33",
    "dateTime": "2025-12-01T08:50:33Z",
    "dateTimePub": "2025-12-01T08:42:00Z",
    "dataType": "news",
    "sim": 0.3176470696926117,
    "url": "https://www.pravda.com.ua/eng/articles/2025/12/01/8009732/",
    "title": "Survive. Endure. Prevail!",
    "body": "On 1 December 1991, Ukrainians changed the course of their nation's history, and that of the world, by proclaiming the independence of their country. Today, once again, the future of Ukraine and of the world depends on our choice. That is why we have chosen this day, 1 December, to publish our Manifesto.\n\nWe live in momentous and troubled times, when the world stands at a crossroads between exciting opportunities for development and unprecedented threats to human existence. And our fate, and indeed that of the entire world, largely depends on whether Ukraine will be able to survive, endure and preva

## DuckDB database

In [30]:
import duckdb
import pandas as pd
con = duckdb.connect("world_news.duckdb", read_only=True)

In [31]:
con.execute("""
    SELECT COUNT(*) AS total_articles
    FROM articles
""").df()

#gdelt is limited to max 25 atm


Unnamed: 0,total_articles
0,3247


In [32]:
con.execute("""
    DESCRIBE articles;
""").df()

Unnamed: 0,column_name,column_type,null,key,default,extra
0,article_id,VARCHAR,NO,PRI,,
1,provider,VARCHAR,YES,,,
2,provider_id,VARCHAR,YES,,,
3,url,VARCHAR,YES,,,
4,title,VARCHAR,YES,,,
5,summary,VARCHAR,YES,,,
6,body,VARCHAR,YES,,,
7,image_url,VARCHAR,YES,,,
8,published_at,TIMESTAMP,YES,,,
9,source_name,VARCHAR,YES,,,


In [33]:
con.execute("""
    SELECT provider, COUNT(*) AS count
    FROM articles
    GROUP BY provider
    ORDER BY count DESC
    LIMIT 10
""").df()


Unnamed: 0,provider,count
0,gdelt,1747
1,eventregistry,1500


In [34]:
con.execute("""
    SELECT
        article_id,
        provider,
        source_name,
        source_country,
        language,
        topics,
        published_at,
        url
    FROM articles
    WHERE provider = 'eventregistry'
    ORDER BY published_at DESC
    LIMIT 10
""").df()

Unnamed: 0,article_id,provider,source_name,source_country,language,topics,published_at,url
0,107857cb6172b78ef41eab40be35da39b3f148a53a14ff...,eventregistry,Anglo Celt,Unknown,English,"[""health"", ""science""]",2025-12-21 21:07:52,https://www.anglocelt.ie/2025/12/21/rob-cross-...
1,bdd041922e68651ea0f71b57ff4dc3639c3abd6cb0367e...,eventregistry,FOX10 News,Unknown,English,"[""space"", ""technology""]",2025-12-21 21:07:46,https://www.fox10tv.com/2025/12/21/prichards-m...
2,02f28b1f7a27de1733b1ce464d3d76b6f512632672bb33...,eventregistry,Clipperholics,Unknown,English,"[""business"", ""sports""]",2025-12-21 21:07:27,https://clipperholics.com/tyronn-lue-wishful-t...
3,21f23be463b5f14594d4fe0ebe09494c81513fd586a7f5...,eventregistry,Travel And Tour World,Unknown,English,"[""environment_climate"", ""science""]",2025-12-21 21:07:25,https://www.travelandtourworld.com/news/articl...
4,dae2b81623b18b4f0554c852000b15764b1590f0bfe927...,eventregistry,The Daily Signal,Unknown,English,"[""politics""]",2025-12-21 21:07:21,https://www.dailysignal.com/2025/12/21/vance-a...
5,2e67fa8a8b5df02103de05a2f463b9b38bd42ff91a7228...,eventregistry,Brisbane Times,Unknown,English,"[""politics""]",2025-12-21 21:07:20,https://www.brisbanetimes.com.au/national/quee...
6,2b4913b457bc16d35befa2dd22851f7f06a1129b4d469e...,eventregistry,The Irish Times,Unknown,English,"[""health"", ""politics""]",2025-12-21 21:07:07,https://www.irishtimes.com/opinion/editorials/...
7,cd3e641545de9eb01ca9530031e3dcf8715c3a836fdb06...,eventregistry,Biziday,Unknown,Ron,"[""technology""]",2025-12-21 21:07:04,https://www.biziday.ro/342734-2
8,c257e934b96100166d9ee6cc03e3da518a622559a8ee82...,eventregistry,The Maitland Mercury,Unknown,English,"[""environment_climate""]",2025-12-21 21:07:04,https://www.maitlandmercury.com.au/story/91392...
9,7a192a5522ebeca9d3c34dce6cba4ca04dafd409320851...,eventregistry,GiveMeSport,Unknown,English,"[""sports""]",2025-12-21 21:07:04,https://www.givemesport.com/roy-keane-blames-d...


In [35]:
con.execute("""
    SELECT
        article_id,
        provider,
        source_name,
        source_country,
        language,
        topics,
        published_at
    FROM articles
    WHERE provider = 'gdelt'
    ORDER BY published_at DESC
    LIMIT 10
""").df()

Unnamed: 0,article_id,provider,source_name,source_country,language,topics,published_at
0,4e96e3793621553a9d5253cc3297cb6f0a22632041d84d...,gdelt,United States,United States,English,"[""Unknown""]",2025-12-21 20:45:00
1,911cea0dd432103ce0816b9279efd1a25d94aac902df83...,gdelt,United States,United States,Spanish,"[""Unknown""]",2025-12-21 20:45:00
2,c47c63d2bdc4d6d8130ac9a39fc7a7422bc8b65d6de10d...,gdelt,Argentina,Argentina,Spanish,"[""Unknown""]",2025-12-21 20:45:00
3,51e4779e965c83b6a698eb47ab572c9a467aec28df5bf3...,gdelt,United States,United States,English,"[""Unknown""]",2025-12-21 20:45:00
4,2bf026b038e04f61665c62b19ace95dcaf7e4b1e49801c...,gdelt,Spain,Spain,Spanish,"[""Unknown""]",2025-12-21 20:45:00
5,cef9b7ee66c30564bb07c148580d6c5d0ec32792504fa3...,gdelt,Belgium,Belgium,Dutch,"[""Unknown""]",2025-12-21 20:45:00
6,642bf3687667a251ae9f3084257265d7247e701f8ec8b1...,gdelt,Austria,Austria,German,"[""Unknown""]",2025-12-21 20:45:00
7,6d1d610bbc156a306c9acdf1c5b9e792f1378d29172717...,gdelt,Taiwan,Taiwan,Chinese,"[""Unknown""]",2025-12-21 20:45:00
8,86c9d80156f4f4305b9635a6ae2ba0f4fb8d350f2ef054...,gdelt,Germany,Germany,German,"[""Unknown""]",2025-12-21 20:45:00
9,6a105ddf57eb01f91b68497435301e7e4bd18cfbae5f68...,gdelt,United States,United States,English,"[""Unknown""]",2025-12-21 20:45:00


### Countries with article counts


In [36]:
import duckdb
import pandas as pd

con = duckdb.connect('world_news.duckdb', read_only=True)
countries = con.execute('''
    SELECT
        source_country,
        COUNT(*) AS article_count
    FROM articles
    WHERE source_country IS NOT NULL
    GROUP BY source_country
    ORDER BY article_count DESC
''').df()
con.close()
countries


Unnamed: 0,source_country,article_count
0,Unknown,1500
1,United States,260
2,China,108
3,Spain,98
4,Italy,92
...,...,...
103,Andorra,1
104,Fiji,1
105,Monaco,1
106,United Arab Emirates,1


### Inspect articles with many topics


In [37]:
import duckdb
import pandas as pd

con = duckdb.connect('world_news.duckdb', read_only=True)
df = con.execute("""
    SELECT
        title,
        source_country,
        topics,
        json_array_length(topics) AS topic_count
    FROM articles
    WHERE topics IS NOT NULL
    ORDER BY topic_count DESC
    LIMIT 15
""").df()
con.close()
df


Unnamed: 0,title,source_country,topics,topic_count
0,Innovative PU Tactile Solutions by Eminent Tac...,Unknown,"[""business"", ""technology""]",2
1,Boston Officials Highlight City's Low Crime Ra...,Unknown,"[""health"", ""society""]",2
2,World Insights: How social fractures are fueli...,Unknown,"[""politics"", ""society""]",2
3,He ate a hamburger and died hours later. Docto...,Unknown,"[""health"", ""science""]",2
4,Pharmalittle: We're reading about cancer drug ...,Unknown,"[""business"", ""health""]",2
5,üî¥ KTN LIVE STREAM,Unknown,"[""politics"", ""technology""]",2
6,Ingersoll Rand appoints Jerome Guillen to boar...,Unknown,"[""business"", ""technology""]",2
7,"Positive test ratio drops, but India still has...",Unknown,"[""politics"", ""sports""]",2
8,Priority Recommendations for Puerto Rico to Pr...,Unknown,"[""economy"", ""society""]",2
9,Jeff Bezos and Lauren Sanchez Bezos are awardi...,Unknown,"[""business"", ""society""]",2
