In [27]:
from rich.console import Console
from rich_theme_manager import ThemeManager
import pathlib

theme_dir = pathlib.Path("themes")
theme_manager = ThemeManager(theme_dir=theme_dir)
dark = theme_manager.get("dark")

# Create a console with the dark theme
console = Console(theme=dark)

In [2]:
import warnings

# Suppress warnings
warnings.filterwarnings('ignore')

In [30]:
import gdelt

# get a handle to the gdelt data
gd2 = gdelt.gdelt(version=2)

# get events for today
data = gd2.Search(['2025-08-27'],
    table="events",
    coverage=True
)

# save the results in a CSV file to save time on reruns
data.to_csv("data/gdelt_results.csv")

In [32]:
import pandas as pd

# load the gdelt data
data = (
    pd
    .read_csv('data/gdelt_results.csv')
    .query('CAMEOCodeDescription.notna()')
    .reset_index(drop=True)
    .to_dict('records')
)

# output a couple samples
console.print(data[:2])

In [5]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

# create the embedding encoder LLM
encoder = SentenceTransformer('all-MiniLM-L6-v2')




In [6]:
# name the collection
collection_name="gdelt"

# create the collection to store the gdelt data
qdrant.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

True

In [7]:
from codetiming import Timer

with Timer(name='qdrant_upload', text="GDELT data points uploaded in {:.2f}s"):
    # load the vector DB
    qdrant.upload_points(
        collection_name=collection_name,
        points=[
            models.PointStruct(
                id=idx,
                vector=encoder.encode(doc["CAMEOCodeDescription"]).tolist(),
                payload=doc
            ) for idx, doc in enumerate(data)
        ]
    )

GDELT data points uploaded in 1628.30s


In [8]:
# show some stats on the qdrant collection
console.print(qdrant.get_collection(collection_name=collection_name))

In [12]:
# create a question
user_prompt = "who is making endorsements today?"

# get the vectors for the question
query_vector = encoder.encode(user_prompt).tolist()

# get the results
hits = qdrant.search(
    collection_name=collection_name,
    query_vector=query_vector,
    limit=3
)

In [29]:
def format_qdrant_results(results):
    """
    format the results

    :param results:
    :return:
    """
    formatted = []

    # for each result
    for res in results:
        # create a dict of the results
        item = {
            'id': res.id,
            'score': round(res.score, 3),
            'title': res.payload.get('ActionGeo_FullName', 'N/A'),
            'category': res.payload.get('CAMEOCodeDescription', 'N/A'),
            'url': res.payload.get('SOURCEURL', 'N/A'),
            'Actor1Geo_Lat': res.payload.get('Actor1Geo_Lat', 'N/A'),
            'Actor1Geo_Long': res.payload.get('Actor1Geo_Long', 'N/A'),
            'Actor2Geo_Lat': res.payload.get('Actor2Geo_Lat', 'N/A'),
            'Actor2Geo_Long': res.payload.get('Actor2Geo_Long', 'N/A'),
        }

        # add the formatted data to the list
        formatted.append(item)

    # return to the caller
    return formatted

# get the formatted results
formatted_results = format_qdrant_results(hits)

console.print('Prompt:', user_prompt)

# display the results
for item in formatted_results:
    console.print(f"ID: {item['id']}, Score: {item['score']}, Title: {item['title']}, Category: {item['category']}, actor 1 lat/long: {item['Actor1Geo_Lat']},{item['Actor1Geo_Long']}, actor 2 lat/long: {item['Actor2Geo_Lat']},{item['Actor2Geo_Long']},[link={item['url']}]Link to the reference[/link]")
