In [None]:
import os
from datetime import datetime, timezone
from random import randint

from dateutil.parser import parse
from elasticsearch import Elasticsearch


# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks reproduce the
# values this notebook previously hardcoded.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200")
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD", "elastic")

es = Elasticsearch(
    ES_URL,
    # NOTE(review): certificate verification is switched off and the related
    # warning silenced -- presumably because the target is a local cluster
    # with a self-signed certificate; confirm before pointing this elsewhere.
    verify_certs=False,
    ssl_show_warn=False,
    basic_auth=(ES_USER, ES_PASSWORD),
)

# Smoke-test the connection and report the cluster identity. A failure is
# printed rather than raised, so the cell itself always completes.
try:
    info = es.info()
    print("✅ Connected to Elasticsearch!")
    print(f"Cluster name: {info['cluster_name']}")
    print(f"Version: {info['version']['number']}")
except Exception as e:
    print("❌ Connection failed:", e)

✅ Connected to Elasticsearch!
Cluster name: elasticsearch
Version: 8.5.1


In [None]:

def parse_any_time(val):
    """Normalize a timestamp-like value to an ISO-8601 string (second precision).

    Accepted inputs:
      * ``None``          -- returned unchanged.
      * ``int``/``float`` -- a Unix epoch interpreted in UTC. Values of at
        least 1e12 are treated as milliseconds, smaller values as seconds.
      * ``str``           -- parsed with ``dateutil.parser.parse``.
      * ``datetime``      -- used as is.

    Strings and datetimes without timezone information are assumed to be UTC.
    Timezone-aware values keep their own offset; they are not converted.

    Returns:
        The ``isoformat()`` string with microseconds dropped, or ``None`` when
        ``val`` is ``None``.

    Raises:
        ValueError: if ``val`` is of an unsupported type (including ``bool``).
        Errors raised by the underlying parsers propagate unchanged.
    """
    if val is None:
        return None

    # bool is a subclass of int, so without this guard True/False would be
    # silently read as epoch seconds 1/0 instead of being rejected.
    if isinstance(val, bool):
        raise ValueError(f"Unsupported time format: {val}")

    if isinstance(val, (int, float)):
        # 1e12 seconds is far beyond the year datetime can represent, so a
        # value that large can only sensibly be milliseconds.
        ts = val / 1000 if val >= 1e12 else val
        dt = datetime.fromtimestamp(ts, tz=timezone.utc)
    elif isinstance(val, (str, datetime)):
        dt = parse(val) if isinstance(val, str) else val
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
    else:
        raise ValueError(f"Unsupported time format: {val}")

    return dt.replace(microsecond=0).isoformat()


In [None]:
# Draw a random sample of documents whose `platform` term is "reddit".
# Wrapping the filter in function_score/random_score scores the matches
# randomly; a fresh seed is drawn on every execution, so each run of this
# cell returns a different sample.
query_body = {
    "query": {
        "function_score": {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"platform": "reddit"}},
                    ]
                }
            },
            "random_score": {
                "seed": randint(0, 1000000),
                "field": "id",
            },
        }
    },
    "size": 100,
}

response = es.search(index="search_all", body=query_body)

# Print the normalized creation time of every sampled document. A missing or
# unparseable `created_utc` is reported and the loop moves on to the next hit.
for hit in response["hits"]["hits"]:
    doc = hit["_source"]
    try:
        created_dt = parse_any_time(doc["created_utc"])
    except Exception as e:
        print("Error parsing created_utc:", e)
    else:
        print("created at", created_dt)





creeated at 2019-04-10T10:13:37+00:00
creeated at 2018-02-24T01:14:46+00:00
creeated at 2018-10-30T03:47:17+00:00
creeated at 2024-03-09T00:24:57+00:00
creeated at 2017-01-28T09:27:14+00:00
creeated at 2017-10-24T16:39:44+00:00
creeated at 2017-05-21T01:23:03+00:00
creeated at 2018-09-27T04:54:25+00:00
creeated at 2020-05-28T17:57:27+00:00
creeated at 2016-11-24T17:27:33+00:00
creeated at 2021-09-11T23:02:01+00:00
creeated at 2019-12-06T16:30:55+00:00
creeated at 2018-06-21T03:03:39+00:00
creeated at 2017-11-07T19:36:08+00:00
creeated at 2025-03-16T11:45:42+00:00
creeated at 2019-01-11T00:08:53+00:00
creeated at 2017-11-16T12:24:55+00:00
creeated at 2022-01-30T19:18:46+00:00
creeated at 2019-08-08T19:21:13+00:00
creeated at 2022-09-01T02:06:50+00:00
creeated at 2020-05-29T19:40:37+00:00
creeated at 2017-11-14T08:24:29+00:00
creeated at 2020-04-24T19:59:07+00:00
creeated at 2019-12-14T01:12:04+00:00
creeated at 2020-04-22T16:49:39+00:00
creeated at 2018-07-03T20:04:57+00:00
creeated at 