In [1]:
!python -m pip uninstall -y elasticsearch
!python -m pip install "elasticsearch==8.14.0"

Found existing installation: elasticsearch 8.14.0
Uninstalling elasticsearch-8.14.0:
  Successfully uninstalled elasticsearch-8.14.0
Collecting elasticsearch==8.14.0
  Using cached elasticsearch-8.14.0-py3-none-any.whl.metadata (7.2 kB)
Using cached elasticsearch-8.14.0-py3-none-any.whl (480 kB)
Installing collected packages: elasticsearch
Successfully installed elasticsearch-8.14.0


In [2]:
!python -m pip install --upgrade pip
!python -m pip install confluent-kafka ipykernel



In [3]:
!docker compose up -d

!docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"

[33mWARN[0m[0000] /Users/ss/realtime-store/docker-compose.yml: the attribute `version` is obsolete, it will be ignored, please remove it to avoid potential confusion 
[1A[1B[0G[?25l[+] Running 3/3
 [32m✔[0m Container realtime-store-kafka-1  [32mRunning[0m                               [34m0.0s [0m
 [32m✔[0m Container es                      [32mRunning[0m                               [34m0.0s [0m
 [32m✔[0m Container kibana                  [32mRunning[0m                               [34m0.0s [0m
[?25h[1A[1A[1A[1A[0G[?25l[34m[+] Running 3/3[0m
 [32m✔[0m Container realtime-store-kafka-1  [32mRunning[0m                               [34m0.0s [0m
 [32m✔[0m Container es                      [32mRunning[0m                               [34m0.0s [0m
 [32m✔[0m Container kibana                  [32mRunning[0m                               [34m0.0s [0m
[?25h{.Names}   {.Status}   {.Ports}
{.Names}   {.Status}   {.Ports}
{.Names}   {.Status}  

In [4]:
from confluent_kafka.admin import AdminClient

admin = AdminClient({"bootstrap.servers": "localhost:9092"})
md = admin.list_topics(timeout=5)
print("Connected. Brokers:", md.brokers.keys())
print("Topics:", list(md.topics.keys()))

Connected. Brokers: dict_keys([1])
Topics: ['market_ticks', 'trade_orders', '__consumer_offsets']


In [5]:
from confluent_kafka.admin import NewTopic

topics = [
    NewTopic("trade_orders", num_partitions=1, replication_factor=1),
    NewTopic("market_ticks", num_partitions=1, replication_factor=1),
]

fs = admin.create_topics(topics)

for t, f in fs.items():
    try:
        f.result()  # The result itself is None
        print(f"Topic '{t}' created successfully")
    except Exception as e:
        if "Topic already exists" in str(e):
            print(f"Topic '{t}' already exists (reusing it)")
        else:
            print(f"Failed to create topic '{t}': {e}")

Failed to create topic 'trade_orders': KafkaError{code=TOPIC_ALREADY_EXISTS,val=36,str="Topic 'trade_orders' already exists."}
Failed to create topic 'market_ticks': KafkaError{code=TOPIC_ALREADY_EXISTS,val=36,str="Topic 'market_ticks' already exists."}


In [6]:
from confluent_kafka import Producer
import json, random, time, uuid

producer = Producer({
    "bootstrap.servers": "localhost:9092",
    "enable.idempotence": False,
    "acks": "all",
    "linger.ms": 5,
    "batch.size": 32768
})

SYMBOLS = ["AAPL", "MSFT", "TSLA", "NVDA", "BAC", "RY", "TD", "BNS"]
SIDES   = ["BUY", "SELL"]
TYPES   = ["NEW", "CANCEL", "EXECUTE"]

def make_order():
    """Build one fake trade/order event."""
    return {
        "event_type": random.choices(TYPES, weights=[70, 10, 20])[0],
        "order_id": str(uuid.uuid4()),
        "symbol": random.choice(SYMBOLS),
        "side":   random.choice(SIDES),
        "price":  round(random.uniform(10, 500), 2),
        "qty":    random.choice([10, 25, 50, 100, 200]),
        "ts":     time.time(),
    }

TOPIC = "trade_orders"
N = 400 

for i in range(N):
    evt = make_order()
    producer.produce(
        TOPIC,
        key=evt["order_id"].encode(),
        value=json.dumps(evt).encode()
    )
    producer.poll(0)         
    if (i + 1) % 50 == 0:
        print(f"sent {i+1}/{N}")
    time.sleep(0.01)         

producer.flush(10)
print("burst done")

sent 50/400
sent 100/400
sent 150/400
sent 200/400
sent 250/400
sent 300/400
sent 350/400
sent 400/400
burst done


In [7]:
from confluent_kafka import Producer

print("micro-test: producing one message...")
p = Producer({"bootstrap.servers": "localhost:9092", "enable.idempotence": False})

def cb(err, msg):
    if err:
        print("delivery error:", err)
    else:
        print(f"delivered to {msg.topic()}[{msg.partition()}] @ offset {msg.offset()}")

p.produce("trade_orders", key=b"TEST", value=b'{"ping":1}', on_delivery=cb)
p.flush(10)   # wait up to 10s for the delivery report
print("micro-test done")

micro-test: producing one message...
delivered to trade_orders[0] @ offset 2898
micro-test done


In [8]:
import requests

trade_mappings = {
  "properties": {
    "@timestamp": {"type": "date"},
    "event_type": {"type": "keyword"},
    "order_id":   {"type": "keyword"},
    "symbol":     {"type": "keyword"},
    "side":       {"type": "keyword"},
    "price":      {"type": "double"},
    "qty":        {"type": "integer"}
  }
}

tick_mappings = {
  "properties": {
    "@timestamp": {"type": "date"},
    "symbol":     {"type": "keyword"},
    "last":       {"type": "double"},
    "bid":        {"type": "double"},
    "ask":        {"type": "double"},
    "vol":        {"type": "integer"}
  }
}

def create_index_http(name, mappings):
    r = requests.put(f"http://localhost:9200/{name}", json={"mappings": mappings}, timeout=10)
    print(name, r.status_code, r.text[:200])

create_index_http("trade-orders", trade_mappings)
create_index_http("market-ticks", tick_mappings)

trade-orders 400 {"error":{"root_cause":[{"type":"resource_already_exists_exception","reason":"index [trade-orders/kQQzG6JyQ0CuW58kfsbD2Q] already exists","index_uuid":"kQQzG6JyQ0CuW58kfsbD2Q","index":"trade-orders"}]
market-ticks 400 {"error":{"root_cause":[{"type":"resource_already_exists_exception","reason":"index [market-ticks/QlLhxrubQoug0RyGiA8fgA] already exists","index_uuid":"QlLhxrubQoug0RyGiA8fgA","index":"market-ticks"}]


In [9]:
from elasticsearch import Elasticsearch, helpers
es = Elasticsearch("http://localhost:9200")

In [10]:
from confluent_kafka import Consumer, KafkaException
from elasticsearch import helpers
import json, datetime, requests

ES_HOST = "http://localhost:9200"

consumer = Consumer({
    "bootstrap.servers": "localhost:9092",
    "group.id": "py-es-consumer",
    "auto.offset.reset": "earliest",
    "enable.auto.commit": False
})

topics = ["trade_orders", "market_ticks"]
consumer.subscribe(topics)
print(f"Subscribed to: {topics}")

BATCH = 5
buf = []

def to_es_action(msg):
    topic = msg.topic()
    src = json.loads(msg.value())

    if "ts" in src:
        src["@timestamp"] = datetime.datetime.utcfromtimestamp(src["ts"]).isoformat(timespec="milliseconds") + "Z"

    index = "trade-orders" if topic == "trade_orders" else "market-ticks"
    doc_id = f"{topic}-{msg.partition()}-{msg.offset()}"

    return {"_index": index, "_id": doc_id, "_source": src}

try:
    print("Consuming... press stop/interrupt to end")
    while True:
        m = consumer.poll(0.5)
        if m is None:
            continue
        if m.error():
            raise KafkaException(m.error())

        buf.append(to_es_action(m))

        if len(buf) >= BATCH:
            ok, _ = helpers.bulk(es, buf)   # buf contains {"_index","_id","_source"}
            print(f"Flushed {len(buf)} docs -> ok={ok}")
            buf.clear()
            consumer.commit()

except KeyboardInterrupt:
    print("Stopping consumer...")

finally:
    consumer.close()
    print("Consumer closed")

Subscribed to: ['trade_orders', 'market_ticks']
Consuming... press stop/interrupt to end


  src["@timestamp"] = datetime.datetime.utcfromtimestamp(src["ts"]).isoformat(timespec="milliseconds") + "Z"


Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 docs -> ok=5
Flushed 5 d

In [11]:
from confluent_kafka import Producer
import json, time, uuid, random

p = Producer({"bootstrap.servers": "localhost:9092"})

def make_event(i):
    return {
        "event_type": "NEW",
        "order_id": str(uuid.uuid4()),
        "symbol": random.choice(["AAPL","MSFT","NVDA","TSLA"]),
        "side": random.choice(["BUY","SELL"]),
        "price": round(100 + random.random()*50, 2),
        "qty": random.choice([10,25,50,100]),
        "ts": time.time(),  # seconds since epoch
    }

for i in range(20):
    evt = make_event(i)
    p.produce("trade_orders", key=str(i).encode(), value=json.dumps(evt).encode())

p.flush(10)
print("Produced 20 events to 'trade_orders'")

Produced 20 events to 'trade_orders'


In [12]:
import requests, json

print("trade-orders count:", requests.get("http://localhost:9200/trade-orders/_count").json())
print("market-ticks count:", requests.get("http://localhost:9200/market-ticks/_count").json())

resp = requests.get("http://localhost:9200/trade-orders/_search", params={"size": 1, "sort": "@timestamp:desc"})
print(json.dumps(resp.json(), indent=2)[:1200])

trade-orders count: {'count': 2495, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}
market-ticks count: {'count': 0, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}
{
  "took": 15,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2495,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "trade-orders",
        "_id": "trade_orders-0-2896",
        "_score": null,
        "_source": {
          "event_type": "NEW",
          "order_id": "865045fb-87c5-40b7-a0a7-7aa5788cfce8",
          "symbol": "TSLA",
          "side": "SELL",
          "price": 477.2,
          "qty": 50,
          "ts": 1758161268.1144059,
          "@timestamp": "2025-09-18T02:07:48.114Z"
        },
        "sort": [
          1758161268114
        ]
      }
    ]
  }
}
