# Debug helpers

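This notebook collects small helper scripts for debugging Kafka topics: deleting a topic, showing the last 10 messages on a topic, and following a topic as new messages arrive.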

In [1]:
%%bash
# Install the required Python 3 dependencies
python3 -m pip install kafka-python  # type: ignore

Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
Installing collected packages: kafka-python
Successfully installed kafka-python-2.0.2


## Delete a Kafka topic

In [1]:
from kafka.admin import KafkaAdminClient, NewTopic
from kafka.errors import TopicAlreadyExistsError

#
#  CHANGE THIS TO THE TOPIC YOU WANT TO DELETE
#
topic = 'clicks'

admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092")
try:
    # delete_topics takes a list of topic names; only `topic` is deleted here.
    delete_response = admin_client.delete_topics([topic])
finally:
    # Always release the broker connections, even when the deletion raises.
    admin_client.close()

# Last expression of the cell, so the notebook displays the broker's response.
delete_response

DeleteTopicsResponse_v3(throttle_time_ms=0, topic_error_codes=[(topic='clicks', error_code=0)])

## Show the last 10 messages on a Kafka topic

In [2]:
import json
from datetime import datetime

from kafka import KafkaConsumer, TopicPartition

client = "localhost:9092"
consumer = KafkaConsumer(bootstrap_servers=[client],
                         enable_auto_commit=False,
                         auto_offset_reset='latest')

#
#  CHANGE THIS TO THE TOPIC YOU WANT TO DEBUG
#
topic = 'clicks'

# Maximum number of trailing messages to print.
tail_size = 10

try:
    # Only partition 0 of the topic is inspected.
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])

    # After seeking to the end, the position is the offset that the *next*
    # produced message will receive, i.e. one past the last existing message.
    consumer.seek_to_end(tp)
    lastOffset = consumer.position(tp)
    print(f"Last offset: {lastOffset}")

    # Never seek before the earliest offset still available on the broker:
    # an out-of-range seek would fall back to auto_offset_reset='latest' and
    # the loop below would then wait for new messages instead of showing the
    # existing ones. Clamping also covers topics with fewer than `tail_size`
    # messages, which must still be printed.
    firstOffset = consumer.beginning_offsets([tp])[tp]
    tailOffset = max(lastOffset - tail_size, firstOffset)

    if tailOffset >= lastOffset:
        print("Topic has no messages!")
    else:
        consumer.seek(tp, tailOffset)

        for message in consumer:
            print(f"RAW MESSAGE VALUE: {message.value}")
            try:
                click = json.loads(message.value)
                print(json.dumps(click, indent=4))
            except (TypeError, ValueError):
                # ValueError covers invalid JSON and undecodable bytes;
                # TypeError covers a message without a value (None).
                # A bare `except:` would also swallow a kernel interrupt.
                print("FAILED to decode message")
            print()
            # Stop once the last message that existed when the cell started
            # has been printed; otherwise the iterator waits for new messages.
            if message.offset >= lastOffset - 1:
                break
finally:
    # Release the broker connection whether or not reading succeeded.
    consumer.close()

Last offset: 100
RAW MESSAGE VALUE: b'{"visitor_platform": "mobile", "ts_ingest": 1639344546129, "article_title": "Francesco Gabriele", "visitor_country": "BE", "visitor_page_timer": 9132, "visitor_os": "android", "article": "https://en.wikipedia.org/wiki/Francesco_Gabriele", "visitor_page_height": 4657, "visitor_browser": "chrome"}'
{
    "visitor_platform": "mobile",
    "ts_ingest": 1639344546129,
    "article_title": "Francesco Gabriele",
    "visitor_country": "BE",
    "visitor_page_timer": 9132,
    "visitor_os": "android",
    "article": "https://en.wikipedia.org/wiki/Francesco_Gabriele",
    "visitor_page_height": 4657,
    "visitor_browser": "chrome"
}

RAW MESSAGE VALUE: b'{"visitor_platform": "mobile", "ts_ingest": 1639344546151, "article_title": "Kingdom of Hawaii", "visitor_country": "NL", "visitor_page_timer": 0, "visitor_os": "ios", "article": "https://en.wikipedia.org/wiki/Kingdom_of_Hawaii", "visitor_page_height": 0, "visitor_browser": "unknown"}'
{
    "visitor_platf

## Follow a Kafka topic

This script prints each message on a Kafka topic as soon as it is produced. It keeps running until you interrupt the kernel.

In [None]:
import json
from datetime import datetime
from IPython.display import clear_output

from kafka import KafkaConsumer, TopicPartition

client = "localhost:9092"
consumer = KafkaConsumer(bootstrap_servers=[client],
                         enable_auto_commit=False,
                         auto_offset_reset='latest')

#
#  CHANGE THIS TO THE TOPIC YOU WANT TO DEBUG
#
topic = 'clicks'

consumer.subscribe([topic])

# Number of messages to print before the cell output is wiped, so that the
# notebook output does not grow without bound while following the topic.
max_printed = 500

# Messages printed since the output was last cleared.
i = 0

try:
    for message in consumer:
        # Clear *before* printing, so the newest message is never wiped
        # right after it has been shown.
        if i >= max_printed:
            clear_output()
            i = 0

        print(f"RAW MESSAGE VALUE: {message.value}")
        try:
            click = json.loads(message.value)
            print(json.dumps(click, indent=4))
        except (TypeError, ValueError):
            # ValueError covers invalid JSON and undecodable bytes;
            # TypeError covers a message without a value (None).
            # A bare `except:` would also swallow the kernel interrupt that
            # is the only way to stop this cell.
            print("FAILED to decode message")
        print()
        i += 1
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop following.
    print("Stopped following topic")
finally:
    # Release the broker connection when the cell stops for any reason.
    consumer.close()