In [1]:
import sys
sys.path.append('../')

In [2]:
import numpy as np
from src.utils import publish_message, read_messages
from src.utils import connect_kafka_producer, connect_kafka_consumer

In [3]:
kafka_servers = ['kafka:9093']
kafka_topics = ['clickstream', 'bookmarks']

In [4]:
def generate_clickstream(n_samples=5):
    sample = []
    for i in range(n_samples):
        key = f"user_{np.random.randint(1000)}"
        item = f"item_{np.random.randint(30)}"
        reaction = np.random.choice(['like', 'dislike', 'comment'])
        value = {item: reaction}
        sample.append((key, value))
    return sample

In [5]:
def generate_bookmarks(n_samples=5):
    sample = []
    for i in range(n_samples):
        key = f"user_{np.random.randint(1000)}"
        value = f"item_{np.random.randint(30)}"
        sample.append((key, value))
    return sample

In [6]:
producer = connect_kafka_producer(kafka_servers)

for key, value in generate_clickstream(5):
    publish_message(producer, kafka_topics[0], key, value)

Message 'user_532: {'item_29': 'comment'}' published successfully.
Message 'user_804: {'item_5': 'comment'}' published successfully.
Message 'user_996: {'item_17': 'comment'}' published successfully.
Message 'user_351: {'item_28': 'like'}' published successfully.
Message 'user_458: {'item_18': 'dislike'}' published successfully.


In [7]:
for key, value in generate_bookmarks(5):
    publish_message(producer, kafka_topics[1], key, value)

Message 'user_12: item_10' published successfully.
Message 'user_623: item_9' published successfully.
Message 'user_608: item_23' published successfully.
Message 'user_689: item_0' published successfully.
Message 'user_814: item_18' published successfully.


In [8]:
consumers = []
for topic in kafka_topics:
    print(f"adding consumer for {topic} topic")
    consumers.append(connect_kafka_consumer(topic, kafka_servers))

adding consumer for clickstream topic
adding consumer for bookmarks topic


In [9]:
for cons in consumers:
    print(f"__reading from {cons.subscription()} topics__")
    print(*read_messages(cons), sep='\n')

__reading from {'clickstream'} topics__
{'user_133': {'item_28': 'comment'}}
{'user_583': {'item_15': 'dislike'}}
{'user_201': {'item_17': 'like'}}
{'user_524': {'item_3': 'like'}}
{'user_739': {'item_20': 'like'}}
{'user_398': {'item_20': 'like'}}
{'user_721': {'item_24': 'comment'}}
{'user_532': {'item_29': 'comment'}}
{'user_804': {'item_5': 'comment'}}
{'user_351': {'item_28': 'like'}}
{'user_458': {'item_18': 'dislike'}}
{'user_205': {'item_6': 'like'}}
{'user_200': {'item_1': 'comment'}}
{'user_543': {'item_15': 'dislike'}}
{'user_152': {'item_7': 'like'}}
{'user_996': {'item_17': 'comment'}}
__reading from {'bookmarks'} topics__
{'user_681': 'item_4'}
{'user_895': 'item_3'}
{'user_997': 'item_28'}
{'user_477': 'item_13'}
{'user_145': 'item_26'}
{'user_978': 'item_7'}
{'user_478': 'item_24'}
{'user_715': 'item_1'}
{'user_663': 'item_20'}
{'user_980': 'item_28'}
{'user_865': 'item_9'}
{'user_832': 'item_23'}
{'user_72': 'item_5'}
{'user_719': 'item_9'}
{'user_224': 'item_27'}
{'us

In [10]:
for key, value in generate_clickstream(1):
    publish_message(producer, kafka_topics[0], key, value)

Message 'user_737: {'item_3': 'comment'}' published successfully.


In [11]:
for key, value in generate_bookmarks(10):
    publish_message(producer, kafka_topics[1], key, value)

Message 'user_168: item_29' published successfully.
Message 'user_518: item_12' published successfully.
Message 'user_817: item_6' published successfully.
Message 'user_716: item_14' published successfully.
Message 'user_704: item_26' published successfully.
Message 'user_65: item_27' published successfully.
Message 'user_872: item_6' published successfully.
Message 'user_97: item_14' published successfully.
Message 'user_746: item_4' published successfully.
Message 'user_970: item_24' published successfully.


In [12]:
for cons in consumers:
    print(f"__reading from {cons.subscription()} topics__")
    print(*read_messages(cons), sep='\n')

__reading from {'clickstream'} topics__
{'user_737': {'item_3': 'comment'}}
__reading from {'bookmarks'} topics__
{'user_168': 'item_29'}
{'user_518': 'item_12'}
{'user_817': 'item_6'}
{'user_716': 'item_14'}
{'user_704': 'item_26'}
{'user_65': 'item_27'}
{'user_872': 'item_6'}
{'user_97': 'item_14'}
{'user_746': 'item_4'}
{'user_970': 'item_24'}


In [13]:
if producer is not None:
    producer.close()
for cons in consumers:
    if cons is not None:
        cons.close()

In [15]:
for key, value in generate_clickstream(1):
    publish_message(producer, kafka_topics[0], key, value)

Exception in publishing message
RecordAccumulator is closed
