Client code

In [1]:
from collections import OrderedDict
import time
from random import randint
import zmq
import itertools
import logging
import sys
from threading import Thread

In [2]:

def run_producer(server_endpoint="tcp://localhost:5555",
                 request_timeout=2500,
                 request_retries=3):
    """Send an endless sequence of numbered requests over a REQ socket.

    Each request carries the current sequence number as ASCII bytes and is
    considered answered when the reply parses to the same number. If no
    matching reply arrives within ``request_timeout`` the socket is closed,
    a fresh one is opened and the same request is sent again. After
    ``request_retries`` consecutive timeouts for one request the client
    gives up and returns.

    Args:
        server_endpoint: ZeroMQ endpoint the REQ socket connects to.
        request_timeout: How long to wait for a reply, in milliseconds.
        request_retries: Number of timeouts tolerated per request.
    """
    logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)

    context = zmq.Context()

    def connect():
        """Open a new REQ socket connected to the server endpoint."""
        sock = context.socket(zmq.REQ)
        sock.connect(server_endpoint)
        return sock

    logging.info("Connecting to server…")
    client = connect()
    try:
        for sequence in itertools.count():
            request = str(sequence).encode()
            logging.info("Sending (%s)", request)
            client.send(request)

            retries_left = request_retries
            while True:
                # check if there is a result
                if (client.poll(request_timeout) & zmq.POLLIN) != 0:
                    reply = client.recv()
                    try:
                        reply_ok = int(reply) == sequence
                    except ValueError:
                        # A non-numeric reply is malformed, not a reason to
                        # kill the client thread with a traceback.
                        reply_ok = False
                    if reply_ok:
                        logging.info("Server replied OK (%s)", reply)
                        break
                    logging.error("Malformed reply from server: %s", reply)
                    # Keep polling; with nothing else pending this falls
                    # through to the timeout path below.
                    continue

                retries_left -= 1
                logging.warning("No response from server")
                # Socket is confused. Close and remove it.
                client.close(linger=0)
                if retries_left <= 0:
                    logging.error("Was not able to send request")
                    print("STOPPING CLIENT")
                    return

                logging.info("Reconnecting to server…")
                # Create new connection
                client = connect()
                logging.info("Resending (%s)", request)
                client.send(request)
    finally:
        # Release the socket (if still open) and the context on every exit
        # path, including unexpected exceptions.
        if not client.closed:
            client.close(linger=0)
        context.term()
            

Queue code

In [3]:

def run_message_queue():
    """Run the Paranoid Pirate queue between clients and workers.

    Binds one ROUTER socket on tcp://*:5555 for clients (frontend) and one
    on tcp://*:5556 for workers (backend). Any message from a worker marks
    that worker as ready; messages longer than a single control frame are
    forwarded to the frontend as replies. Client requests are handed to the
    worker that has been waiting longest. Idle workers receive a heartbeat
    every HEARTBEAT_INTERVAL seconds and are dropped once their expiry time
    has passed.

    The loop only ends if a receive returns no frames; both sockets and the
    context are released on every exit path.
    """
    HEARTBEAT_LIVENESS = 3     # 3..5 is reasonable
    HEARTBEAT_INTERVAL = 1.0   # Seconds

    #  Paranoid Pirate Protocol constants
    PPP_READY = b"\x01"      # Signals worker is ready
    PPP_HEARTBEAT = b"\x02"  # Signals worker heartbeat

    class Worker(object):
        """An idle worker, identified by its routing address."""

        def __init__(self, address):
            self.address = address
            # Moment after which the worker counts as dead unless it has
            # been heard from again.
            self.expiry = time.time() + HEARTBEAT_INTERVAL * HEARTBEAT_LIVENESS

    class WorkerQueue(object):
        """Idle workers keyed by address, oldest entry first."""

        def __init__(self):
            self.queue = OrderedDict()

        def ready(self, worker):
            """Insert (or move) the worker at the back of the queue."""
            self.queue.pop(worker.address, None)
            self.queue[worker.address] = worker

        def purge(self):
            """Look for & kill expired workers."""
            t = time.time()
            expired = [address for address, worker in self.queue.items()
                       if t > worker.expiry]
            for address in expired:
                print("W: Idle worker expired: %s" % address)
                self.queue.pop(address, None)

        def next(self):
            """Remove and return the address at the front of the queue."""
            address, _worker = self.queue.popitem(last=False)
            return address

    context = zmq.Context(1)

    frontend = context.socket(zmq.ROUTER)  # ROUTER
    backend = context.socket(zmq.ROUTER)   # ROUTER

    try:
        frontend.bind("tcp://*:5555")  # For clients
        backend.bind("tcp://*:5556")   # For workers

        poll_workers = zmq.Poller()
        poll_workers.register(backend, zmq.POLLIN)

        poll_both = zmq.Poller()
        poll_both.register(frontend, zmq.POLLIN)
        poll_both.register(backend, zmq.POLLIN)

        workers = WorkerQueue()

        heartbeat_at = time.time() + HEARTBEAT_INTERVAL

        while True:
            # Only listen to clients while at least one worker is idle, so a
            # request is never read without somewhere to send it.
            if len(workers.queue) > 0:
                poller = poll_both
            else:
                poller = poll_workers
            socks = dict(poller.poll(HEARTBEAT_INTERVAL * 1000))

            # Handle worker message
            if socks.get(backend, 0) & zmq.POLLIN:
                # Use worker address for LRU routing
                frames = backend.recv_multipart()
                if not frames:
                    break

                address = frames[0]
                workers.ready(Worker(address))

                # Validate control message, or return reply to client
                msg = frames[1:]
                if len(msg) == 1:
                    if msg[0] not in (PPP_READY, PPP_HEARTBEAT):
                        print("E: Invalid message from worker: %s" % msg)
                else:
                    frontend.send_multipart(msg)

            # passing message to backend
            if socks.get(frontend, 0) & zmq.POLLIN:
                frames = frontend.recv_multipart()
                print("FRAMES", frames)
                if not frames:
                    print("SERVER GOING DOWN")
                    break

                frames.insert(0, workers.next())
                backend.send_multipart(frames)

            # Send heartbeats to idle workers if it's time. This runs on
            # every pass of the loop rather than only after a worker message,
            # so the heartbeat schedule does not depend on backend traffic.
            if time.time() >= heartbeat_at:
                for worker_address in workers.queue:
                    backend.send_multipart([worker_address, PPP_HEARTBEAT])
                heartbeat_at = time.time() + HEARTBEAT_INTERVAL

            workers.purge()
    finally:
        # Drop unsent messages and free the ports so the queue can be
        # restarted in the same kernel.
        frontend.close(linger=0)
        backend.close(linger=0)
        context.term()
    print("SERVER IS CLOSING")

Worker code

In [4]:


def run_consumer():
    """Run a Paranoid Pirate worker that echoes requests back to the queue.

    Connects a DEALER socket with a random identity to tcp://localhost:5556
    and announces itself with PPP_READY. Three-frame messages are treated as
    requests and sent back unchanged; single-frame PPP_HEARTBEAT messages
    refresh the liveness counter. After HEARTBEAT_LIVENESS silent poll
    intervals the worker sleeps for a back-off interval (doubling up to
    INTERVAL_MAX) and reconnects with a fresh socket. After a few requests
    it randomly simulates a crash (leaves the loop) or a CPU overload
    (sleeps before replying).

    The socket and context are released on every exit path.
    """
    HEARTBEAT_LIVENESS = 3
    HEARTBEAT_INTERVAL = 1
    INTERVAL_INIT = 1
    INTERVAL_MAX = 32

    #  Paranoid Pirate Protocol constants
    PPP_READY = b"\x01"      # Signals worker is ready
    PPP_HEARTBEAT = b"\x02"  # Signals worker heartbeat

    def worker_socket(context, poller):
        """Helper function that returns a new configured socket
           connected to the Paranoid Pirate queue"""
        worker = context.socket(zmq.DEALER)  # DEALER
        # randint includes both bounds, so cap at 0xFFFF to keep each half
        # of the identity at exactly four hex digits.
        identity = b"%04X-%04X" % (randint(0, 0xFFFF), randint(0, 0xFFFF))
        worker.setsockopt(zmq.IDENTITY, identity)
        poller.register(worker, zmq.POLLIN)
        worker.connect("tcp://localhost:5556")
        worker.send(PPP_READY)
        return worker

    context = zmq.Context(1)
    poller = zmq.Poller()

    liveness = HEARTBEAT_LIVENESS
    interval = INTERVAL_INIT

    heartbeat_at = time.time() + HEARTBEAT_INTERVAL

    worker = worker_socket(context, poller)
    cycles = 0
    try:
        while True:
            socks = dict(poller.poll(HEARTBEAT_INTERVAL * 1000))

            # Handle worker activity on backend
            if socks.get(worker, 0) & zmq.POLLIN:
                #  Get message
                #  - 3-part envelope + content -> request
                #  - 1-part HEARTBEAT -> heartbeat
                frames = worker.recv_multipart()
                if not frames:
                    break  # Interrupted

                # get normal message
                if len(frames) == 3:
                    # Simulate various problems, after a few cycles
                    cycles += 1
                    if cycles > 3 and randint(0, 5) == 0:
                        print("I: Simulating a crash")
                        break
                    if cycles > 3 and randint(0, 5) == 0:
                        print("I: Simulating CPU overload")
                        time.sleep(3)
                    print("I: Normal reply")
                    worker.send_multipart(frames)
                    liveness = HEARTBEAT_LIVENESS
                    time.sleep(1)  # Do some heavy work
                # process heartbeat
                elif len(frames) == 1 and frames[0] == PPP_HEARTBEAT:
                    print("I: Queue heartbeat")
                    liveness = HEARTBEAT_LIVENESS
                # process wrong message
                else:
                    print("E: Invalid message: %s" % frames)
                # Any traffic from the queue resets the reconnect back-off.
                interval = INTERVAL_INIT
            # process silence
            else:
                liveness -= 1
                if liveness == 0:
                    print("W: Heartbeat failure, can't reach queue")
                    print("W: Reconnecting in %0.2fs..." % interval)
                    time.sleep(interval)

                    if interval < INTERVAL_MAX:
                        interval *= 2
                    # Replace the socket entirely; the new one gets a new
                    # identity and announces itself with PPP_READY.
                    poller.unregister(worker)
                    worker.close(linger=0)
                    worker = worker_socket(context, poller)
                    liveness = HEARTBEAT_LIVENESS
            # send heartbeat
            if time.time() > heartbeat_at:
                heartbeat_at = time.time() + HEARTBEAT_INTERVAL
                print("I: Sending Worker heartbeat")
                worker.send(PPP_HEARTBEAT)
    finally:
        # A simulated crash runs in a thread of a live kernel, so the socket
        # must be closed explicitly or it stays connected to the queue.
        worker.close(linger=0)
        context.term()

In [5]:
from time import sleep

In [6]:
# Start order: two producers, then the queue, then (one second later) three
# consumers. Each component runs in its own thread.
for _ in range(2):
    Thread(target=run_producer).start()
Thread(target=run_message_queue).start()
sleep(1)
for _ in range(3):
    Thread(target=run_consumer).start()

# Keep the cell running for a minute so the threads' output is captured here.
sleep(60)

INFO: Connecting to server…
INFO: Connecting to server…
INFO: Sending (b'0')
INFO: Sending (b'0')
INFO: Server replied OK (b'0')
INFO: Server replied OK (b'0')
INFO: Sending (b'1')
INFO: Sending (b'1')
INFO: Server replied OK (b'1')
INFO: Sending (b'2')


I: Queue heartbeatFRAMES [b'\x00\xb1\x16\x98\x98', b'', b'0']

I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'0']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'1']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'1']
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'2']


INFO: Server replied OK (b'2')
INFO: Server replied OK (b'1')
INFO: Sending (b'3')
INFO: Sending (b'2')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Normal reply
I: Normal reply
I: Sending Worker heartbeat
I: Queue heartbeat
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'3']
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'2']


INFO: Server replied OK (b'2')
INFO: Server replied OK (b'3')
INFO: Sending (b'3')
INFO: Sending (b'4')
INFO: Server replied OK (b'3')
INFO: Sending (b'4')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Sending Worker heartbeat
I: Normal reply
I: Queue heartbeat
I: Normal reply
I: Queue heartbeat
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'3']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'4']
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'4']


INFO: Server replied OK (b'4')
INFO: Server replied OK (b'4')
INFO: Sending (b'5')
INFO: Sending (b'5')
INFO: Server replied OK (b'5')
INFO: Sending (b'6')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Normal reply
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'5']
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'5']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'6']


INFO: Server replied OK (b'5')
INFO: Server replied OK (b'6')
INFO: Sending (b'6')
INFO: Sending (b'7')
INFO: Server replied OK (b'6')
INFO: Sending (b'7')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Queue heartbeat
I: Normal reply
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'6']
FRAMES [b'\x00\xb1\x16\x98\x99', b'', b'7']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x98', b'', b'7']
I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Queue heartbeat
I: Simulating CPU overload
I: Simulating CPU overload
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat


INFO: Reconnecting to server…
INFO: Resending (b'7')
INFO: Reconnecting to server…
INFO: Resending (b'7')


FRAMES [b'\x00\xb1\x16\x98\x9a', b'', b'7']
FRAMES [b'\x00\xb1\x16\x98\x9b', b'', b'7']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal replyI: Normal reply

I: Sending Worker heartbeat
I: Queue heartbeat


INFO: Reconnecting to server…
INFO: Resending (b'7')
INFO: Reconnecting to server…
INFO: Resending (b'7')
INFO: Server replied OK (b'7')
INFO: Sending (b'8')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Queue heartbeat
I: Queue heartbeat
I: Simulating a crash
I: Queue heartbeat
I: Simulating CPU overload
FRAMES [b'\x00\xb1\x16\x98\x9c', b'', b'7']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9d', b'', b'7']
FRAMES [b'\x00\xb1\x16\x98\x9c', b'', b'8']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat


ERROR: Was not able to send request
INFO: Reconnecting to server…
INFO: Resending (b'8')
INFO: Server replied OK (b'8')
INFO: Sending (b'9')


STOPPING CLIENT
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'8']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'9']
I: Normal reply


INFO: Server replied OK (b'9')
INFO: Sending (b'10')


I: Sending Worker heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'10']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Normal reply
I: Sending Worker heartbeat
I: Queue heartbeat
I: Queue heartbeat


INFO: Server replied OK (b'10')
INFO: Sending (b'11')
INFO: Server replied OK (b'11')
INFO: Sending (b'12')


I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'11']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'12']


INFO: Server replied OK (b'12')
INFO: Sending (b'13')


I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
I: Sending Worker heartbeat
I: Queue heartbeat
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'13']


INFO: Server replied OK (b'13')
INFO: Sending (b'14')
INFO: Server replied OK (b'14')
INFO: Sending (b'15')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Queue heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'14']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'15']


INFO: Server replied OK (b'15')
INFO: Sending (b'16')
INFO: Server replied OK (b'16')
INFO: Sending (b'17')


I: Sending Worker heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'16']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'17']


INFO: Server replied OK (b'17')
INFO: Sending (b'18')


I: Sending Worker heartbeat
I: Queue heartbeat
I: Normal reply
I: Sending Worker heartbeat
I: Queue heartbeat
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'18']


INFO: Server replied OK (b'18')
INFO: Sending (b'19')


I: Sending Worker heartbeatI: Sending Worker heartbeat

I: Queue heartbeat
I: Queue heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9e', b'', b'19']
I: Simulating a crash
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat


INFO: Reconnecting to server…
INFO: Resending (b'19')
INFO: Server replied OK (b'19')
INFO: Sending (b'20')


FRAMES [b'\x00\xb1\x16\x98\x9f', b'', b'19']
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9f', b'', b'20']


INFO: Server replied OK (b'20')
INFO: Sending (b'21')


I: Sending Worker heartbeat
I: Normal reply
FRAMES [b'\x00\xb1\x16\x98\x9f', b'', b'21']
I: Sending Worker heartbeat
I: Queue heartbeat
I: Simulating CPU overload


INFO: Reconnecting to server…
INFO: Resending (b'21')


FRAMES [b'\x00\xb1\x16\x98\xa0', b'', b'21']
I: Normal reply


INFO: Reconnecting to server…
INFO: Resending (b'21')


I: Sending Worker heartbeat
I: Queue heartbeat
I: Simulating CPU overload
FRAMES [b'\x00\xb1\x16\x98\xa1', b'', b'21']


ERROR: Was not able to send request


STOPPING CLIENT
I: Normal reply
I: Sending Worker heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Simulating CPU overload
I: Normal replyW: Idle worker expired: b'3CBB-8481'

I: Sending Worker heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Wo

I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Worker heartbeat
I: Queue heartbeat
I: Sending Wo