Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spout and blocking connection? #85

Closed
dmitry-saritasa opened this issue Mar 26, 2017 · 3 comments
Closed

Spout and blocking connection? #85

dmitry-saritasa opened this issue Mar 26, 2017 · 3 comments

Comments

@dmitry-saritasa
Copy link

  1. I have a simple kafka spout.
  2. When messages are coming into kafka - topology works well
  3. When there are no messages for quite a while my local topology crashes with strange symptoms
import time

import config as cfg
from petrel import storm
from petrel.emitter import Spout

# enable loggers
log = cfg.config.loggers.kafka


class KafkaSpout(Spout):

    def __init__(self):
        self.stream = cfg.config.kafka
        # print(__file__)
        super().__init__(script=__file__)

    @classmethod
    def declareOutputFields(cls):
        return ['record']

    def nextTuple(self):
        try:
            record = next(self.stream)
            log.info("{}:{}:{}: key={} value={}".format(
                record.topic,
                record.partition,
                record.offset,
                record.key,
                record.value))
            storm.emit([record.value.decode()])
        finally:
            self.stream.commit()


def run():
    KafkaSpout().run()

so inside nextTuple I'm doing this record = next(self.stream), everything works perfect when messages are going inside kafka, but as soon as they're paused (i.e. no messages in kafka for 2 mins and then coming again) - storm process crashes with the following

681 [Thread-24] INFO  o.a.s.d.executor - BOLT ack TASK: 2 TIME:  TUPLE: source: spout:3, stream: default, id: {}, ["11
112222hello 8"]
Topology is running. Press ^C to stop it.
65505 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __sy
stem:-1, stream: __metrics_tick, id: {}, [60]
65510 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.task - Emitting: __acker __metrics [#object[org.apache.storm.metr
ic.api.IMetricsConsumer$TaskInfo 0x3b63e233 "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@3b63e233"] [#object[
org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x7b2fdc9e "[__emit-count = {}]"] #object[org.apache.storm.metri
c.api.IMetricsConsumer$DataPoint 0x5bb88d7d "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsCon
sumer$DataPoint 0x2d453da2 "[__receive = {arrival_rate_secs=0.10974539069359086, overflow=0, read_pos=-1, write_pos=0, 
sojourn_time_ms=9112.0, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 
0x9133043 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0xc5dd6d3 "[__transfer-c
ount = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x13917140 "[__execute-latency = {}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x173de6bd "[__fail-count = {}]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x73c4d31 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_p
os=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPo
int 0x65b5e84b "[__execute-count = {}]"]]]
65512 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: _
_system:-1, stream: __metrics_tick, id: {}, [60]
65516 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.task - Emitting: __system __metrics [#object[org.apache.storm.
metric.api.IMetricsConsumer$TaskInfo 0x177e2876 "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@177e2876"] [#obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x532090c2 "[uptimeSecs = 65.567]"] #object[org.apache.storm
.metric.api.IMetricsConsumer$DataPoint 0x7f9d7b79 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsCon
sumer$DataPoint 0x9d4b20a "[__transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x6
f8f06de "[newWorkerEvent = 1]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x387e3d6d "[__send-icon
nection = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x2cacdfa8 "[__fail-count = {}]"] #objec
t[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0xc47cd5 "[startTimeSecs = 1.490502217253E9]"] #object[org.apa
che.storm.metric.api.IMetricsConsumer$DataPoint 0x17ea33e5 "[__emit-count = {}]"] #object[org.apache.storm.metric.api.I
MetricsConsumer$DataPoint 0x5e36e120 "[memory/nonHeap = {unusedBytes=1163072, virtualFreeBytes=-60571841, initBytes=255
5904, committedBytes=61734912, maxBytes=-1, usedBytes=60571840}]"] #object[org.apache.storm.metric.api.IMetricsConsumer
$DataPoint 0x142d34d1 "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x372c
d055 "[__receive = {arrival_rate_secs=0.10967317394165386, overflow=0, read_pos=-1, write_pos=0, sojourn_time_ms=9118.0
, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x719fa84e "[__transfe
r = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"]
 #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x18c978bc "[__execute-latency = {}]"] #object[org.apac
he.storm.metric.api.IMetricsConsumer$DataPoint 0x65bf4cc8 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=
-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsCons
umer$DataPoint 0x34c08652 "[memory/heap = {unusedBytes=368639184, virtualFreeBytes=7267220688, initBytes=526385152, com
mittedBytes=559939584, maxBytes=7458521088, usedBytes=191300400}]"] #object[org.apache.storm.metric.api.IMetricsConsume
r$DataPoint 0x3891855c "[__execute-count = {}]"]]]
68059 [Thread-15-record-executor[2 2]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __sys
tem:-1, stream: __metrics_tick, id: {}, [60]
68061 [Thread-15-record-executor[2 2]] INFO  o.a.s.d.task - Emitting: record __metrics [#object[org.apache.storm.metric
.api.IMetricsConsumer$TaskInfo 0x6b23b2eb "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@6b23b2eb"] [#object[or
g.apache.storm.metric.api.IMetricsConsumer$DataPoint 0xf3c3a1d "[__emit-count = {}]"] #object[org.apache.storm.metric.a
pi.IMetricsConsumer$DataPoint 0x5e49d7e "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsume
r$DataPoint 0x5dedd09d "[__receive = {arrival_rate_secs=0.10346611484738748, overflow=0, read_pos=8, write_pos=9, sojou
rn_time_ms=9665.000000000002, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x2b2f8521 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x45adbe8b "[__transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x6403d42d "[__execute-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x57599908 "[__fail-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x8084f "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x2879a723 "[__execute-count = {}]"]]]
69131 [pool-52-thread-1] ERROR o.a.s.s.ShellSpout - Halting process: ShellSpout died. Command: [/bin/bash, setup_kafka_stream.sh], ProcessInfo pid:4115, name:spout exitCode:-1, errorString: 
java.lang.RuntimeException: subprocess heartbeat timeout
        at org.apache.storm.spout.ShellSpout$SpoutHeartbeatTimerTask.run(ShellSpout.java:311) [storm-core-1.0.2.jar:1.0
.2]
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_121]
        at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [?:1.8.0_121]
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.
java:180) [?:1.8.0_121]
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:29
4) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_121]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
69142 [pool-52-thread-1] ERROR o.a.s.d.executor - 
java.lang.RuntimeException: subprocess heartbeat timeout
        at org.apache.storm.spout.ShellSpout$SpoutHeartbeatTimerTask.run(ShellSpout.java:311) [storm-core-1.0.2.jar:1.0
.2]
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_121]
        at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [?:1.8.0_121]
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.
java:180) [?:1.8.0_121]
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:29
4) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_121]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
Shutting down local topology
69156 [Thread-11] INFO  o.a.s.d.nimbus - Shutting down master
2017-03-25 21:24:46,577 FATAL Unable to register shutdown hook because JVM is shutting down.
2017-03-25 21:24:46,581 FATAL Unable to register shutdown hook because JVM is shutting down.
69330 [Thread-11] INFO  o.a.s.zookeeper - closing zookeeper connection of leader elector.
69331 [Thread-11] INFO  o.a.s.d.nimbus - Shut down master
69334 [Thread-11] INFO  o.a.s.d.supervisor - Shutting down supervisor 1c78e734-baf4-49c9-9fd5-d30e32a1d469
69336 [Thread-7] INFO  o.a.s.event - Event manager interrupted
69337 [Thread-8] INFO  o.a.s.event - Event manager interrupted
69338 [Thread-11] INFO  o.a.s.d.supervisor - Shutting down 3f883fe9-b89f-4c21-b9de-41a56adcc97c:5658b591-8cb2-4454-b4dd-7bcc50262073
69339 [Thread-11] INFO  o.a.s.config - GET worker-user 5658b591-8cb2-4454-b4dd-7bcc50262073
69341 [Thread-11] INFO  o.a.s.process-simulator - Killing process 7690c156-4817-49e9-a342-3e41a1339b0b
69342 [Thread-11] INFO  o.a.s.d.worker - Shutting down worker test topology-1-1490502220 3f883fe9-b89f-4c21-b9de-41a56adcc97c 1027
69342 [Thread-11] INFO  o.a.s.d.worker - Terminating messaging context
69343 [Thread-11] INFO  o.a.s.d.executor - Shutting down executor record:[2 2]
69343 [Thread-14-disruptor-executor[2 2]-send-queue] INFO  o.a.s.util - Async loop interrupted!
69344 [Thread-15-record-executor[2 2]] INFO  o.a.s.util - Async loop interrupted!
69345 [Thread-11] INFO  o.a.s.d.executor - Shut down executor record:[2 2]
69346 [Thread-11] INFO  o.a.s.d.executor - Shutting down executor spout:[3 3]
69347 [Thread-16-disruptor-executor[3 3]-send-queue] INFO  o.a.s.util - Async loop interrupted!
69347 [Thread-24] ERROR o.a.s.d.executor - 
java.lang.RuntimeException: org.apache.storm.multilang.NoOutputException: Pipe to subprocess seems to be broken! No out
put read.
Serializer Exception:
(Unable to capture error stream)

        at org.apache.storm.utils.ShellProcess.readShellMsg(ShellProcess.java:122) ~[storm-core-1.0.2.jar:1.0.2]
        at org.apache.storm.task.ShellBolt$BoltReaderRunnable.run(ShellBolt.java:333) [storm-core-1.0.2.jar:1.0.2]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
Topology is running. Press ^C to stop it.
125505 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __s
ystem:-1, stream: __metrics_tick, id: {}, [60]
125507 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.task - Emitting: __acker __metrics [#object[org.apache.storm.met
ric.api.IMetricsConsumer$TaskInfo 0x7db68ecc "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@7db68ecc"] [#object
[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x5c687c9 "[__emit-count = {}]"] #object[org.apache.storm.metri
c.api.IMetricsConsumer$DataPoint 0x60761785 "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsCon
sumer$DataPoint 0x2a4b4c70 "[__receive = {arrival_rate_secs=0.10978153474585575, overflow=0, read_pos=0, write_pos=1, s
ojourn_time_ms=9109.0, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0
x6342671b "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x528b4c75 "[__transfer-
count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x211a39e "[__execute-latency = {}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x72495760 "[__fail-count = {}]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x2c38c8d "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_p
os=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPo
int 0xaa38044 "[__execute-count = {}]"]]]
125512 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: 
__system:-1, stream: __metrics_tick, id: {}, [60]
125513 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.task - Emitting: __system __metrics [#object[org.apache.storm
.metric.api.IMetricsConsumer$TaskInfo 0x6bce066a "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@6bce066a"] [#ob
ject[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x7408ced7 "[uptimeSecs = 125.567]"] #object[org.apache.sto
rm.metric.api.IMetricsConsumer$DataPoint 0x44ca3412 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsC
onsumer$DataPoint 0x7045f2b2 "[__transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 
0x6b234786 "[newWorkerEvent = 0]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x59ef4243 "[__send-i
connection = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x4c1097c8 "[__fail-count = {}]"] #ob
ject[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x131f2c9a "[GC/PSMarkSweep = {count=0, timeMs=0}]"] #objec
t[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x62715608 "[startTimeSecs = 1.490502217253E9]"] #object[org.a
pache.storm.metric.api.IMetricsConsumer$DataPoint 0x412fed5f "[__emit-count = {}]"] #object[org.apache.storm.metric.api
.IMetricsConsumer$DataPoint 0x9499c65 "[memory/nonHeap = {unusedBytes=1549616, virtualFreeBytes=-60775121, initBytes=25
55904, committedBytes=62324736, maxBytes=-1, usedBytes=60775120}]"] #object[org.apache.storm.metric.api.IMetricsConsume
r$DataPoint 0x63580a2a "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0xbf1
c15f "[__receive = {arrival_rate_secs=0.10969723562966213, overflow=0, read_pos=0, write_pos=1, sojourn_time_ms=9116.0,
 capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x358a806b "[__transfer
 = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] 
#object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x70e5880b "[GC/PSScavenge = {count=1, timeMs=7}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x53cfd567 "[__execute-latency = {}]"] #object[org.apache.st
orm.metric.api.IMetricsConsumer$DataPoint 0x56311d78 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, w
rite_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$
DataPoint 0x3759460d "[memory/heap = {unusedBytes=504843952, virtualFreeBytes=7354142384, initBytes=526385152, committe
dBytes=609222656, maxBytes=7458521088, usedBytes=104378704}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$Dat
aPoint 0x6ba0253d "[__execute-count = {}]"]]]
Topology is running. Press ^C to stop it.
185505 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __s
ystem:-1, stream: __metrics_tick, id: {}, [60]
185506 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.task - Emitting: __acker __metrics [#object[org.apache.storm.met
ric.api.IMetricsConsumer$TaskInfo 0x5a1cfdd "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@5a1cfdd"] [#object[o
rg.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x3deb7a5e "[__emit-count = {}]"] #object[org.apache.storm.metric
.api.IMetricsConsumer$DataPoint 0x23523e8a "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsCons
umer$DataPoint 0x35653b04 "[__receive = {arrival_rate_secs=0.10978153474585575, overflow=0, read_pos=1, write_pos=2, so
journ_time_ms=9109.0, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x
3e31d552 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x586e6140 "[__transfer-c
ount = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x6a7aa2a9 "[__execute-latency = {}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x765b19f9 "[__fail-count = {}]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x4670c925 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_
pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataP
oint 0x6d3fd4ca "[__execute-count = {}]"]]]
69347 [Thread-24] ERROR o.a.s.d.executor - 
java.lang.RuntimeException: org.apache.storm.multilang.NoOutputException: Pipe to subprocess seems to be broken! No out
put read.
Serializer Exception:
(Unable to capture error stream)

        at org.apache.storm.utils.ShellProcess.readShellMsg(ShellProcess.java:122) ~[storm-core-1.0.2.jar:1.0.2]
        at org.apache.storm.task.ShellBolt$BoltReaderRunnable.run(ShellBolt.java:333) [storm-core-1.0.2.jar:1.0.2]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
Topology is running. Press ^C to stop it.
125505 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __s
ystem:-1, stream: __metrics_tick, id: {}, [60]
125507 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.task - Emitting: __acker __metrics [#object[org.apache.storm.met
ric.api.IMetricsConsumer$TaskInfo 0x7db68ecc "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@7db68ecc"] [#object
[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x5c687c9 "[__emit-count = {}]"] #object[org.apache.storm.metri
c.api.IMetricsConsumer$DataPoint 0x60761785 "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsCon
sumer$DataPoint 0x2a4b4c70 "[__receive = {arrival_rate_secs=0.10978153474585575, overflow=0, read_pos=0, write_pos=1, s
ojourn_time_ms=9109.0, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0
x6342671b "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x528b4c75 "[__transfer-
count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x211a39e "[__execute-latency = {}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x72495760 "[__fail-count = {}]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x2c38c8d "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_p
os=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPo
int 0xaa38044 "[__execute-count = {}]"]]]
125512 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: 
__system:-1, stream: __metrics_tick, id: {}, [60]
125513 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.task - Emitting: __system __metrics [#object[org.apache.storm
.metric.api.IMetricsConsumer$TaskInfo 0x6bce066a "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@6bce066a"] [#ob
ject[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x7408ced7 "[uptimeSecs = 125.567]"] #object[org.apache.sto
rm.metric.api.IMetricsConsumer$DataPoint 0x44ca3412 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsC
onsumer$DataPoint 0x7045f2b2 "[__transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 
0x6b234786 "[newWorkerEvent = 0]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x59ef4243 "[__send-i
connection = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x4c1097c8 "[__fail-count = {}]"] #ob
ject[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x131f2c9a "[GC/PSMarkSweep = {count=0, timeMs=0}]"] #objec
t[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x62715608 "[startTimeSecs = 1.490502217253E9]"] #object[org.a
pache.storm.metric.api.IMetricsConsumer$DataPoint 0x412fed5f "[__emit-count = {}]"] #object[org.apache.storm.metric.api
.IMetricsConsumer$DataPoint 0x9499c65 "[memory/nonHeap = {unusedBytes=1549616, virtualFreeBytes=-60775121, initBytes=25
55904, committedBytes=62324736, maxBytes=-1, usedBytes=60775120}]"] #object[org.apache.storm.metric.api.IMetricsConsume
r$DataPoint 0x63580a2a "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0xbf1
c15f "[__receive = {arrival_rate_secs=0.10969723562966213, overflow=0, read_pos=0, write_pos=1, sojourn_time_ms=9116.0,
 capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x358a806b "[__transfer
 = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] 
#object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x70e5880b "[GC/PSScavenge = {count=1, timeMs=7}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x53cfd567 "[__execute-latency = {}]"] #object[org.apache.st
orm.metric.api.IMetricsConsumer$DataPoint 0x56311d78 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, w
rite_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$
DataPoint 0x3759460d "[memory/heap = {unusedBytes=504843952, virtualFreeBytes=7354142384, initBytes=526385152, committe
dBytes=609222656, maxBytes=7458521088, usedBytes=104378704}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$Dat
aPoint 0x6ba0253d "[__execute-count = {}]"]]]
Topology is running. Press ^C to stop it.
185505 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __s
ystem:-1, stream: __metrics_tick, id: {}, [60]
185506 [Thread-19-__acker-executor[1 1]] INFO  o.a.s.d.task - Emitting: __acker __metrics [#object[org.apache.storm.met
ric.api.IMetricsConsumer$TaskInfo 0x5a1cfdd "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@5a1cfdd"] [#object[o
rg.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x3deb7a5e "[__emit-count = {}]"] #object[org.apache.storm.metric
.api.IMetricsConsumer$DataPoint 0x23523e8a "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsCons
umer$DataPoint 0x35653b04 "[__receive = {arrival_rate_secs=0.10978153474585575, overflow=0, read_pos=1, write_pos=2, so
journ_time_ms=9109.0, capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x
3e31d552 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x586e6140 "[__transfer-c
ount = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x6a7aa2a9 "[__execute-latency = {}]"] #obj
ect[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x765b19f9 "[__fail-count = {}]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x4670c925 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_
pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataP
oint 0x6d3fd4ca "[__execute-count = {}]"]]]
185510 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: 
__system:-1, stream: __metrics_tick, id: {}, [60]
185512 [Thread-21-__system-executor[-1 -1]] INFO  o.a.s.d.task - Emitting: __system __metrics [#object[org.apache.storm
.metric.api.IMetricsConsumer$TaskInfo 0x46e9b1c3 "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@46e9b1c3"] [#ob
ject[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x18edaebb "[uptimeSecs = 185.565]"] #object[org.apache.sto
rm.metric.api.IMetricsConsumer$DataPoint 0x65f9ea85 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsC
onsumer$DataPoint 0xf9aa2ca "[__transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0
x4767d53 "[newWorkerEvent = 0]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x719b9310 "[__send-ico
nnection = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x5b9cf264 "[__fail-count = {}]"] #obje
ct[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x14a972f "[GC/PSMarkSweep = {count=0, timeMs=0}]"] #object[o
rg.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x3661feb0 "[startTimeSecs = 1.490502217253E9]"] #object[org.apac
he.storm.metric.api.IMetricsConsumer$DataPoint 0x1bcfe87f "[__emit-count = {}]"] #object[org.apache.storm.metric.api.IM
etricsConsumer$DataPoint 0x27fc26b0 "[memory/nonHeap = {unusedBytes=1310256, virtualFreeBytes=-61211089, initBytes=2555
904, committedBytes=62521344, maxBytes=-1, usedBytes=61211088}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$
DataPoint 0x72da2e8f "[__process-latency = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x4b77c
a1a "[__receive = {arrival_rate_secs=0.10970927043335162, overflow=0, read_pos=1, write_pos=2, sojourn_time_ms=9115.0, 
capacity=1024, population=1}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x2f1f5f1b "[__transfer 
= {arrival_rate_secs=0.0, overflow=0, read_pos=-1, write_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #
object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x699025ee "[GC/PSScavenge = {count=0, timeMs=0}]"] #obje
ct[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x4a9dc005 "[__execute-latency = {}]"] #object[org.apache.sto
rm.metric.api.IMetricsConsumer$DataPoint 0x3f4ff456 "[__sendqueue = {arrival_rate_secs=0.0, overflow=0, read_pos=-1, wr
ite_pos=-1, sojourn_time_ms=0.0, capacity=1024, population=0}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$D
ataPoint 0x6afd8323 "[memory/heap = {unusedBytes=434950544, virtualFreeBytes=7284248976, initBytes=526385152, committed
Bytes=609222656, maxBytes=7458521088, usedBytes=174272112}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$Data
Point 0x22a74c7 "[__execute-count = {}]"]]]
193439 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.task - Emitting: spout default ["11112222hello 0"]
193440 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.executor - TRANSFERING tuple [dest: 2 tuple: source: spout:3, stre
am: default, id: {}, ["11112222hello 0"]]
193444 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __sys
tem:-1, stream: __tick, id: {}, [30]
193444 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __sys
tem:-1, stream: __tick, id: {}, [30]
193444 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.executor - Processing received message FOR -2 TUPLE: source: __sys
tem:-1, stream: __metrics_tick, id: {}, [60]
193446 [Thread-17-spout-executor[3 3]] INFO  o.a.s.d.task - Emitting: spout __metrics [#object[org.apache.storm.metric.
api.IMetricsConsumer$TaskInfo 0x1de378e7 "org.apache.storm.metric.api.IMetricsConsumer$TaskInfo@1de378e7"] [#object[org
.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x68866270 "[__complete-latency = {}]"] #object[org.apache.storm.me
tric.api.IMetricsConsumer$DataPoint 0x26dda891 "[__emit-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsu
mer$DataPoint 0x289b9b41 "[__receive = {arrival_rate_secs=0.02237970906378217, overflow=6, read_pos=-1, write_pos=3, so
journ_time_ms=178733.3333333333, capacity=1024, population=4}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$D
ataPoint 0x2eb9eaf5 "[__ack-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x757788b0 "[_
_transfer-count = {}]"] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x3ea52faf "[__fail-count = {}]"
] #object[org.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x4a90eaa9 "[__sendqueue = {arrival_rate_secs=0.007475
629447999521, overflow=1, read_pos=8, write_pos=9, sojourn_time_ms=133768.0, capacity=1024, population=1}]"] #object[or
g.apache.storm.metric.api.IMetricsConsumer$DataPoint 0x2b1f5f4c "[__skipped-max-spout = 0]"] #object[org.apache.storm.m
etric.api.IMetricsConsumer$DataPoint 0x62647097 "[__skipped-inactive = 0]"] #object[org.apache.storm.metric.api.IMetric
sConsumer$DataPoint 0x752be323 "[__skipped-throttle = 0]"]]]
193446 [Thread-17-spout-executor[3 3]] INFO  o.a.s.util - Async loop interrupted!

and then my kafka server shows this

[2017-03-25 21:27:20,695] INFO [GroupCoordinator 0]: Group trivver_persistence generation 1 is dead and removed (kafka.coordinator.GroupCoordinator)


so that the process is dead.

Any ideas why?

@barrywhart
Copy link
Contributor

I can't help with Storm problems. I suggest raising this issue with the Apache Storm community. There is probably a forum or mailing list.

@dmitry-saritasa
Copy link
Author

Just in case someone runs into the same problem, solution is

@property
    def kafka(self):
        ktopics = json.loads(self.config.get('KAFKA', 'topics'))
        kgroup = self.config.get('KAFKA', 'group')
        kservers = json.loads(self.config.get('KAFKA', 'servers'))
        # this is very important setting
        # if you don't use it or make it very high - then
        # apache storm will be killing the worker, due to 'heartbeat'
        # problem, as getting message with kafka-python is blocking
        # operation, i.e.
        # self.stream = cfg.config.kafka
        # record = next(self.stream)
        # that will block all I/O for the time defined in
        # consumer_timeout_ms
        # see more info here:
        # https://github.com/dpkp/kafka-python/blob/master/kafka/consumer/group.py
        ktimeout = self.config.getint('KAFKA', 'consumer_timeout_ms')
        consumer = KafkaConsumer(
            *ktopics,
            group_id=kgroup,
            bootstrap_servers=kservers,
            consumer_timeout_ms=ktimeout,
            value_deserializer=umsgpack.unpackb
        )
        return consumer

so use consumer_timeout_ms to avoid blocking I/O and then simply catch StopIteration exception as shown below

# ---------------------------------------------------
# kafka stream spout using kafka-stream library
# ---------------------------------------------------
import time

import config as cfg
from petrel import storm
from petrel.emitter import Spout

# enable loggers
log = cfg.config.loggers.storm


class KafkaSpout(Spout):

    def __init__(self):
        self.stream = cfg.config.kafka
        super().__init__(script=__file__)

    @classmethod
    def declareOutputFields(cls):
        return ['topic', 'record']

    def nextTuple(self):
        try:
            record = next(self.stream)
            log.debug("{}:{}:{}: key={} value={}".format(
                record.topic,
                record.partition,
                record.offset,
                record.key,
                record.value))
            storm.emit([record.topic, record.value])
        except StopIteration:
            pass


def run():
    KafkaSpout().run()

@barrywhart
Copy link
Contributor

Good tip -- thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants