Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Segfault due DB backend crash #853

Closed
46labs opened this issue Apr 11, 2016 · 4 comments
Closed

Segfault due DB backend crash #853

46labs opened this issue Apr 11, 2016 · 4 comments
Assignees
Labels
Milestone

Comments

@46labs
Copy link

46labs commented Apr 11, 2016

Compiled with QM_MALLOC and Debug

Version: opensips 2.3.0-dev (x86_64/linux)
flags: STATS: On, DISABLE_NAGLE, USE_MCAST, SHM_MMAP, PKG_MALLOC, QM_MALLOC, DBG_MALLOC, FAST_LOCK-ADAPTIVE_WAIT
ADAPTIVE_WAIT_LOOPS=1024, MAX_RECV_BUFFER_SIZE 262144, MAX_LISTEN 16, MAX_URI_SIZE 1024, BUF_SIZE 65535
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
git revision: 0546c94

Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00000000004a622e in internal_search_ID_avp (avp=0x3022202c22322220, id=150, flags=0) at usr_avp.c:318

318 if ( id==avp->id && (flags==0 || (flags&avp->flags))) {
(gdb) bt full
#0 0x00000000004a622e in internal_search_ID_avp (avp=0x3022202c22322220, id=150, flags=0) at usr_avp.c:318

No locals.
#1 0x00000000004a638e in search_first_avp (flags=0, id=150, val=0x0, start=0x0) at usr_avp.c:356

    head = 0x7f1e0fb4fce8
    avp = 0x7f1e1918db00
    __FUNCTION__ = "search_first_avp"
    __PRETTY_FUNCTION__ = "search_first_avp"

#2 0x00000000004a6550 in destroy_avps (flags=0, name=150, all=1) at usr_avp.c:408

    avp = 0x7f1e1918db00
    n = 1

#3 0x0000000000452bb8 in pv_set_avp (msg=0x7f1fe11cc2e0, param=0x7f1fdee21698, op=53, val=0x7ffc24b18190) at pvar.c:2422

    avp_name = 150
    avp_val = {n = 6, s = {s = 0x7f1f00000006 "", len = 0}}
    flags = 0
    name_type = 0
    idx = 0
    idxf = 0
    __FUNCTION__ = "pv_set_avp"

#4 0x00000000004582d4 in pv_set_value (msg=0x7f1fe11cc2e0, sp=0x7f1fdee21680, op=53, value=0x7ffc24b18190) at pvar.c:4174

    pv_msg = 0x7f1fe11cc2e0
    __FUNCTION__ = "pv_set_value"

#5 0x00000000004123e5 in do_assign (msg=0x7f1fe11cc2e0, a=0x7f1fdee219c8) at action.c:274

    ret = 1
    val = {rs = {s = 0x7f1fdee21850 "1", len = 1}, ri = 0, flags = 4}
    dspec = 0x7f1fdee21680
    __FUNCTION__ = "do_assign"

#6 0x000000000041a9ba in do_action (a=0x7f1fdee219c8, msg=0x7f1fe11cc2e0) at action.c:1957

    increment = 32543
    decrement = -556369096
    val_number = 32543
    j = 32542
    cdb_res = 0x669d
    val_s = {s = 0x2500000038 <error: Cannot access memory at address 0x2500000038>, len = -556630184}
    cdb_reply = 0x7f1ddd89f860 <faked_req>
    aux = {s = 0x2500000038 <error: Cannot access memory at address 0x2500000038>, len = -556630184}
    i = 105764392
    key_number = 26269
    it = 0x7f1fded6c1b0
    avp_val = {n = 615616304, s = {s = 0x7ffc24b18f30 "\020", len = 0}}
    avp_name = {n = 56, s = {s = 0x2500000038 <error: Cannot access memory at address 0x2500000038>, len = -556630184}}
    avp_type = 36656
    ret = -5
    v = 0
    sec = 615613024
    usec = 32764
    to = 0x669d
    p = 0x100411660
    tmp = 0x669d <error: Cannot access memory at address 0x669d>
    new_uri = 0x411660 <_start> "1\355I\211\321^H\211\342H\203\344\360PTI\307\300pPS"
    end = 0x411660 <_start> "1\355I\211\321^H\211\342H\203\344\360PTI\307\300pPS"
    crt = 0x7ffc24b182b0 "\235f"
    len = -555220992
    i = 11
    user = 0
    expires = 0
    vals = {{s = 0x7f1ddd89f860 <faked_req> "TC)", len = -556272944}, {s = 0x7f1fe11cc2e0 "\\C)", len = -556348592}, {s = 0x669d <error: Cannot access memory at address 0x669d>, len = 4265568}, {s = 0x7ffc24b18390 "\340\203\261$", len = 4658670}, {
        s = 0x669d <error: Cannot access memory at address 0x669d>, len = 0}}
    result = {s = 0x7ffc24b18f30 "\020", len = 0}
    uri = {user = {s = 0x7ffc24b18f30 "\020", len = 0}, passwd = {s = 0x7ffc24b183a0 "\340\203\261$\374\177", len = 1}, host = {s = 0x7f1ddd89f860 <faked_req> "TC)", len = -556240704}, port = {s = 0x24b183e0 <error: Cannot access memory at address 0x24b183e0>, len = -556200064}, params = {
        s = 0x7ffc24b183e0 "\360\212\261$\374\177", len = 4267638}, headers = {s = 0x7f1ddd89f860 <faked_req> "TC)", len = -556240704}, port_no = 63584, proto = 56713, type = 32541, transport = {s = 0xded7cc08 <error: Cannot access memory at address 0xded7cc08>, len = 26269}, ttl = {
        s = 0x411660 <_start> "1\355I\211\321^H\211\342H\203\344\360PTI\307\300pPS", len = 615615216}, user_param = {s = 0x414acb <do_action+8024> "\211\005{\v:", len = 0}, maddr = {s = 0x7f1fe11d5f40 "\003", len = -578160544}, method = {s = 0x7f1fded82470 "\a", len = 5}, lr = {s = 0x7f1fded5b600 "",
        len = -556419584}, r2 = {s = 0x24b18490 <error: Cannot access memory at address 0x24b18490>, len = 4896456}, gr = {s = 0x0, len = 5642374}, transport_val = {s = 0x7ffc24b18608 "\200\257S", len = 26269}, ttl_val = {s = 0x411660 <_start> "1\355I\211\321^H\211\342H\203\344\360PTI\307\300pPS",

---Type to continue, or q to quit---
len = 615616304}, user_param_val = {s = 0x0, len = 615613600}, maddr_val = {s = 0x411fa9 <run_action_list+52> "\211E\364\203", <incomplete sequence \364>, len = -518192592}, method_val = {s = 0x7f1fdee31c60 "\020", len = 615613960}, lr_val = {s = 0x0, len = 615613664}, r2_val = {
s = 0x411e76 <run_actions+315> "\211E\354\213\005\311\067:", len = -518192592}, gr_val = {s = 0x7f1fdee31c60 "\020", len = 4265568}, u_name = {{s = 0x1008065d4 <error: Cannot access memory at address 0x1008065d4>, len = 26269}, {
s = 0x411660 <_start> "1\355I\211\321^H\211\342H\203\344\360PTI\307\300pPS", len = 615613712}, {s = 0x412076 <run_top_route+83> "\213\005\314\065:", len = 470247240}, {s = 0x7f1dde4eb000 "\376(V", len = 470247384}, {s = 0x7f1dde4eb000 "\376(V", len = 4265568}, {
s = 0x524b18f30 <error: Cannot access memory at address 0x524b18f30>, len = 194677552}, {s = 0x7f1fe11d0d68 "0", len = -556634096}, {s = 0x4ac8dd <qm_free+2391> "H\203\304P[A\A]A^]\303UH\211\345SH\201", <incomplete sequence \354\210>, len = -518192640}, {s = 0x7f1fded272c0 "",
len = -556633408}, {s = 0x7ffc24b185c0 "@\206\261$\v", len = 4896456}}, u_val = {{s = 0x116de4eb000 <error: Cannot access memory at address 0x116de4eb000>, len = 5642374}, {s = 0x56a7b0 "parser/parse_to.c", len = 48}, {s = 0x7f1fe11d0cd8 "0", len = -556634096}, {
s = 0x669d <error: Cannot access memory at address 0x669d>, len = 144}, {s = 0x7f1fe11cea20 "X", len = -556634096}, {s = 0x669d <error: Cannot access memory at address 0x669d>, len = 615614016}, {s = 0x7f1fded274c8 "", len = -556632888}, {s = 0x7ffc24b18640 "\206\030V", len = 4896456}, {
s = 0x16024b18650 <error: Cannot access memory at address 0x16024b18650>, len = 5642374}, {s = 0x7f1fe11d3470 "(\001", len = -556634096}}, u_params_no = 44928}
next_hop = {user = {s = 0x2500000038 <error: Cannot access memory at address 0x2500000038>, len = -556630184}, passwd = {s = 0x7f1fded27f58 "", len = 615614096}, host = {s = 0x4ab6c8 <split_frag+304> "\270", len = -556634096}, port = {s = 0x561886 "fragm. from qm_malloc", len = 5520781}, params = {
s = 0x48 <error: Cannot access memory at address 0x48>, len = -518179896}, headers = {s = 0x7f1fded27010 "h\rV", len = 26269}, port_no = 392, proto = 0, type = ERROR_URI_T, transport = {s = 0xc2e11d3440 <error: Cannot access memory at address 0xc2e11d3440>, len = 5642374}, ttl = {
s = 0x7f1ddd694b93 "t_msgbuilder.c", len = 400}, user_param = {s = 0x7f1e1dcebdb8 "\220\001", len = -565268480}, maddr = {s = 0x669d <error: Cannot access memory at address 0x669d>, len = 0}, method = {s = 0x7f1dde4eb000 "\376(V", len = -565263216}, lr = {
s = 0x669d <error: Cannot access memory at address 0x669d>, len = -518180048}, r2 = {s = 0x7f1fded27010 "h\rV", len = -518179896}, gr = {s = 0x7f1fded27010 "h\rV", len = 4265568}, transport_val = {s = 0x3a24b18f30 <error: Cannot access memory at address 0x3a24b18f30>, len = -556628000},
ttl_val = {s = 0x7f1fded287e0 "", len = 615614352}, user_param_val = {s = 0x4ac8dd <qm_free+2391> "H\203\304P[A\A]A^]\303UH\211\345SH\201", <incomplete sequence \354\210>, len = 50}, maddr_val = {s = 0xf41dcebdb8 <error: Cannot access memory at address 0xf41dcebdb8>, len = -580300567},
method_val = {s = 0x7f1ddd694b93 "t_msgbuilder.c", len = -518179848}, lr_val = {s = 0x7f1fded27010 "h\rV", len = -518179896}, r2_val = {s = 0x1d0 <error: Cannot access memory at address 0x1d0>, len = -518179728}, gr_val = {s = 0x7f1fe11d3330 "8", len = 26269}, u_name = {{s = 0x0, len = 615616304},
{s = 0x0, len = 615614736}, {s = 0x7f1ddd67c1e5 <build_local+3897> "H\213\205(\377\377\377\353AH\213\205\b\377\377\377H\211\307\350\223\362\374\377H\213\005\224\034"", len = 373660920}, {s = 0x7f1e0d097df8 "", len = 615614448}, {
s = 0x7f1dd8f9d422 <tmcb_func+193> "\311\303UH\211\345AUATSH\203\354\030H\211}\330\353\021H\213E؋@\b\215P\377H\213E؉P\bH\213E\330H\213\020H\213E؋@\bH\230H\203\350\001H\001\320\017\266", len = 5653904}, {s = 0x7ffc24b18850 "\300\274\316\035\036\177", len = 0}, {
s = 0x7f1feee2362f <sendto+63> "H\211\320H\203\304\bH=\001\360\377\377s\001\303H\213\r"(,", len = 0}, {s = 0x5230f8 <proto_udp_send+98> "\211E܃}\334\377\017\205e\001", len = 615614608}, {s = 0x0, len = 0}, {s = 0x0, len = 218726384}}, u_val = {{
s = 0x18c00000000 <error: Cannot access memory at address 0x18c00000000>, len = 500088296}, {s = 0x7f1fdee82d08 "\f", len = 615614560}, {s = 0x18c00000010 <error: Cannot access memory at address 0x18c00000010>, len = 615614608}, {s = 0x669d <error: Cannot access memory at address 0x669d>,
len = 500088000}, {s = 0x7f1dde4eb000 "\376(V", len = 500088248}, {s = 0x7f1dde4eb000 "\376(V", len = 4265568}, {s = 0x3224b18f30 <error: Cannot access memory at address 0x3224b18f30>, len = 476739136}, {s = 0x7f1dde4ec490 "", len = 615614736}, {
s = 0x4ac8dd <qm_free+2391> "H\203\304P[A\A]A^]\303UH\211\345SH\201", <incomplete sequence \354\210>, len = 500088296}, {s = 0x13b0000018c <error: Cannot access memory at address 0x13b0000018c>, len = -580294192}}, u_params_no = 22129}
u = 0x4715ee <eval_expr+207>
port = 0
cmatch = -518208800
aitem = 0x7ffc00000000
adefault = 0x0
spec = 0x669d
model = 0x411660 <_start>
val = {rs = {s = 0x669d <error: Cannot access memory at address 0x669d>, len = -556349112}, ri = 615614992, flags = 32764}
pve = 0x7f1fe11cc2e0
name_s = {s = 0x7ffc24b18300 "\020\212\261$\374\177", len = 4267945}
start = {tv_sec = 139774897472224, tv_usec = 139774859331616}
end_time = 1
aux_counter = 4658670
FUNCTION = "do_action"
#7 0x0000000000411fa9 in run_action_list (a=0x7f1fdee219c8, msg=0x7f1fe11cc2e0) at action.c:172

    ret = -1
    t = 0x7f1fdee219c8

#8 0x0000000000411e76 in run_actions (a=0x7f1fdee219c8, msg=0x7f1fe11cc2e0) at action.c:137

    ret = 32541
    __FUNCTION__ = "run_actions"

#9 0x0000000000412076 in run_top_route (a=0x7f1fdee219c8, msg=0x7f1fe11cc2e0) at action.c:204

    bk_action_flags = 0
    bk_rec_lev = 0
    ret = 0

#10 0x00007f1ddd6848dd in reply_received (p_msg=0x7f1fe11cc2e0) at t_reply.c:1449

    msg_status = 503
    last_uac_status = 100
    branch = 0
    reply_status = -518190008
    timer = 2
    cancel_bitmap = 0
    uac = 0x7f1e0d097fd0
    t = 0x7f1e0d097df8
    backup_list = 0x7d2380 <global_avps>
    has_reply_route = 1
    __FUNCTION__ = "reply_received"

#11 0x000000000042bbe5 in forward_reply (msg=0x7f1fe11cc2e0) at forward.c:467

    new_buf = 0x0
    to = 0x0
    new_len = 8335136
    mod = 0x7f1fded5df50
    proto = 5566844
    id = 0

---Type to continue, or q to quit---
send_sock = 0x3007f2f41
s = 0x7f1fe11cc2e8 "\002"
len = 0
FUNCTION = "forward_reply"
#12 0x00000000004607e1 in receive_msg (buf=0x7f2f20 "SIP/2.0 503 Service Unavailable\r\nRecord-Route: sip:10.0.2.67;lr;ftag=308BC2CA-570BF2440009F56F-7F7F0700;did=4ee.5cf23853\r\nVia: SIP/2.0/UDP 10.0.2.67:5060;branch=z9hG4bK2066.9dcf6462.0\r\nVia: SIP/2.0/"..., len=515, rcv_info=0x7ffc24b18b90,

existing_context=0x0) at receive.c:257
    ctx = 0x7f1fe11d3cf8
    msg = 0x7f1fe11cc2e0
    start = {tv_sec = 139766995899344, tv_usec = 139766260496528}
    rc = 3
    tmp = 0xb224b18f30 <error: Cannot access memory at address 0xb224b18f30>
    in_buff = {s = 0x7f2f20 <buf> "SIP/2.0 503 Service Unavailable\r\nRecord-Route: <sip:10.0.2.67;lr;ftag=308BC2CA-570BF2440009F56F-7F7F0700;did=4ee.5cf23853>\r\nVia: SIP/2.0/UDP 10.0.2.67:5060;branch=z9hG4bK2066.9dcf6462.0\r\nVia: SIP/2.0/"..., len = 515}
    __FUNCTION__ = "receive_msg"

#13 0x000000000052306f in udp_read_req (si=0x7f1fdee82d08, bytes_read=0x7ffc24b18c48) at net/proto_udp/proto_udp.c:190

    ri = {src_ip = {af = 2, len = 4, u = {addrl = {139763654983690, 139767348568528}, addr32 = {1124204554, 32541, 522822096, 32542}, addr16 = {10, 17154, 32541, 0, 41424, 7977, 32542, 0}, addr = "\n\000\002C\035\177\000\000С)\037\036\177\000"}}, dst_ip = {af = 2, len = 4, u = {addrl = {1124204554, 0},
          addr32 = {1124204554, 0, 0, 0}, addr16 = {10, 17154, 0, 0, 0, 0, 0, 0}, addr = "\n\000\002C", '\000' <repeats 11 times>}}, src_port = 5062, dst_port = 5060, proto = 1, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s = {sa_family = 2,
          sa_data = "\023\306\n\000\002C\000\000\000\000\000\000\000"}, sin = {sin_family = 2, sin_port = 50707, sin_addr = {s_addr = 1124204554}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50707, sin6_flowinfo = 1124204554, sin6_addr = {__in6_u = {
              __u6_addr8 = "\000\000\000\000\000\000\000\000С)\037\036\177\000", __u6_addr16 = {0, 0, 0, 0, 41424, 7977, 32542, 0}, __u6_addr32 = {0, 0, 522822096, 32542}}}, sin6_scope_id = 4294967295}}, bind_address = 0x7f1fdee82d08}
    len = 515
    buf = "SIP/2.0 503 Service Unavailable\r\nRecord-Route: <sip:10.0.2.67;lr;ftag=308BC2CA-570BF2440009F56F-7F7F0700;did=4ee.5cf23853>\r\nVia: SIP/2.0/UDP 10.0.2.67:5060;branch=z9hG4bK2066.9dcf6462.0\r\nVia: SIP/2.0/"...
    tmp = 0x0
    fromlen = 16
    p = 0x669d
    msg = {s = 0x7f2f20 <buf> "SIP/2.0 503 Service Unavailable\r\nRecord-Route: <sip:10.0.2.67;lr;ftag=308BC2CA-570BF2440009F56F-7F7F0700;did=4ee.5cf23853>\r\nVia: SIP/2.0/UDP 10.0.2.67:5060;branch=z9hG4bK2066.9dcf6462.0\r\nVia: SIP/2.0/"..., len = 515}
    __FUNCTION__ = "udp_read_req"

#14 0x0000000000519488 in handle_io (fm=0x7f1fdee988e0, idx=0, event_type=1) at net/net_udp.c:259

    n = 0
    read = 1
    __FUNCTION__ = "handle_io"

#15 0x0000000000517dc3 in io_wait_loop_epoll (h=0x803a40 <_worker_io>, t=1, repeat=0) at net/../io_wait_loop.h:221

    ret = 1
    n = 1
    r = 0
    e = 0x7f1fdee988e0
    __FUNCTION__ = "io_wait_loop_epoll"

#16 0x00000000005197e7 in udp_rcv_loop (si=0x7f1fdee82d08) at net/net_udp.c:308

    __FUNCTION__ = "udp_rcv_loop"

#17 0x0000000000519d8d in udp_start_processes (chd_rank=0x7b60a8 <chd_rank>, startup_done=0x7f1ddea170f0) at net/net_udp.c:372

    si = 0x7f1fdee82d08
    load_p = 0x7f1ddea17158
    pid = 0
    i = 18
    p = 1
    __FUNCTION__ = "udp_start_processes"

#18 0x0000000000430a14 in main_loop () at main.c:671

    chd_rank = 19
    startup_done = 0x7f1ddea170f0
    __FUNCTION__ = "main_loop"

#19 0x000000000043334d in main (argc=16, argv=0x7ffc24b18f38) at main.c:1252

    cfg_log_stderr = 1
    cfg_stream = 0x83e010
    c = -1
    r = 0
    tmp = 0x7ffc24b19e36 ""
    tmp_len = 32543
    port = -277766144
    proto = 32543
    options = 0x540ab8 "f:cCm:M:b:l:n:N:rRvdDFETSVhw:t:u:g:P:G:W:o:"
    ret = -1
    seed = 91945615
    rfd = 4
    __FUNCTION__ = "main"
@46labs
Copy link
Author

46labs commented Apr 12, 2016

Further inspection showed that the segfault was caused by a crashed mysql server in the backend. This caused all opensips' mysql connections to drop at the same time, and thus triggered opensips segfault.
Im not sure there needs to be handling in opensips for this, or if this bug should just be closed.

@liviuchircu
Copy link
Member

It seems SHM memory gets overwritten for some reason, corrupting that avp pointer with the string 0" ,"2". Could you provide access to the corefile? Maybe we can print more memory in that area and figure out what it actually represents.

@bogdan-iancu
Copy link
Member

It should not crash, even if the DB backend died . For sure this is the result of a bogus error handling in mysql ops.
It looks like the stack got corrupted (this is why the QM_MALLOC did not help) - do you still have a corefile and binary ?

@bogdan-iancu bogdan-iancu changed the title Segfault in usr_avp.c:318 Segfault due DB backend crash Apr 20, 2016
@bogdan-iancu bogdan-iancu added this to the 2.3 milestone Apr 20, 2016
@bogdan-iancu
Copy link
Member

I suspect this crash is actually a side effect of #855 - the bogus dialog ref counting happened in some really weird signaling scenarios with large delay, multiple final replies, etc. A dead DB may easily lead to such signaling scenarios and finally trigger the ref counting bug.
The backtrace is 100% consistent with all the other crashes due wrong ref counting.
I would suggest to close this and IF it will happen again in the future , we can simply reopen.

@bogdan-iancu bogdan-iancu self-assigned this May 23, 2016
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

2 participants