Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ncnn模型在预处理copy_make_border时出错,使用NCNN_ARM82预编译指令修改无效。 #4891

Closed
jdskddd opened this issue Jul 29, 2023 · 0 comments

Comments

@jdskddd
Copy link

jdskddd commented Jul 29, 2023

nihui,你好:
我在使用ncnn并将其部署到android的过程中,发现编译正常,但运行到copy_make_border时会报错,报错时的堆栈信息为:
ncnn-1
此时堆栈中各函数的具体反汇编信息为:

ncnn::copy_make_border(ncnn::Mat const&, ncnn::Mat&, int, int, int, int, int, float, ncnn::Option const&):
sub sp, sp, #0x90
str d8, [sp, #0x20]
stp x29, x30, [sp, #0x30]
add x29, sp, #0x30
stp x28, x27, [sp, #0x40]
stp x26, x25, [sp, #0x50]
stp x24, x23, [sp, #0x60]
stp x22, x21, [sp, #0x70]
stp x20, x19, [sp, #0x80]
mrs x28, TPIDR_EL0
mov x22, x0
ldr x8, [x28, #0x28]
mov w0, #0x2b
mov x19, x7
fmov s8, s0
mov w23, w6
mov w24, w5
mov w25, w4
mov w26, w3
mov w27, w2
mov x21, x1
str x8, [sp, #0x18]
bl 0x3989e8 ; ncnn::create_layer(int)
mov x20, x0
add x0, sp, #0x8
bl 0x3b039c ; ncnn::ParamDict::ParamDict()
add x0, sp, #0x8
mov w1, wzr
mov w2, w27
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
add x0, sp, #0x8
mov w1, #0x1
mov w2, w26
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
add x0, sp, #0x8
mov w1, #0x2
mov w2, w25
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
add x0, sp, #0x8
mov w1, #0x3
mov w2, w24
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
add x0, sp, #0x8
mov w1, #0x4
mov w2, w23
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
fmov s0, s8
add x0, sp, #0x8
mov w1, #0x5
bl 0x3b12ac ; ncnn::ParamDict::set(int, float)
ldr x8, [x20]
add x1, sp, #0x8
mov x0, x20
ldr x8, [x8, #0x10]
blr x8
ldr x8, [x20]
mov x0, x20
mov x1, x19
ldr x8, [x8, #0x20]
blr x8
ldr x8, [x20]
mov x0, x20
mov x1, x22
mov x2, x21
mov x3, x19
ldr x8, [x8, #0x38]
blr x8
ldr x8, [x20]
mov x0, x20
mov x1, x19
ldr x8, [x8, #0x28]
blr x8
ldr x8, [x20]
mov x0, x20
ldr x8, [x8, #0x8]
blr x8
add x0, sp, #0x8
bl 0x3b09b0 ; ncnn::ParamDict::~ParamDict()
ldr x8, [x28, #0x28]
ldr x9, [sp, #0x18]
cmp x8, x9
b.ne 0x39d5a8 ; <+364>
ldp x20, x19, [sp, #0x80]
ldp x22, x21, [sp, #0x70]
ldp x24, x23, [sp, #0x60]
ldp x26, x25, [sp, #0x50]
ldp x28, x27, [sp, #0x40]
ldp x29, x30, [sp, #0x30]
ldr d8, [sp, #0x20]
add sp, sp, #0x90
ret
bl 0x98a0e0 ; symbol stub for: __stack_chk_fail

ncnn::cast_float32_to_float16(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&):
sub sp, sp, #0x60
stp x29, x30, [sp, #0x20]
add x29, sp, #0x20
str x23, [sp, #0x30]
stp x22, x21, [sp, #0x40]
stp x20, x19, [sp, #0x50]
mrs x23, TPIDR_EL0
mov x21, x0
ldr x8, [x23, #0x28]
mov w0, #0x40
mov x19, x2
mov x20, x1
stur x8, [x29, #-0x8]
bl 0x3989e8 ; ncnn::create_layer(int)
mov x22, x0
add x0, sp, #0x8
bl 0x3b039c ; ncnn::ParamDict::ParamDict()
add x0, sp, #0x8
mov w1, wzr
mov w2, #0x1
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
add x0, sp, #0x8
mov w1, #0x1
mov w2, #0x2
bl 0x3b1294 ; ncnn::ParamDict::set(int, int)
ldr x8, [x22]
add x1, sp, #0x8
mov x0, x22
ldr x8, [x8, #0x10]
blr x8
ldr x8, [x22]
mov x0, x22
mov x1, x19
ldr x8, [x8, #0x20]
blr x8
ldr x8, [x22]
mov x0, x22
mov x1, x21
mov x2, x20
mov x3, x19
ldr x8, [x8, #0x38]
blr x8
ldr x8, [x22]
mov x0, x22
mov x1, x19
ldr x8, [x8, #0x28]
blr x8
ldr x8, [x22]
mov x0, x22
ldr x8, [x8, #0x8]
blr x8
add x0, sp, #0x8
bl 0x3b09b0 ; ncnn::ParamDict::~ParamDict()
ldr x8, [x23, #0x28]
ldur x9, [x29, #-0x8]
cmp x8, x9
b.ne 0x39e1e0 ; <+252>
ldp x20, x19, [sp, #0x50]
ldp x22, x21, [sp, #0x40]
ldp x29, x30, [sp, #0x20]
ldr x23, [sp, #0x30]
add sp, sp, #0x60
ret
bl 0x98a0e0 ; symbol stub for: __stack_chk_fail

ncnn::Padding_arm::create_pipeline(ncnn::Option const&):
stp x29, x30, [sp, #-0x20]!
stp x20, x19, [sp, #0x10]
mov x29, sp
ldr x8, [x0]
mov x20, x0
ldrb w10, [x1, #0x42]
mov x19, x1
ldur x8, [x8, #-0x18]
add x8, x0, x8
ldrb w9, [x8, #0xd]
cmp w9, #0x0
ccmp w10, #0x0, #0x4, ne
b.eq 0x6554d8 ; <+92>
ldr s0, [x8, #0xdc]
bl 0x39d3cc ; ncnn::float32_to_float16(float)
ldr x8, [x20]
strh w0, [x20, #0x58]
add x1, x20, #0x60
mov x2, x19
ldur x8, [x8, #-0x18]
add x8, x20, x8
add x0, x8, #0xf0
bl 0x39e0e4 ; ncnn::cast_float32_to_float16(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&)
ldrb w8, [x19, #0x40]
cbz w8, 0x65550c ; <+144>
ldr x8, [x20]
add x1, x20, #0x10
mov x2, x19
ldur x9, [x8, #-0x18]
add x9, x20, x9
ldrh w9, [x9, #0xde]
strh w9, [x20, #0x8]
ldur x8, [x8, #-0x18]
add x8, x20, x8
add x0, x8, #0xf0
bl 0x39e3e4 ; ncnn::cast_float32_to_bfloat16(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&)
ldp x20, x19, [sp, #0x10]
mov w0, wzr
ldp x29, x30, [sp], #0x20
ret

ncnn::Cast_arm::forward(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&) const:
sub sp, sp, #0x80
stp x29, x30, [sp, #0x20]
add x29, sp, #0x20
stp x28, x27, [sp, #0x30]
stp x26, x25, [sp, #0x40]
stp x24, x23, [sp, #0x50]
stp x22, x21, [sp, #0x60]
stp x20, x19, [sp, #0x70]
mrs x22, TPIDR_EL0
mov x23, x0
adrp x0, 719
ldr x8, [x22, #0x28]
add x0, x0, #0xcc0
mov x25, x3
mov x19, x2
mov x20, x1
stur x8, [x29, #-0x8]
bl 0x98ae20 ; symbol stub for: __kmpc_global_thread_num
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x8, x23, x8
ldp w9, w10, [x8, #0xc8]
cmp w9, w10
b.ne 0x6e53f4 ; <+184>
cmp x19, x20
b.eq 0x6e58e0 ; <+1444>
ldr x8, [x20, #0x8]
cbz x8, 0x6e53bc ; <+128>
ldaxr w9, [x8]
add w9, w9, #0x1
stlxr w10, w9, [x8]
cbnz w10, 0x6e53ac ; <+112>
ldr x8, [x19, #0x8]
cbz x8, 0x6e57b4 ; <+1144>
ldaxr w9, [x8]
subs w9, w9, #0x1
stlxr w10, w9, [x8]
cbnz w10, 0x6e53c4 ; <+136>
b.ne 0x6e57b4 ; <+1144>
ldr x0, [x19, #0x20]
ldr x1, [x19]
cbz x0, 0x6e57a8 ; <+1132>
ldr x8, [x0]
ldr x8, [x8, #0x18]
blr x8
b 0x6e57b4 ; <+1144>
ldp w24, w11, [x20, #0x30]
ldp w21, w26, [x20, #0x28]
ldr w28, [x20, #0x38]
sub w10, w10, #0x1
ldrsw x27, [x20, #0x18]
cmp w10, #0x3
stp w0, w11, [sp]
str w28, [sp, #0xc]
b.hi 0x6e5498 ; <+348>
adrp x11, -1060
add x11, x11, #0xc94
adr x12, #0x10 ; <+244>
ldrb w13, [x11, x10]
add x12, x12, x13, lsl #2
br x12
lsl w8, w27, #1
sxtw x8, w8
sub w9, w21, #0x1
mov x21, x25
cmp w9, #0x3
b.ls 0x6e54ac ; <+368>
b 0x6e5540 ; <+516>
cmp w9, #0x3
b.ne 0x6e5468 ; <+300>
mov x0, x8
mov x1, x20
mov x2, x19
mov x3, x25
bl 0x6e45b8 ; ncnn::Cast::forward(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&) const
lsl x8, x27, #2
sub w9, w21, #0x1
mov x21, x25
cmp w9, #0x3
b.ls 0x6e54ac ; <+368>
b 0x6e5540 ; <+516>
mov x8, x27
sub w9, w21, #0x1
mov x21, x25
cmp w9, #0x3
b.ls 0x6e54ac ; <+368>
b 0x6e5540 ; <+516>
ldr x8, [x20, #0x10]
sub w9, w21, #0x1
mov x21, x25
cmp w9, #0x3
b.hi 0x6e5540 ; <+516>
adrp x10, -1060
add x10, x10, #0xc98
adr x11, #0x10 ; <+392>
ldrb w12, [x10, x9]
add x11, x11, x12, lsl #2
br x11
ldr x4, [x21, #0x8]
mov x0, x19
mov w1, w26
mov x2, x8
mov w3, w27
bl 0x398d10 ; ncnn::Mat::create(int, unsigned long, int, ncnn::Allocator*)
ldr x8, [x19]
mov w0, #-0x64
cbnz x8, 0x6e554c ; <+528>
b 0x6e58e4 ; <+1448>
ldr x6, [x21, #0x8]
mov x0, x19
mov w1, w26
mov w2, w24
mov w3, w28
mov x4, x8
mov w5, w27
bl 0x399024 ; ncnn::Mat::create(int, int, int, unsigned long, int, ncnn::Allocator*)
ldr x8, [x19]
mov w0, #-0x64
cbnz x8, 0x6e554c ; <+528>
b 0x6e58e4 ; <+1448>
ldr x7, [x21, #0x8]
mov x0, x19
mov w1, w26
mov w2, w24
ldr w3, [sp, #0x4]
mov w4, w28
mov x5, x8
mov w6, w27
bl 0x3991e8 ; ncnn::Mat::create(int, int, int, int, unsigned long, int, ncnn::Allocator*)
ldr x8, [x19]
mov w0, #-0x64
cbz x8, 0x6e58e4 ; <+1448>
ldr x8, [x19, #0x40]
ldrsw x9, [x19, #0x38]
mul x8, x8, x9
cbz x8, 0x6e58e4 ; <+1448>
ldr x8, [x23]
mul w9, w24, w26
ldr w10, [sp, #0x4]
ldur x8, [x8, #-0x18]
mul w9, w9, w10
add x10, x23, x8
mul w11, w9, w27
ldr w9, [x10, #0xc8]
str w11, [sp, #0x8]
cmp w9, #0x1
b.ne 0x6e5620 ; <+740>
ldr w9, [x10, #0xcc]
ldr w25, [sp]
cmp w9, #0x2
b.ne 0x6e5714 ; <+984>
ldp w8, w9, [x20, #0x2c]
ldr w11, [x20, #0x18]
adrp x24, 719
add x24, x24, #0xcc0
ldr w2, [x21, #0x4]
mov x0, x24
mov w1, w25
mul w8, w9, w8
ldp w9, w10, [x20, #0x34]
mul w8, w8, w9
stur w10, [x29, #-0xc]
mul w8, w8, w11
str w8, [sp, #0x10]
bl 0x98ae40 ; symbol stub for: __kmpc_push_num_threads
adrp x2, 0
sub x3, x29, #0xc
add x2, x2, #0xc98
add x6, sp, #0x10
mov x0, x24
mov w1, #0x4
mov x4, x20
mov x5, x19
bl 0x98ae50 ; symbol stub for: __kmpc_fork_call
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x9, x23, x8
ldr w9, [x9, #0xc8]
cmp w9, #0x2
b.eq 0x6e562c ; <+752>
cmp w9, #0x3
b.eq 0x6e56b0 ; <+884>
cmp w9, #0x1
b.eq 0x6e5714 ; <+984>
b 0x6e573c ; <+1024>
ldr w25, [sp]
cmp w9, #0x2
b.ne 0x6e560c ; <+720>
add x8, x23, x8
ldr w8, [x8, #0xcc]
cmp w8, #0x1
b.ne 0x6e573c ; <+1024>
ldp w8, w9, [x20, #0x2c]
ldr w11, [x20, #0x18]
adrp x24, 719
add x24, x24, #0xcc0
ldr w2, [x21, #0x4]
mov x0, x24
mov w1, w25
mul w8, w9, w8
ldp w9, w10, [x20, #0x34]
mul w8, w8, w9
stur w10, [x29, #-0xc]
mul w8, w8, w11
str w8, [sp, #0x10]
bl 0x98ae40 ; symbol stub for: __kmpc_push_num_threads
adrp x2, 0
sub x3, x29, #0xc
add x2, x2, #0xe70
add x6, sp, #0x10
mov x0, x24
mov w1, #0x4
mov x4, x20
mov x5, x19
bl 0x98ae50 ; symbol stub for: __kmpc_fork_call
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x9, x23, x8
ldr w9, [x9, #0xc8]
cmp w9, #0x3
b.ne 0x6e5614 ; <+728>
add x8, x23, x8
ldr w8, [x8, #0xcc]
cmp w8, #0x1
b.ne 0x6e573c ; <+1024>
adrp x24, 719
ldr w2, [x21, #0x4]
add x24, x24, #0xcc0
mov w1, w25
mov x0, x24
bl 0x98ae40 ; symbol stub for: __kmpc_push_num_threads
adrp x2, 0
add x3, sp, #0xc
add x2, x2, #0x9f8
add x6, sp, #0x8
mov x0, x24
mov w1, #0x4
mov x4, x20
mov x5, x19
bl 0x98ae50 ; symbol stub for: __kmpc_fork_call
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x9, x23, x8
ldr w9, [x9, #0xc8]
cmp w9, #0x1
b.ne 0x6e573c ; <+1024>
add x8, x23, x8
ldr w8, [x8, #0xcc]
cmp w8, #0x4
b.ne 0x6e573c ; <+1024>
bl 0x3a10cc ; ncnn::cpu_support_arm_bf16()
cbz w0, 0x6e586c ; <+1328>
mov x0, x20
mov x1, x19
mov x2, x21
bl 0x6e7ad4 ; ncnn::cast_fp32_to_bf16_neon_bf16(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&)
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x8, x23, x8
ldr w9, [x8, #0xc8]
cmp w9, #0x4
b.ne 0x6e58e0 ; <+1444>
ldr w8, [x8, #0xcc]
cmp w8, #0x1
b.ne 0x6e58e0 ; <+1444>
bl 0x3a10cc ; ncnn::cpu_support_arm_bf16()
cbz w0, 0x6e580c ; <+1232>
mov x0, x20
mov x1, x19
mov x2, x21
bl 0x6e7d6c ; ncnn::cast_bf16_to_fp32_neon_bf16(ncnn::Mat const&, ncnn::Mat&, ncnn::Option const&)
b 0x6e58e0 ; <+1444>
ldr x5, [x21, #0x8]
mov x0, x19
mov w1, w26
mov w2, w24
mov x3, x8
mov w4, w27
bl 0x398e8c ; ncnn::Mat::create(int, int, unsigned long, int, ncnn::Allocator*)
ldr x8, [x19]
mov w0, #-0x64
cbnz x8, 0x6e554c ; <+528>
b 0x6e58e4 ; <+1448>
cbz x1, 0x6e57b4 ; <+1144>
mov x0, x1
bl 0x98a120 ; symbol stub for: free
str xzr, [x19, #0x40]
mov w0, wzr
stp xzr, xzr, [x19, #0x8]
str xzr, [x19]
str wzr, [x19, #0x18]
str wzr, [x19, #0x38]
stp xzr, xzr, [x19, #0x28]
ldr q0, [x20]
str q0, [x19]
ldr x8, [x20, #0x10]
str x8, [x19, #0x10]
ldr w8, [x20, #0x18]
str w8, [x19, #0x18]
ldr x8, [x20, #0x20]
str x8, [x19, #0x20]
ldur q0, [x20, #0x28]
stur q0, [x19, #0x28]
ldr w8, [x20, #0x38]
str w8, [x19, #0x38]
ldr x8, [x20, #0x40]
str x8, [x19, #0x40]
b 0x6e58e4 ; <+1448>
ldp w8, w9, [x20, #0x2c]
ldr w11, [x20, #0x18]
mov w1, w25
ldr w2, [x21, #0x4]
adrp x21, 719
add x21, x21, #0xcc0
mul w8, w9, w8
mov x0, x21
ldp w9, w10, [x20, #0x34]
mul w8, w8, w9
stur w10, [x29, #-0xc]
mul w8, w8, w11
str w8, [sp, #0x10]
bl 0x98ae40 ; symbol stub for: __kmpc_push_num_threads
adrp x2, 1
sub x3, x29, #0xc
add x2, x2, #0x298
add x6, sp, #0x10
mov x0, x21
mov w1, #0x4
mov x4, x20
mov x5, x19
bl 0x98ae50 ; symbol stub for: __kmpc_fork_call
b 0x6e58e0 ; <+1444>
ldp w8, w9, [x20, #0x2c]
ldr w11, [x20, #0x18]
adrp x24, 719
add x24, x24, #0xcc0
ldr w2, [x21, #0x4]
mov x0, x24
mov w1, w25
mul w8, w9, w8
ldp w9, w10, [x20, #0x34]
mul w8, w8, w9
stur w10, [x29, #-0xc]
mul w8, w8, w11
str w8, [sp, #0x10]
bl 0x98ae40 ; symbol stub for: __kmpc_push_num_threads
adrp x2, 1
sub x3, x29, #0xc
add x2, x2, #0x48
add x6, sp, #0x10
mov x0, x24
mov w1, #0x4
mov x4, x20
mov x5, x19
bl 0x98ae50 ; symbol stub for: __kmpc_fork_call
ldr x8, [x23]
ldur x8, [x8, #-0x18]
add x8, x23, x8
ldr w9, [x8, #0xc8]
cmp w9, #0x4
b.eq 0x6e5754 ; <+1048>
mov w0, wzr
ldr x8, [x22, #0x28]
ldur x9, [x29, #-0x8]
cmp x8, x9
b.ne 0x6e5914 ; <+1496>
ldp x20, x19, [sp, #0x70]
ldp x22, x21, [sp, #0x60]
ldp x24, x23, [sp, #0x50]
ldp x26, x25, [sp, #0x40]
ldp x28, x27, [sp, #0x30]
ldp x29, x30, [sp, #0x20]
add sp, sp, #0x80
ret
bl 0x98a0e0 ; symbol stub for: __stack_chk_fail

__kmp_env_get:
sub sp, sp, #0x50
stp x29, x30, [sp, #0x30]
stp x20, x19, [sp, #0x40]
add x29, sp, #0x30
bl 0x9904b0 ; symbol stub for: getenv
cbz x0, 0x92f324 ; <+60>
mov x19, x0
bl 0x98aa20 ; symbol stub for: strlen
add x20, x0, #0x1
mov x0, x20
bl 0x98a810 ; symbol stub for: malloc
cbz x0, 0x92f334 ; <+76>
mov x1, x19
mov x2, x20
bl 0x9907d0 ; symbol stub for: strncpy
ldp x20, x19, [sp, #0x40]
ldp x29, x30, [sp, #0x30]
add sp, sp, #0x50
ret
mov w0, #0x6e
add x8, sp, #0x18
movk w0, #0x4, lsl #16
bl 0x990640 ; symbol stub for: __kmp_msg_format
adrp x8, 151
ldr x8, [x8, #0xcd8]
add x0, sp, #0x18
mov x1, sp
ldr q0, [x8]
ldr x8, [x8, #0x10]
str q0, [sp]
str x8, [sp, #0x10]
bl 0x990890 ; symbol stub for: __kmp_fatal

__kmp_get_global_thread_id_reg:
stp x29, x30, [sp, #-0x20]!
stp x20, x19, [sp, #0x10]
adrp x20, 151
ldr x20, [x20, #0xd30]
mov x29, sp
ldr w8, [x20]
cbz w8, 0x92fd18 ; <+132>
adrp x8, 151
ldr x8, [x8, #0xe30]
ldr w8, [x8]
cmp w8, #0x3
b.lt 0x92fcec ; <+88>
adrp x0, 151
ldr x0, [x0, #0xe38]
bl 0x989e58 ; __emutls_get_address at emutls.c:173
ldr w19, [x0]
cmn w19, #0x2
b.eq 0x92fd18 ; <+132>
mov w0, w19
ldp x20, x19, [sp, #0x10]
ldp x29, x30, [sp], #0x20
ret
cmp w8, #0x2
b.ne 0x92fd08 ; <+116>
bl 0x990ae0 ; symbol stub for: __kmp_gtid_get_specific
mov w19, w0
cmn w19, #0x2
b.ne 0x92fcdc ; <+72>
b 0x92fd18 ; <+132>
bl 0x9907e0 ; symbol stub for: __kmp_get_global_thread_id
mov w19, w0
cmn w19, #0x2
b.ne 0x92fcdc ; <+72>
adrp x0, 151
ldr x0, [x0, #0xe50]
mov w1, #-0x2
bl 0x990970 ; symbol stub for: __kmp_acquire_ticket_lock
ldr w8, [x20]
cbz w8, 0x92fd3c ; <+168>
mov w0, wzr
bl 0x990b00 ; symbol stub for: __kmp_register_root
b 0x92fd44 ; <+176>
bl 0x92fd68 ; __kmp_do_serial_initialize()
bl 0x990ae0 ; symbol stub for: __kmp_gtid_get_specific
mov w19, w0
adrp x0, 151
ldr x0, [x0, #0xe50]
mov w1, #-0x2
bl 0x990980 ; symbol stub for: __kmp_release_ticket_lock
mov w0, w19
ldp x20, x19, [sp, #0x10]
ldp x29, x30, [sp], #0x20
ret

__kmp_do_serial_initialize():
sub sp, sp, #0x80
stp x29, x30, [sp, #0x50]
str x21, [sp, #0x60]
stp x20, x19, [sp, #0x70]
add x29, sp, #0x50
bl 0x990b10 ; symbol stub for: ompt_pre_init
bl 0x990b20 ; symbol stub for: __kmp_validate_locks
bl 0x990b30 ; symbol stub for: __kmp_init_memkind
bl 0x990b40 ; symbol stub for: __kmp_register_library_startup()
adrp x19, 151
ldr x19, [x19, #0xe58]
adrp x0, 151
ldr wzr, [x19, #0x44]
str wzr, [x19, #0x40]
str wzr, [x19, #0x44]
ldr x0, [x0, #0xe60]
bl 0x990b50 ; symbol stub for: __kmp_init_ticket_lock
adrp x0, 151
ldr x0, [x0, #0xe68]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xe70]
bl 0x990b50 ; symbol stub for: __kmp_init_ticket_lock
adrp x0, 151
ldr x0, [x0, #0xe78]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xe80]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xe88]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xe90]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xe98]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xea0]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xea8]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xeb0]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xeb8]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xec0]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xec8]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xed0]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xed8]
bl 0x990b60 ; symbol stub for: __kmp_init_queuing_lock
adrp x0, 151
ldr x0, [x0, #0xee0]
bl 0x990b50 ; symbol stub for: __kmp_init_ticket_lock
adrp x0, 151
ldr x0, [x0, #0xee8]
bl 0x990b50 ; symbol stub for: __kmp_init_ticket_lock
adrp x0, 151
ldr x0, [x0, #0xef0]
bl 0x990b50 ; symbol stub for: __kmp_init_ticket_lock
bl 0x990b70 ; symbol stub for: __kmp_runtime_initialize
adrp x9, 151
adrp x8, 151
adrp x10, 151
adrp x20, 151
adrp x21, 151
adrp x11, 151
adrp x12, 151
adrp x13, 151
ldr x9, [x9, #0xef8]
ldr x8, [x8, #0xd88]
ldr x10, [x10, #0xf00]
ldr x20, [x20, #0xf08]
ldr x21, [x21, #0xf10]
ldr x11, [x11, #0xf18]
ldr x12, [x12, #0xf20]
ldr x13, [x13, #0xf28]
adrp x15, 151
ldr x15, [x15, #0xf30]
mov w14, #0xc8
str w14, [x13]
mov w13, #0x3
adrp x14, 151
ldr x14, [x14, #0xf38]
str w13, [x15]
adrp x15, 151
ldr x15, [x15, #0xf40]
mov w13, #0x29
str w13, [x14]
adrp x13, 151
adrp x14, 151
ldr w15, [x15]
ldr x13, [x13, #0xf48]
ldr x14, [x14, #0xf50]
mov x0, xzr
stp w15, w15, [x14]
mov w15, #0x1
str w15, [x14, #0x8]
adrp x14, 151
ldr x14, [x14, #0xf58]
ldr w13, [x13]
str w15, [x14]
adrp x14, 151
ldr x14, [x14, #0xf60]
str w15, [x14, #0x8]
adrp x15, 151
ldr x15, [x15, #0xf68]
stp w13, w13, [x14]
adrp x13, 151
adrp x14, 151
ldr x13, [x13, #0xf70]
ldr x14, [x14, #0xf78]
str wzr, [x9]
mov w9, #0x2
str w9, [x13, #0x8]
str w9, [x14, #0x8]
adrp x9, 151
ldr x9, [x9, #0xf80]
str wzr, [x9]
adrp x9, 151
ldr x9, [x9, #0xf88]
ldr w8, [x8]
ldr w10, [x10]
ldr w15, [x15]
ldr w9, [x9]
cmp w8, #0x1
str w10, [x21]
str w10, [x11]
stp w9, w9, [x14]
csinc w9, w8, wzr, gt
cmp w9, w10
csel w9, w10, w9, gt
cmp w8, w10
csel w8, w10, w8, gt
stp w15, w15, [x13]
str w9, [x20]
str w8, [x12]
str xzr, [x19, #0x48]
bl 0x990b80 ; symbol stub for: __kmp_env_initialize(char const*)
ldr w0, [x20]
bl 0x990b90 ; symbol stub for: __kmp_initial_threads_capacity(int)
adrp x9, 151
ldr w8, [x20]
ldr w1, [x21]
ldr x9, [x9, #0xf90]
adrp x19, 151
ldr w2, [x9]
ldr x19, [x19, #0xe40]
str w0, [x19]
mov w0, w8
bl 0x990ba0 ; symbol stub for: __kmp_default_tp_capacity(int, int, int)
adrp x8, 151
adrp x9, 151
adrp x10, 151
ldr x8, [x8, #0xf98]
ldr x9, [x9, #0xfa0]
b 0x989fec ; __CortexA53843419_930004
ldr w11, [x19]
str w0, [x8]
str xzr, [x9]
str xzr, [x10]
adrp x8, 150
ldr x8, [x8, #0xfb0]
str xzr, [x8]
lsl w8, w11, #4
add w8, w8, #0x40
sxtw x0, w8
bl 0x9909b0 ; symbol stub for: ___kmp_allocate
adrp x20, 150
ldr x20, [x20, #0xc90]
ldrsw x8, [x19]
adrp x21, 150
adrp x9, 150
adrp x10, 150
ldr x21, [x21, #0xfb8]
ldr x9, [x9, #0xc88]
ldr x10, [x10, #0xd78]
str x0, [x20]
add x8, x0, x8, lsl #3
mov w0, #0x1
str x8, [x21]
str wzr, [x9]
str wzr, [x10]
bl 0x990b00 ; symbol stub for: __kmp_register_root
mov w19, w0
tbnz w0, #0x1f, 0x9300a4 ; <+828>
ldr x8, [x21]
ldr x8, [x8, w19, uxtw #3]
cbz x8, 0x9300a4 ; <+828>
ldr x9, [x20]
mov w10, w19
ldr x9, [x9, x10, lsl #3]
cbz x9, 0x9300a4 ; <+828>
ldr x8, [x8, #0x18]
cmp x9, x8
b.eq 0x9300bc ; <+852>
adrp x0, -2116
adrp x1, -2085
add x0, x0, #0xa1a
add x1, x1, #0x988
mov w2, #0x1a26
bl 0x9905e0 ; symbol stub for: __kmp_debug_assert
cbz w19, 0x9300d8 ; <+880>
adrp x0, -2116
adrp x1, -2085
add x0, x0, #0xa1a
add x1, x1, #0x988
mov w2, #0x1a27
bl 0x9905e0 ; symbol stub for: __kmp_debug_assert
dmb ish
bl 0x990bb0 ; symbol stub for: __kmp_common_initialize
bl 0x990bc0 ; symbol stub for: __kmp_register_atfork
adrp x0, 150
ldr x0, [x0, #0xfc0]
bl 0x34ebf0 ; atexit
cbnz w0, 0x930170 ; <+1032>
bl 0x990bd0 ; symbol stub for: __kmp_install_signals
adrp x8, 150
ldr x8, [x8, #0xfc8]
adrp x10, 150
mov w11, #0x1
ldr w9, [x8]
ldr x10, [x10, #0xd30]
add w9, w9, #0x1
str w9, [x8]
str w11, [x10]
adrp x8, 150
ldr x8, [x8, #0xfd0]
ldr w8, [x8]
cbz w8, 0x930130 ; <+968>
bl 0x990be0 ; symbol stub for: __kmp_env_print()
adrp x8, 150
adrp x9, 150
ldr x8, [x8, #0xfd8]
ldr x9, [x9, #0xfe0]
ldr w8, [x8]
ldr w9, [x9]
orr w8, w9, w8
cbz w8, 0x930154 ; <+1004>
bl 0x990bf0 ; symbol stub for: __kmp_env_print_2()
bl 0x990c00 ; symbol stub for: ompt_post_init
dmb ish
ldp x20, x19, [sp, #0x70]
ldr x21, [sp, #0x60]
ldp x29, x30, [sp, #0x50]
add sp, sp, #0x80
ret
mov w19, w0
adrp x1, -2079
mov w0, #0xb2
add x1, x1, #0xa16
sub x8, x29, #0x18
movk w0, #0x4, lsl #16
bl 0x990640 ; symbol stub for: __kmp_msg_format
add x8, sp, #0x20
mov w0, w19
bl 0x990a10 ; symbol stub for: __kmp_msg_error_code
adrp x8, 150
ldr x8, [x8, #0xcd8]
sub x0, x29, #0x18
add x1, sp, #0x20
mov x2, sp
ldr q0, [x8]
ldr x8, [x8, #0x10]
str q0, [sp]
str x8, [sp, #0x10]
bl 0x990890 ; symbol stub for: __kmp_fatal

__kmp_abort_thread:
stp x29, x30, [sp, #-0x10]!
mov x29, sp
bl 0x990c90 ; symbol stub for: __kmp_infinite_loop

__kmp_serial_initialize:
stp x29, x30, [sp, #-0x20]!
str x19, [sp, #0x10]
adrp x19, 143
ldr x19, [x19, #0xd30]
mov x29, sp
ldr w8, [x19]
cbz w8, 0x937444 ; <+40>
ldr x19, [sp, #0x10]
ldp x29, x30, [sp], #0x20
ret
adrp x0, 143
ldr x0, [x0, #0xe50]
mov w1, #-0x2
bl 0x990970 ; symbol stub for: __kmp_acquire_ticket_lock
ldr w8, [x19]
cbnz w8, 0x937460 ; <+68>
bl 0x92fd68 ; __kmp_do_serial_initialize()
adrp x0, 143
ldr x0, [x0, #0xe50]
ldr x19, [sp, #0x10]
mov w1, #-0x2
ldp x29, x30, [sp], #0x20
b 0x990980 ; symbol stub for: __kmp_release_ticket_lock

abort:
sub sp, sp, #0xd0
str x21, [sp, #0xa0]
stp x20, x19, [sp, #0xb0]
stp x29, x30, [sp, #0xc0]
add x29, sp, #0xc0
sub x0, x29, #0x18
bl 0xe4e50 ; symbol stub for: sigfillset64
sub x0, x29, #0x18
orr w1, wzr, #0x6
bl 0xe4a10 ; symbol stub for: sigdelset64
sub x1, x29, #0x18
orr w0, wzr, #0x2
mov x2, xzr
bl 0xe4e60 ; symbol stub for: sigprocmask64
mov w0, #0xac
bl 0xe4bb0 ; symbol stub for: syscall
mov x19, x0
mov w0, #0xb2
bl 0xe4bb0 ; symbol stub for: syscall
mov x20, x0
movi v0.2d, #0000000000000000
mov w21, #-0x1
stp q0, q0, [sp, #0x20]
stp q0, q0, [sp, #0x80]
stp q0, q0, [sp, #0x60]
stp q0, q0, [sp, #0x40]
str w21, [sp, #0x28]
str w19, [sp, #0x30]
bl 0xe4a90 ; symbol stub for: getuid
mov w8, w0
str xzr, [sp, #0x38]
sxtw x0, w19
sxtw x1, w20
add x3, sp, #0x20
adrp x9, -93
add x9, x9, #0x4e0
ldp q1, q0, [x9]
str w8, [sp, #0x34]
orr w2, wzr, #0x6
orr w8, wzr, #0xf0
svc #0
stp q1, q0, [sp]
mov x1, sp
orr w0, wzr, #0x6
mov x2, xzr
bl 0xe4e70 ; symbol stub for: sigaction64
sub x1, x29, #0x18
orr w0, wzr, #0x2
mov x2, xzr
bl 0xe4e60 ; symbol stub for: sigprocmask64
mov w0, #0xac
bl 0xe4bb0 ; symbol stub for: syscall
mov x19, x0
mov w0, #0xb2
bl 0xe4bb0 ; symbol stub for: syscall
mov x20, x0
movi v0.2d, #0000000000000000
stp q0, q0, [sp, #0x20]
stp q0, q0, [sp, #0x80]
stp q0, q0, [sp, #0x60]
stp q0, q0, [sp, #0x40]
str w21, [sp, #0x28]
str w19, [sp, #0x30]
bl 0xe4a90 ; symbol stub for: getuid
mov w8, w0
str xzr, [sp, #0x38]
sxtw x0, w19
sxtw x1, w20
add x3, sp, #0x20
str w8, [sp, #0x34]
orr w2, wzr, #0x6
orr w8, wzr, #0xf0
svc #0
orr w0, wzr, #0x7f
bl 0xe4e80 ; symbol stub for: _exit

我检查后认为,这可能是因为设备不支持arm82,故在编译时于CMakeLists.txt中增加了:
option(NCNN_ARM82 "optimize aarch64 platform with armv8.2 fp16" OFF)
此时编译语句为:
cd ncnn
mkdir -p build-android-armv7
cd build-android-armv7
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-28 -DNCNN_VULKAN=ON -DNCNN_ARM82=OFF ..
make -j16
make install

重新编译后,将其导入android项目,报错仍然没有任何变化。为何我修改了ARM82编译选项后,程序还是会进入cast_float32_to_float16这个函数?为何我的修改没有生效?

@jdskddd jdskddd closed this as completed Aug 2, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant