onednn_verbose,info,oneDNN v2.7.0 (commit 650085b2f3643aad05c629425983491d63b5c289) onednn_verbose,info,cpu,runtime:OpenMP,nthr:2 onednn_verbose,info,cpu,isa:AArch64 SVE (256 bits) onednn_verbose,info,gpu,runtime:none onednn_verbose,info,prim_template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.16089 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0681152 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.22607 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51489 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.114014 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25684 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49097 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25513 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10303 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46094 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0910645 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23901 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.11401 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46997 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10498 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.47388 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21094 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0620117 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09985 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43896 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23999 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0571289 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.4541 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.18188 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45483 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.27393 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08179 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48413 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20483 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43799 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0891113 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09985 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.42798 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19922 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06689 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48584 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22705 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.458008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0151367 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50415 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24805 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06787 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06982 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44482 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24292 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.11304 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.42114 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20215 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45093 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.1001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.47412 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20093 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0458984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09985 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.4458 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22998 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.060791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.47607 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20801 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43701 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.27197 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48291 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19995 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06592 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45703 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23584 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09912 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0510254 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50806 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22095 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46313 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.447021 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0148926 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07178 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.448 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23096 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44092 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46899 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20508 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06787 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.046875 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.47095 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23901 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09692 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41382 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2229 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.42505 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22388 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0700684 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46484 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.16406 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06592 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.427 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23706 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0529785 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41406 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.078125 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19702 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0461426 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09888 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.448 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23901 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10205 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44409 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20386 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0752 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06982 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.4541 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23096 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.447021 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0170898 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08521 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.302 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23804 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.046875 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46094 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23193 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49878 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19897 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0488281 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44092 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20703 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0620117 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41382 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20703 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06494 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06787 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06689 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0629883 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.42505 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0900879 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.271 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09521 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.448 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.15698 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43604 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23511 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.39819 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19995 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06201 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43506 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21509 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.046875 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0752 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43091 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20312 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06396 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06592 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44604 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0791016 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22388 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.450195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0158691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06494 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06689 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46387 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21411 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.37012 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.104004 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21899 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08618 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.060791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43896 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.16309 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.39209 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0791016 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22998 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.38696 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0810547 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19702 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0500488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44897 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22192 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0620117 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45996 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.1731 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46704 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22485 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.046875 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.39697 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.18311 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09912 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41113 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09692 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09814 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46899 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20898 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0488281 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44312 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22119 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.447021 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0158691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.046875 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44092 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24487 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06201 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41992 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2229 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41089 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.18115 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06592 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41089 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22705 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44702 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.18994 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48315 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2251 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43213 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.1731 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0708 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45093 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22803 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.4231 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.18506 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06592 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06909 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.427 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0791016 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21899 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.40503 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0849609 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.17603 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0471191 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43604 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20508 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.444092 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0170898 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06714 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41895 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.229 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06982 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06787 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44312 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20996 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0478516 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41699 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19702 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43896 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0891113 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07178 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43799 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22681 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06396 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08618 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45093 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23608 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0529785 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46118 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.10498 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.16309 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06714 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0649414 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43408 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0910645 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2522 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0490723 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.42505 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0891113 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.19507 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.46704 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25488 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0480957 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45508 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0900879 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.20703 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08521 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0551758 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49902 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23486 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.450928 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0170898 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06982 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0649414 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53101 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22217 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51196 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.28613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.1001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0979 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49121 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23804 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51392 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.29785 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10498 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51392 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21509 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.52295 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53711 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21411 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48999 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2688 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10693 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0759277 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.54712 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23486 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08008 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50513 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26709 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08618 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53979 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21094 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09717 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51196 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22583 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.449951 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0180664 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08423 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09619 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0571289 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.56592 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08179 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.4978 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0891113 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0571289 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50317 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0891113 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22412 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09888 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0629883 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10205 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50806 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0861816 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.28198 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09888 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08887 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.55298 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0900879 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.1001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53101 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25806 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09717 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09888 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09595 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10498 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53613 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22803 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10083 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53003 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.29321 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0979 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10498 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10205 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.55884 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0859375 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25513 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.1001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10596 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.5188 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.28491 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08179 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.56714 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24023 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09009 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0979 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0749512 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.53711 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0898438 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26416 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.449951 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0178223 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.52197 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.28198 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08521 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07715 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0620117 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08081 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51611 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.102051 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09814 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09985 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10303 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10107 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48389 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.22607 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08813 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10303 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50903 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26294 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09888 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.54297 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10376 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.55103 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.29199 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09692 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.10107 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0551758 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08984 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.52393 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0720215 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51782 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0871582 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.27808 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09814 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0918 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.5061 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0910645 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2749 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07397 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08105 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.52002 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08496 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09692 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.50903 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.25195 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07983 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0551758 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49292 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0888672 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26392 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.446045 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0161133 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09497 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09204 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49414 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0881348 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.16406 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06714 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07886 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0588379 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0791 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49902 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.143066 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24609 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06201 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.073 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06689 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0571289 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.5 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.142822 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23291 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06689 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0739746 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.51489 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.14209 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26196 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07104 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09106 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09302 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0598145 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0581055 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.0769 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.44604 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0869141 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.27002 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07788 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0610352 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0578613 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.45312 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.143066 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23999 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.1001 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0620117 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0539551 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08691 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.49512 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.138916 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.23218 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.09399 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07593 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0600586 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0559082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06885 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.5 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.133057 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.27197 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07617 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08398 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0751953 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0568848 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08594 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.39917 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.134033 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.21704 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07812 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08301 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.0688477 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0561523 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08911 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.43994 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.0878906 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.2312 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06714 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07007 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07495 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0688477 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.07202 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.41187 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.138916 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.24097 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08716 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.06616 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08203 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x64:12x64x32:12x32x32,0.059082 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:abc:f0 wei_f32::blocked:abc:f0 dst_f32::blocked:abc:f0,attr-scratchpad:user ,,12x32x32:12x32x64:12x32x64,0.0549316 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x768:32x768,1.08789 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x768:768x3072:32x3072,4.48193 onednn_verbose,exec,cpu,eltwise,jit:sve_256,forward_training,data_f32::blocked:abc:f0 diff_undef::undef::f0,attr-scratchpad:user ,alg:eltwise_gelu_erf alpha:0 beta:0,1x32x3072,0.14209 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,32x3072:3072x768:32x768,4.26294 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x768:1x768,0.452881 onednn_verbose,exec,cpu,matmul,gemm:jit,undef,src_f32::blocked:ab:f0 wei_f32::blocked:ba:f0 dst_f32::blocked:ab:f0,attr-scratchpad:user attr-post-ops:sum ,,1x768:768x2:1x2,0.0170898 --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ aten::slice 0.03% 82.000us 0.03% 89.000us 17.800us 5 aten::as_strided 0.09% 273.000us 0.09% 273.000us 1.011us 270 aten::unsqueeze 0.00% 9.000us 0.00% 11.000us 5.500us 2 aten::to 0.03% 87.000us 0.10% 310.000us 20.667us 15 aten::_to_copy 0.04% 135.000us 0.08% 238.000us 15.867us 15 aten::empty_strided 0.02% 52.000us 0.02% 52.000us 3.467us 15 aten::copy_ 0.52% 1.620ms 0.52% 1.620ms 11.825us 137 aten::rsub 0.00% 8.000us 0.01% 28.000us 28.000us 1 aten::sub 0.00% 13.000us 0.01% 20.000us 20.000us 1 aten::mul 0.00% 8.000us 0.00% 13.000us 13.000us 1 aten::embedding 0.01% 37.000us 0.05% 166.000us 55.333us 3 aten::reshape 0.11% 338.000us 0.14% 437.000us 8.569us 51 aten::_reshape_alias 0.03% 99.000us 0.03% 99.000us 1.941us 51 aten::index_select 0.02% 53.000us 0.03% 79.000us 26.333us 3 aten::empty 0.07% 213.000us 0.07% 213.000us 1.690us 126 aten::select 0.01% 29.000us 0.01% 32.000us 4.571us 7 aten::view 0.26% 814.000us 0.26% 814.000us 3.322us 245 aten::add 0.24% 733.000us 0.24% 733.000us 19.811us 37 aten::add_ 0.00% 12.000us 0.00% 12.000us 12.000us 1 aten::layer_norm 0.05% 149.000us 0.43% 1.349ms 53.960us 25 aten::native_layer_norm 0.34% 1.060ms 0.39% 1.200ms 48.000us 25 aten::dropout 0.01% 17.000us 0.01% 17.000us 0.447us 38 aten::linear 0.42% 1.307ms 72.03% 223.654ms 3.022ms 74 aten::t 0.10% 320.000us 0.23% 726.000us 9.811us 74 aten::transpose 0.11% 350.000us 0.15% 472.000us 5.488us 86 aten::addmm 70.79% 219.827ms 71.17% 220.985ms 2.986ms 74 aten::expand 0.12% 374.000us 0.14% 426.000us 3.492us 122 aten::resolve_conj 0.00% 0.000us 0.00% 0.000us 0.000us 196 aten::permute 0.09% 281.000us 0.12% 362.000us 7.542us 48 aten::matmul 0.15% 475.000us 17.35% 53.882ms 2.245ms 24 aten::bmm 16.67% 51.773ms 16.97% 52.704ms 2.196ms 24 aten::contiguous 0.06% 173.000us 0.35% 1.089ms 22.688us 48 aten::clone 0.09% 272.000us 0.33% 1.016ms 21.167us 48 aten::empty_like 0.04% 111.000us 0.07% 209.000us 4.354us 48 aten::_unsafe_view 0.03% 93.000us 0.03% 93.000us 3.875us 24 aten::div 0.07% 222.000us 0.15% 455.000us 37.917us 12 aten::softmax 0.01% 43.000us 0.26% 815.000us 67.917us 12 aten::_softmax 0.25% 772.000us 0.25% 772.000us 64.333us 12 aten::gelu 9.10% 28.255ms 9.10% 28.255ms 2.355ms 12 aten::tanh 0.01% 29.000us 0.01% 29.000us 29.000us 1 --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ Self CPU time total: 310.518ms Inference time: 0.3231241703033447