Source: /Users/flash/Documents/Code/external_code/ToT_LLVM/llvm-project/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll -- 1. ModuleToFunctionPassAdaptor -- 1. PassManager : Skipping NOP -- 2. LoopVectorizePass ---------------------------------------- define void @same_step_and_size(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv %l = load i32, ptr %gep.a, align 4 %mul = mul nsw i32 %l, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! (syntactically equal) -- 3. LoopVectorizePass ---------------------------------------- define void @same_step_and_size(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv %l = load i32, ptr %gep.a, align 4 %mul = mul nsw i32 %l, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } => define void @same_step_and_size(ptr %a, ptr %b, i64 %n) { entry: %a2 = ptrtoint ptr %a to i64 %b1 = ptrtoint ptr %b to i64 %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %#0 = sub i64 %b1, %a2 %diff.check = icmp ult i64 %#0, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#1 = add i64 %index, 0 %#2 = gep inbounds ptr %a, 4 x i64 %#1 %#3 = gep inbounds ptr %#2, 4 x i32 0 %wide.load = load <4 x i32>, ptr %#3, align 4 %#4 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 } %#5 = gep inbounds ptr %b, 4 x i64 %#1 %#6 = gep inbounds ptr %#5, 4 x i32 0 store <4 x i32> %#4, ptr %#6, align 4 %index.next = add nuw i64 %index, 4 %#7 = icmp eq i64 %index.next, %n.vec br i1 %#7, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ] br label %loop loop: %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv %l = load i32, ptr %gep.a, align 4 %mul = mul nsw i32 %l, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! -- 4. PassManager : Skipping NOP -- 5. PassManager : Skipping NOP -- 6. LoopVectorizePass ---------------------------------------- define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] %cmp = icmp ne i64 %iv, %x br i1 %cmp, label %then, label %else then: %gep.a = gep inbounds ptr %a, 4 x i64 %iv store i32 0, ptr %gep.a, align 4 br label %loop.latch else: %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 10, ptr %gep.b, align 4 br label %loop.latch loop.latch: %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! (syntactically equal) -- 7. LoopVectorizePass ---------------------------------------- define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] %cmp = icmp ne i64 %iv, %x br i1 %cmp, label %then, label %else then: %gep.a = gep inbounds ptr %a, 4 x i64 %iv store i32 0, ptr %gep.a, align 4 br label %loop.latch else: %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 10, ptr %gep.b, align 4 br label %loop.latch loop.latch: %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } => define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) { entry: %b2 = ptrtoint ptr %b to i64 %a1 = ptrtoint ptr %a to i64 %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %#0 = sub i64 %a1, %b2 %diff.check = icmp ult i64 %#0, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, 0, 0, 0, 0 br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %pred.store.continue16 ] %vec.ind = phi <4 x i64> [ { 0, 1, 2, 3 }, %vector.ph ], [ %vec.ind.next, %pred.store.continue16 ] %#1 = add i64 %index, 0 %#2 = add i64 %index, 1 %#3 = add i64 %index, 2 %#4 = add i64 %index, 3 %#5 = icmp ne <4 x i64> %vec.ind, %broadcast.splat %#6 = xor <4 x i1> %#5, { 1, 1, 1, 1 } %#7 = extractelement <4 x i1> %#6, i32 0 br i1 %#7, label %pred.store.if, label %pred.store.continue pred.store.if: %#8 = gep inbounds ptr %b, 4 x i64 %#1 store i32 10, ptr %#8, align 4 br label %pred.store.continue pred.store.continue: %#9 = extractelement <4 x i1> %#6, i32 1 br i1 %#9, label %pred.store.if3, label %pred.store.continue4 pred.store.if3: %#10 = gep inbounds ptr %b, 4 x i64 %#2 store i32 10, ptr %#10, align 4 br label %pred.store.continue4 pred.store.continue4: %#11 = extractelement <4 x i1> %#6, i32 2 br i1 %#11, label %pred.store.if5, label %pred.store.continue6 pred.store.if5: %#12 = gep inbounds ptr %b, 4 x i64 %#3 store i32 10, ptr %#12, align 4 br label %pred.store.continue6 pred.store.continue6: %#13 = extractelement <4 x i1> %#6, i32 3 br i1 %#13, label %pred.store.if7, label %pred.store.continue8 pred.store.if7: %#14 = gep inbounds ptr %b, 4 x i64 %#4 store i32 10, ptr %#14, align 4 br label %pred.store.continue8 pred.store.continue8: %#15 = extractelement <4 x i1> %#5, i32 0 br i1 %#15, label %pred.store.if9, label %pred.store.continue10 pred.store.if9: %#16 = gep inbounds ptr %a, 4 x i64 %#1 store i32 0, ptr %#16, align 4 br label %pred.store.continue10 pred.store.continue10: %#17 = extractelement <4 x i1> %#5, i32 1 br i1 %#17, label %pred.store.if11, label %pred.store.continue12 pred.store.if11: %#18 = gep inbounds ptr %a, 4 x i64 %#2 store i32 0, ptr %#18, align 4 br label %pred.store.continue12 pred.store.continue12: %#19 = extractelement <4 x i1> %#5, i32 2 br i1 %#19, label %pred.store.if13, label %pred.store.continue14 pred.store.if13: %#20 = gep inbounds ptr %a, 4 x i64 %#3 store i32 0, ptr %#20, align 4 br label %pred.store.continue14 pred.store.continue14: %#21 = extractelement <4 x i1> %#5, i32 3 br i1 %#21, label %pred.store.if15, label %pred.store.continue16 pred.store.if15: %#22 = gep inbounds ptr %a, 4 x i64 %#4 store i32 0, ptr %#22, align 4 br label %pred.store.continue16 pred.store.continue16: %index.next = add nuw i64 %index, 4 %vec.ind.next = add <4 x i64> %vec.ind, { 4, 4, 4, 4 } %#23 = icmp eq i64 %index.next, %n.vec br i1 %#23, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ] br label %loop loop: %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop.latch ] %cmp = icmp ne i64 %iv, %x br i1 %cmp, label %then, label %else then: %gep.a = gep inbounds ptr %a, 4 x i64 %iv store i32 0, ptr %gep.a, align 4 br label %loop.latch else: %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 10, ptr %gep.b, align 4 br label %loop.latch loop.latch: %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! -- 8. PassManager : Skipping NOP -- 9. PassManager : Skipping NOP -- 10. LoopVectorizePass ---------------------------------------- define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 2 x i64 %iv %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! (syntactically equal) -- 11. LoopVectorizePass ---------------------------------------- define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 2 x i64 %iv %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } => define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) { entry: %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %#0 = shl i64 %n, 2 %scevgep = gep ptr %b, 1 x i64 %#0 %#1 = shl i64 %n, 1 %scevgep1 = gep ptr %a, 1 x i64 %#1 %bound0 = icmp ult ptr %b, %scevgep1 %bound1 = icmp ult ptr %a, %scevgep %found.conflict = and i1 %bound0, %bound1 br i1 %found.conflict, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#2 = add i64 %index, 0 %#3 = gep inbounds ptr %a, 2 x i64 %#2 %#4 = gep inbounds ptr %#3, 2 x i32 0 %wide.load = load <4 x i16>, ptr %#4, align 2 %#5 = sext <4 x i16> %wide.load to <4 x i32> %#6 = mul nsw <4 x i32> %#5, { 3, 3, 3, 3 } %#7 = gep inbounds ptr %b, 4 x i64 %#2 %#8 = gep inbounds ptr %#7, 4 x i32 0 store <4 x i32> %#6, ptr %#8, align 4 %index.next = add nuw i64 %index, 4 %#9 = icmp eq i64 %index.next, %n.vec br i1 %#9, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ] br label %loop loop: %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 2 x i64 %iv %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! ERROR: Unsupported metadata: 7 -- 12. PassManager : Skipping NOP ERROR: Unsupported metadata: 7 -- 13. PassManager : Skipping NOP -- 14. LoopVectorizePass ---------------------------------------- define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! (syntactically equal) -- 15. LoopVectorizePass ---------------------------------------- define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } => define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) { entry: %a2 = ptrtoint ptr %a to i64 %b1 = ptrtoint ptr %b to i64 %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %#0 = add i64 %b1, -2 %#1 = sub i64 %#0, %a2 %diff.check = icmp ult i64 %#1, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#2 = add i64 %index, 0 %#3 = add i64 %index, 1 %#4 = add i64 %index, 2 %#5 = add i64 %index, 3 %#6 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1 %#7 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1 %#8 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1 %#9 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1 %#10 = load i16, ptr %#6, align 2 %#11 = load i16, ptr %#7, align 2 %#12 = load i16, ptr %#8, align 2 %#13 = load i16, ptr %#9, align 2 %#14 = insertelement <4 x i16> poison, i16 %#10, i32 0 %#15 = insertelement <4 x i16> %#14, i16 %#11, i32 1 %#16 = insertelement <4 x i16> %#15, i16 %#12, i32 2 %#17 = insertelement <4 x i16> %#16, i16 %#13, i32 3 %#18 = sext <4 x i16> %#17 to <4 x i32> %#19 = mul nsw <4 x i32> %#18, { 3, 3, 3, 3 } %#20 = gep inbounds ptr %b, 4 x i64 %#2 %#21 = gep inbounds ptr %#20, 4 x i32 0 store <4 x i32> %#19, ptr %#21, align 4 %index.next = add nuw i64 %index, 4 %#22 = icmp eq i64 %index.next, %n.vec br i1 %#22, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ] br label %loop loop: %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ] %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %l = load i16, ptr %gep.a, align 2 %l.ext = sext i16 %l to i32 %mul = mul nsw i32 %l.ext, 3 %gep.b = gep inbounds ptr %b, 4 x i64 %iv store i32 %mul, ptr %gep.b, align 4 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! -- 16. PassManager : Skipping NOP -- 17. PassManager : Skipping NOP -- 18. LoopVectorizePass ---------------------------------------- define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.b = gep inbounds ptr %b, 4 x i64 %iv %l = load i32, ptr %gep.b, align 4 %mul = mul nsw i32 %l, 3 %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %trunc = trunc i32 %mul to i16 store i16 %trunc, ptr %gep.a, align 2 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation seems to be correct! (syntactically equal) -- 19. LoopVectorizePass ---------------------------------------- define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.b = gep inbounds ptr %b, 4 x i64 %iv %l = load i32, ptr %gep.b, align 4 %mul = mul nsw i32 %l, 3 %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %trunc = trunc i32 %mul to i16 store i16 %trunc, ptr %gep.a, align 2 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } => define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) { entry: %b2 = ptrtoint ptr %b to i64 %a1 = ptrtoint ptr %a to i64 %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %#0 = add i64 %a1, 2 %#1 = sub i64 %#0, %b2 %diff.check = icmp ult i64 %#1, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#2 = add i64 %index, 0 %#3 = add i64 %index, 1 %#4 = add i64 %index, 2 %#5 = add i64 %index, 3 %#6 = gep inbounds ptr %b, 4 x i64 %#2 %#7 = gep inbounds ptr %#6, 4 x i32 0 %wide.load = load <4 x i32>, ptr %#7, align 4 %#8 = mul nsw <4 x i32> %wide.load, { 3, 3, 3, 3 } %#9 = gep inbounds ptr %a, 4 x i64 %#2, 2 x i64 1 %#10 = gep inbounds ptr %a, 4 x i64 %#3, 2 x i64 1 %#11 = gep inbounds ptr %a, 4 x i64 %#4, 2 x i64 1 %#12 = gep inbounds ptr %a, 4 x i64 %#5, 2 x i64 1 %#13 = trunc <4 x i32> %#8 to <4 x i16> %#14 = extractelement <4 x i16> %#13, i32 0 store i16 %#14, ptr %#9, align 2 %#15 = extractelement <4 x i16> %#13, i32 1 store i16 %#15, ptr %#10, align 2 %#16 = extractelement <4 x i16> %#13, i32 2 store i16 %#16, ptr %#11, align 2 %#17 = extractelement <4 x i16> %#13, i32 3 store i16 %#17, ptr %#12, align 2 %index.next = add nuw i64 %index, 4 %#18 = icmp eq i64 %index.next, %n.vec br i1 %#18, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ], [ 0, %vector.memcheck ] br label %loop loop: %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ] %gep.b = gep inbounds ptr %b, 4 x i64 %iv %l = load i32, ptr %gep.b, align 4 %mul = mul nsw i32 %l, 3 %gep.a = gep inbounds ptr %a, 4 x i64 %iv, 2 x i64 1 %trunc = trunc i32 %mul to i16 store i16 %trunc, ptr %gep.a, align 2 %iv.next = add nsw nuw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %exit, label %loop exit: ret void } Transformation doesn't verify! (not unsound) ERROR: Timeout -- 20. PassManager : Skipping NOP -- 21. PassManager : Skipping NOP -- 22. LoopVectorizePass ---------------------------------------- define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) { entry: br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv br label %inner.body inner.body: %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ] %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv %l = load i32, ptr %gep.b, align 4 %sub = sub i32 %l, 10 store i32 %sub, ptr %gep.a, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } Transformation seems to be correct! (syntactically equal) -- 23. LoopVectorizePass ---------------------------------------- define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) { entry: br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv br label %inner.body inner.body: %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ] %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv %l = load i32, ptr %gep.b, align 4 %sub = sub i32 %l, 10 store i32 %sub, ptr %gep.a, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } => define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) { entry: %#0 = shl i64 %n, 2 %scevgep2 = gep ptr %b, 1 x i64 %#0 br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %#1 = shl i64 %outer.iv, 2 %scevgep = gep ptr %a, 1 x i64 %#1 %#2 = add i64 %#1, 4 %scevgep1 = gep ptr %a, 1 x i64 %#2 %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %bound0 = icmp ult ptr %scevgep, %scevgep2 %bound1 = icmp ult ptr %b, %scevgep1 %found.conflict = and i1 %bound0, %bound1 br i1 %found.conflict, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#3 = add i64 %index, 0 %#4 = gep inbounds ptr %b, 4 x i64 %#3 %#5 = gep inbounds ptr %#4, 4 x i32 0 %wide.load = load <4 x i32>, ptr %#5, align 4 %#6 = sub <4 x i32> %wide.load, { 10, 10, 10, 10 } %#7 = extractelement <4 x i32> %#6, i32 3 store i32 %#7, ptr %gep.a, align 4 %index.next = add nuw i64 %index, 4 %#8 = icmp eq i64 %index.next, %n.vec br i1 %#8, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %outer.latch, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.header ], [ 0, %vector.memcheck ] br label %inner.body inner.body: %inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ] %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv %l = load i32, ptr %gep.b, align 4 %sub = sub i32 %l, 10 store i32 %sub, ptr %gep.a, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } Transformation seems to be correct! ERROR: Unsupported metadata: 7 -- 24. PassManager : Skipping NOP ERROR: Unsupported metadata: 7 -- 25. PassManager : Skipping NOP -- 26. LoopVectorizePass ---------------------------------------- define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) { entry: br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv br label %inner.body inner.body: %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ] %l = load i32, ptr %gep.a, align 4 %sub = sub i32 %l, 10 %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv store i32 %sub, ptr %gep.b, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } Transformation seems to be correct! (syntactically equal) -- 27. LoopVectorizePass ---------------------------------------- define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) { entry: br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv br label %inner.body inner.body: %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ] %l = load i32, ptr %gep.a, align 4 %sub = sub i32 %l, 10 %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv store i32 %sub, ptr %gep.b, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } => define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) { entry: %#0 = shl i64 %n, 2 %scevgep = gep ptr %b, 1 x i64 %#0 br label %outer.header outer.header: %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ] %#1 = shl i64 %outer.iv, 2 %scevgep1 = gep ptr %a, 1 x i64 %#1 %#2 = add i64 %#1, 4 %scevgep2 = gep ptr %a, 1 x i64 %#2 %gep.a = gep inbounds ptr %a, 4 x i64 %outer.iv %min.iters.check = icmp ult i64 %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %bound0 = icmp ult ptr %b, %scevgep2 %bound1 = icmp ult ptr %scevgep1, %scevgep %found.conflict = and i1 %bound0, %bound1 br i1 %found.conflict, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 %n, 4 %n.vec = sub i64 %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#3 = add i64 %index, 0 %#4 = load i32, ptr %gep.a, align 4 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %#4, i64 0 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, 0, 0, 0, 0 %#5 = sub <4 x i32> %broadcast.splat, { 10, 10, 10, 10 } %#6 = gep inbounds ptr %b, 4 x i64 %#3 %#7 = gep inbounds ptr %#6, 4 x i32 0 store <4 x i32> %#5, ptr %#7, align 4 %index.next = add nuw i64 %index, 4 %#8 = icmp eq i64 %index.next, %n.vec br i1 %#8, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 %n, %n.vec br i1 %cmp.n, label %outer.latch, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.header ], [ 0, %vector.memcheck ] br label %inner.body inner.body: %inner.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %inner.iv.next, %inner.body ] %l = load i32, ptr %gep.a, align 4 %sub = sub i32 %l, 10 %gep.b = gep inbounds ptr %b, 4 x i64 %inner.iv store i32 %sub, ptr %gep.b, align 4 %inner.iv.next = add nsw nuw i64 %inner.iv, 1 %inner.cond = icmp eq i64 %inner.iv.next, %n br i1 %inner.cond, label %outer.latch, label %inner.body outer.latch: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.cond = icmp eq i64 %outer.iv.next, %n br i1 %outer.cond, label %exit, label %outer.header exit: ret void } Transformation seems to be correct! ERROR: Unsupported metadata: 7 -- 28. PassManager : Skipping NOP ERROR: Unsupported metadata: 7 -- 29. PassManager : Skipping NOP -- 30. LoopVectorizePass ---------------------------------------- define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) { entry: br label %outer.loop outer.loop: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ] %mul = mul nsw i64 %outer.iv, noundef %n br label %inner.loop inner.loop: %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ] %idx = add nsw nuw i64 %iv.inner, %mul %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx %l = load i32, ptr %gep.src, align 4 %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx %add = add nsw i32 %l, 10 store i32 %add, ptr %gep.dst, align 4 %iv.inner.next = add nsw nuw i64 %iv.inner, 1 %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n br i1 %inner.exit.cond, label %inner.exit, label %inner.loop inner.exit: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m br i1 %outer.exit.cond, label %outer.exit, label %outer.loop outer.exit: ret void } Transformation seems to be correct! (syntactically equal) -- 31. LoopVectorizePass ---------------------------------------- define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) { entry: br label %outer.loop outer.loop: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ] %mul = mul nsw i64 %outer.iv, noundef %n br label %inner.loop inner.loop: %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ] %idx = add nsw nuw i64 %iv.inner, %mul %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx %l = load i32, ptr %gep.src, align 4 %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx %add = add nsw i32 %l, 10 store i32 %add, ptr %gep.dst, align 4 %iv.inner.next = add nsw nuw i64 %iv.inner, 1 %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n br i1 %inner.exit.cond, label %inner.exit, label %inner.loop inner.exit: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m br i1 %outer.exit.cond, label %outer.exit, label %outer.loop outer.exit: ret void } => define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture nowrite noundef %src, i64 noundef %m, i64 noundef %n) { entry: %src2 = ptrtoint ptr nocapture nowrite noundef %src to i64 %dst1 = ptrtoint ptr nocapture noundef %dst to i64 %#0 = sub i64 %dst1, %src2 br label %outer.loop outer.loop: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ] %mul = mul nsw i64 %outer.iv, noundef %n %min.iters.check = icmp ult i64 noundef %n, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck vector.memcheck: %diff.check = icmp ult i64 %#0, 16 br i1 %diff.check, label %scalar.ph, label %vector.ph vector.ph: %n.mod.vf = urem i64 noundef %n, 4 %n.vec = sub i64 noundef %n, %n.mod.vf br label %vector.body vector.body: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %#1 = add i64 %index, 0 %#2 = add nsw nuw i64 %#1, %mul %#3 = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %#2 %#4 = gep inbounds ptr %#3, 4 x i32 0 %wide.load = load <4 x i32>, ptr %#4, align 4 %#5 = gep inbounds ptr nocapture noundef %dst, 4 x i64 %#2 %#6 = add nsw <4 x i32> %wide.load, { 10, 10, 10, 10 } %#7 = gep inbounds ptr %#5, 4 x i32 0 store <4 x i32> %#6, ptr %#7, align 4 %index.next = add nuw i64 %index, 4 %#8 = icmp eq i64 %index.next, %n.vec br i1 %#8, label %middle.block, label %vector.body middle.block: %cmp.n = icmp eq i64 noundef %n, %n.vec br i1 %cmp.n, label %inner.exit, label %scalar.ph scalar.ph: %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %outer.loop ], [ 0, %vector.memcheck ] br label %inner.loop inner.loop: %iv.inner = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.inner.next, %inner.loop ] %idx = add nsw nuw i64 %iv.inner, %mul %gep.src = gep inbounds ptr nocapture nowrite noundef %src, 4 x i64 %idx %l = load i32, ptr %gep.src, align 4 %gep.dst = gep inbounds ptr nocapture noundef %dst, 4 x i64 %idx %add = add nsw i32 %l, 10 store i32 %add, ptr %gep.dst, align 4 %iv.inner.next = add nsw nuw i64 %iv.inner, 1 %inner.exit.cond = icmp eq i64 %iv.inner.next, noundef %n br i1 %inner.exit.cond, label %inner.exit, label %inner.loop inner.exit: %outer.iv.next = add nsw nuw i64 %outer.iv, 1 %outer.exit.cond = icmp eq i64 %outer.iv.next, noundef %m br i1 %outer.exit.cond, label %outer.exit, label %outer.loop outer.exit: ret void } Transformation doesn't verify! (unsound) ERROR: Source is more defined than target Example: ptr nocapture noundef %dst = pointer(non-local, block_id=1, offset=1, attrs=1) ptr nocapture nowrite noundef %src = pointer(non-local, block_id=1, offset=1, attrs=3) i64 noundef %m = #x0000000000000001 (1) i64 noundef %n = #x0000000000000001 (1) Source: >> Jump to %outer.loop i64 %outer.iv = #x0000000000000000 (0) i64 %mul = #x0000000000000000 (0) >> Jump to %inner.loop i64 %iv.inner = #x0000000000000000 (0) i64 %idx = #x0000000000000000 (0) ptr %gep.src = pointer(non-local, block_id=1, offset=1, attrs=3) i32 %l = poison ptr %gep.dst = pointer(non-local, block_id=1, offset=1, attrs=1) i32 %add = poison i64 %iv.inner.next = #x0000000000000001 (1) i1 %inner.exit.cond = #x1 (1) >> Jump to %inner.exit i64 %outer.iv.next = #x0000000000000001 (1) i1 %outer.exit.cond = #x1 (1) >> Jump to %outer.exit SOURCE MEMORY STATE =================== NON-LOCAL BLOCKS: Block 0 > size: 0 align: 4 alloc type: 0 alive: false address: 0 Block 1 > size: 6 align: 1 alloc type: 0 alive: true address: 3 Block 2 > size: 0 align: 1 alloc type: 0 alive: true address: 9 Target: i64 %src2 = UB triggered! Pass: LoopVectorizePass Wrote bitcode to: "/Users/flash/Documents/Code/external_code/Alive2_ToT/alive2/build/logs/runtime-checks-difference_Ck4mXqEP_zAj1.bc" ------------------- SMT STATS ------------------- Num queries: 72 Num invalid: 0 Num skips: 0 Num trivial: 42 (36.8%) Num timeout: 1 (1.4%) Num errors: 0 (0.0%) Num SAT: 47 (65.3%) Num UNSAT: 24 (33.3%) Alive2: Transform doesn't verify; aborting!