Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kernel arguments passed by reference and passed by value are compiled into identical HSAIL code #25

Open
ghost opened this issue Apr 4, 2016 · 0 comments

Comments

@ghost
Copy link

ghost commented Apr 4, 2016

I'm on the latest HLC compiler from the branch hsail-stable-3.7. The generated HSAIL code for the following two kernels is identical, even though the first kernel takes args by reference (pointer) and the second kernel takes args by value. I compiled both kernels with -O2.

First kernel:

struct KernArgs
{
    uint  Arg32;
    ulong Arg64;
};

__kernel void Main(__global ulong *res, const __global struct KernArgs *args)
{
    *res = (ulong)args->Arg32|args->Arg64;
}

Second kernel:

struct KernArgs
{
    uint  Arg32;
    ulong Arg64;
};

__kernel void Main(__global ulong *res, const struct KernArgs args)
{
    *res = (ulong)args.Arg32|args.Arg64;
}

Compiling both kernels yields identical HSAIL code:

module &__llvm_hsail_module:1:0:$full:$large:$near;

prog kernel &__OpenCL_Main_kernel(
    kernarg_u64 %__global_offset_0,
    kernarg_u64 %__global_offset_1,
    kernarg_u64 %__global_offset_2,
    kernarg_u64 %__printf_buffer,
    kernarg_u64 %__vqueue_pointer,
    kernarg_u64 %__aqlwrap_pointer,
    kernarg_u64 %res,
    kernarg_u64 %args)
{
    // BB#0:
    ld_kernarg_align(8)_width(all)_u64  $d0, [%args];
    ld_global_align(8)_u64  $d1, [$d0+8];
    ld_global_align(4)_u32  $s0, [$d0];
    cvt_u64_u32 $d0, $s0;
    or_b64  $d0, $d0, $d1;
    ld_kernarg_align(8)_width(all)_u64  $d1, [%res];
    st_global_align(8)_u64  $d0, [$d1];
    ret;
};

The LLVM IR for the two kernels is:

First kernel:

; ModuleID = '/tmp/cloc23414/bug.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir64-unknown-unknown"

%struct.KernArgs = type { i32, i64 }

; Function Attrs: nounwind
define spir_kernel void @__OpenCL_Main_kernel(i64 addrspace(1)* %res, %struct.KernArgs addrspace(1)* %args) #0 {
  %1 = alloca i64 addrspace(1)*, align 8
  %2 = alloca %struct.KernArgs addrspace(1)*, align 8
  store i64 addrspace(1)* %res, i64 addrspace(1)** %1, align 8
  store %struct.KernArgs addrspace(1)* %args, %struct.KernArgs addrspace(1)** %2, align 8
  %3 = load %struct.KernArgs addrspace(1)*, %struct.KernArgs addrspace(1)** %2, align 8
  %4 = getelementptr inbounds %struct.KernArgs, %struct.KernArgs addrspace(1)* %3, i32 0, i32 0
  %5 = load i32, i32 addrspace(1)* %4, align 4
  %6 = zext i32 %5 to i64
  %7 = load %struct.KernArgs addrspace(1)*, %struct.KernArgs addrspace(1)** %2, align 8
  %8 = getelementptr inbounds %struct.KernArgs, %struct.KernArgs addrspace(1)* %7, i32 0, i32 1
  %9 = load i64, i64 addrspace(1)* %8, align 8
  %10 = or i64 %6, %9
  %11 = load i64 addrspace(1)*, i64 addrspace(1)** %1, align 8
  store i64 %10, i64 addrspace(1)* %11, align 8
  ret void
}

attributes #0 = { nounwind }

!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.ocl.version = !{!6}
!opencl.spir.version = !{!6}

!0 = !{void (i64 addrspace(1)*, %struct.KernArgs addrspace(1)*)* @__OpenCL_Main_kernel, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1}
!2 = !{!"kernel_arg_access_qual", !"none", !"none"}
!3 = !{!"kernel_arg_type", !"ulong*", !"struct KernArgs*"}
!4 = !{!"kernel_arg_type_qual", !"", !"const"}
!5 = !{!"kernel_arg_base_type", !"ulong*", !"struct KernArgs*"}
!6 = !{i32 2, i32 0}

Second kernel:

; ModuleID = '/tmp/cloc23566/bug.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir64-unknown-unknown"

%struct.KernArgs = type { i32, i64 }

; Function Attrs: nounwind
define spir_kernel void @__OpenCL_Main_kernel(i64 addrspace(1)* %res, %struct.KernArgs* byval %args) #0 {
  %1 = alloca i64 addrspace(1)*, align 8
  store i64 addrspace(1)* %res, i64 addrspace(1)** %1, align 8
  %2 = getelementptr inbounds %struct.KernArgs, %struct.KernArgs* %args, i32 0, i32 0
  %3 = load i32, i32* %2, align 4
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds %struct.KernArgs, %struct.KernArgs* %args, i32 0, i32 1
  %6 = load i64, i64* %5, align 8
  %7 = or i64 %4, %6
  %8 = load i64 addrspace(1)*, i64 addrspace(1)** %1, align 8
  store i64 %7, i64 addrspace(1)* %8, align 8
  ret void
}

attributes #0 = { nounwind }

!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.ocl.version = !{!6}
!opencl.spir.version = !{!6}

!0 = !{void (i64 addrspace(1)*, %struct.KernArgs*)* @__OpenCL_Main_kernel, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 0}
!2 = !{!"kernel_arg_access_qual", !"none", !"none"}
!3 = !{!"kernel_arg_type", !"ulong*", !"struct KernArgs"}
!4 = !{!"kernel_arg_type_qual", !"", !"const"}
!5 = !{!"kernel_arg_base_type", !"ulong*", !"struct KernArgs"}
!6 = !{i32 2, i32 0}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

0 participants