Skip to content

Commit

Permalink
Implement more SIMD
Browse files Browse the repository at this point in the history
  • Loading branch information
antoyo committed Jun 7, 2022
1 parent e8dca3e commit 3b35940
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 136 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ default = ["master"]
master = ["gccjit/master"]

[dependencies]
gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
#gccjit = { git = "https://github.com/antoyo/gccjit.rs" }

# Local copy.
#gccjit = { path = "../gccjit.rs" }
gccjit = { path = "../gccjit.rs" }

target-lexicon = "0.10.0"

Expand Down
2 changes: 1 addition & 1 deletion src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}

#[cfg(not(feature="master"))]
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
pub fn vector_reduce<F>(&mut self, _src: RValue<'gcc>, _op: F) -> RValue<'gcc>
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
unimplemented!();
Expand Down
111 changes: 83 additions & 28 deletions src/intrinsic/llvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,38 +75,38 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
let mut new_args = args.to_vec();
let arg5_type = gcc_func.get_param_type(4);
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
new_args.push(minus_one);
args = new_args.into();
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
let mut new_args = args.to_vec();
let mut new_args = args.to_vec();
let arg5_type = gcc_func.get_param_type(4);
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
new_args.push(minus_one);
args = new_args.into();
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
let mut new_args = args.to_vec();

let mut last_arg = None;
if args.len() == 4 {
last_arg = new_args.pop();
}
let mut last_arg = None;
if args.len() == 4 {
last_arg = new_args.pop();
}

let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);

if args.len() == 3 {
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
// the same GCC intrinsic, but the former has 3 parameters and the
// latter has 4 so it doesn't require this additional argument.
let arg5_type = gcc_func.get_param_type(4);
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
}
if args.len() == 3 {
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
// the same GCC intrinsic, but the former has 3 parameters and the
// latter has 4 so it doesn't require this additional argument.
let arg5_type = gcc_func.get_param_type(4);
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
}

if let Some(last_arg) = last_arg {
new_args.push(last_arg);
}
if let Some(last_arg) = last_arg {
new_args.push(last_arg);
}

args = new_args.into();
},
args = new_args.into();
},
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
Expand All @@ -131,6 +131,18 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
new_args.push(last_arg);
args = new_args.into();
},
"__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => {
let mut new_args = args.to_vec();
let last_arg = new_args.pop().expect("last arg");
let arg2_type = gcc_func.get_param_type(1);
let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue();
new_args.push(undefined);
let arg3_type = gcc_func.get_param_type(2);
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
new_args.push(minus_one);
new_args.push(last_arg);
args = new_args.into();
},
_ => (),
}
}
Expand All @@ -149,7 +161,8 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask"
| "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => {
if index == args_len - 1 {
return true;
}
Expand Down Expand Up @@ -221,6 +234,48 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
"llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
"llvm.x86.avx512.sitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtdq2ps512_mask",
"llvm.x86.avx512.uitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtudq2ps512_mask",
"llvm.x86.avx512.mask.cvttps2dq.256" => "__builtin_ia32_cvttps2dq256_mask",
"llvm.x86.avx512.mask.cvttps2dq.128" => "__builtin_ia32_cvttps2dq128_mask",
"llvm.x86.avx512.mask.cvttpd2dq.256" => "__builtin_ia32_cvttpd2dq256_mask",
"llvm.x86.avx512.mask.compress.d.512" => "__builtin_ia32_compresssi512_mask",
"llvm.x86.avx512.mask.compress.d.256" => "__builtin_ia32_compresssi256_mask",
"llvm.x86.avx512.mask.compress.d.128" => "__builtin_ia32_compresssi128_mask",
"llvm.x86.avx512.mask.compress.q.512" => "__builtin_ia32_compressdi512_mask",
"llvm.x86.avx512.mask.compress.q.256" => "__builtin_ia32_compressdi256_mask",
"llvm.x86.avx512.mask.compress.q.128" => "__builtin_ia32_compressdi128_mask",
"llvm.x86.avx512.mask.compress.ps.512" => "__builtin_ia32_compresssf512_mask",
"llvm.x86.avx512.mask.compress.ps.256" => "__builtin_ia32_compresssf256_mask",
"llvm.x86.avx512.mask.compress.ps.128" => "__builtin_ia32_compresssf128_mask",
"llvm.x86.avx512.mask.compress.pd.512" => "__builtin_ia32_compressdf512_mask",
"llvm.x86.avx512.mask.compress.pd.256" => "__builtin_ia32_compressdf256_mask",
"llvm.x86.avx512.mask.compress.pd.128" => "__builtin_ia32_compressdf128_mask",
"llvm.x86.avx512.mask.compress.store.d.512" => "",
"llvm.x86.avx512.mask.compress.store.d.256" => "",
"llvm.x86.avx512.mask.compress.store.d.128" => "",
"llvm.x86.avx512.mask.compress.store.q.512" => "",
"llvm.x86.avx512.mask.compress.store.q.256" => "",
"llvm.x86.avx512.mask.compress.store.q.128" => "",
"llvm.x86.avx512.mask.compress.store.ps.512" => "",
"llvm.x86.avx512.mask.compress.store.ps.256" => "",
"llvm.x86.avx512.mask.compress.store.ps.128" => "",
"llvm.x86.avx512.mask.compress.store.pd.512" => "",
"llvm.x86.avx512.mask.compress.store.pd.256" => "",
"llvm.x86.avx512.mask.compress.store.pd.128" => "",
"llvm.x86.avx512.mask.expand.d.512" => "",
"llvm.x86.avx512.mask.expand.d.256" => "",
"llvm.x86.avx512.mask.expand.d.128" => "",
"llvm.x86.avx512.mask.expand.q.512" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",

// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
Expand Down
130 changes: 25 additions & 105 deletions src/intrinsic/simd.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::cmp::Ordering;

use gccjit::{BinaryOp, RValue, Type, ToRValue};
use rustc_codegen_ssa::base::compare_simd_types;
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
Expand Down Expand Up @@ -309,117 +307,37 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,

enum Style {
Float,
Int(/* is signed? */ bool),
Int,
Unsupported,
}

let (in_style, in_width) = match in_elem.kind() {
// vectors of pointer-sized integers should've been
// disallowed before here, so this unwrap is safe.
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};
let (out_style, out_width) = match out_elem.kind() {
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};

let extend = |in_type, out_type| {
let vector_type = bx.context.new_vector_type(out_type, 8);
let vector = args[0].immediate();
let array_type = bx.context.new_array_type(None, in_type, 8);
// TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting.
let array = bx.context.new_bitcast(None, vector, array_type);

let cast_vec_element = |index| {
let index = bx.context.new_rvalue_from_int(bx.int_type, index);
bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type)
let in_style =
match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => Style::Int,
ty::Float(_) => Style::Float,
_ => Style::Unsupported,
};

bx.context.new_rvalue_from_vector(None, vector_type, &[
cast_vec_element(0),
cast_vec_element(1),
cast_vec_element(2),
cast_vec_element(3),
cast_vec_element(4),
cast_vec_element(5),
cast_vec_element(6),
cast_vec_element(7),
])
};
let out_style =
match out_elem.kind() {
ty::Int(_) | ty::Uint(_) => Style::Int,
ty::Float(_) => Style::Float,
_ => Style::Unsupported,
};

match (in_style, out_style) {
(Style::Int(in_is_signed), Style::Int(_)) => {
return Ok(match in_width.cmp(&out_width) {
Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
Ordering::Equal => args[0].immediate(),
Ordering::Less => {
if in_is_signed {
match (in_width, out_width) {
// FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly
// call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that
// we can generate a call to it.
(8, 16) => extend(bx.i8_type, bx.i16_type),
(8, 32) => extend(bx.i8_type, bx.i32_type),
(8, 64) => extend(bx.i8_type, bx.i64_type),
(16, 32) => extend(bx.i16_type, bx.i32_type),
(32, 64) => extend(bx.i32_type, bx.i64_type),
(16, 64) => extend(bx.i16_type, bx.i64_type),
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
}
} else {
match (in_width, out_width) {
(8, 16) => extend(bx.u8_type, bx.u16_type),
(8, 32) => extend(bx.u8_type, bx.u32_type),
(8, 64) => extend(bx.u8_type, bx.u64_type),
(16, 32) => extend(bx.u16_type, bx.u32_type),
(16, 64) => extend(bx.u16_type, bx.u64_type),
(32, 64) => extend(bx.u32_type, bx.u64_type),
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
}
}
}
});
}
(Style::Int(_), Style::Float) => {
// TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is
// doing like __builtin_convertvector?
// Or maybe provide convert_vector as an API since it might not easy to get the
// types of internal functions.
unimplemented!();
}
(Style::Float, Style::Int(_)) => {
unimplemented!();
}
(Style::Float, Style::Float) => {
unimplemented!();
}
_ => { /* Unsupported. Fallthrough. */ }
(Style::Unsupported, Style::Unsupported) => {
require!(
false,
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
in_ty,
in_elem,
ret_ty,
out_elem
);
},
_ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
}
require!(
false,
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
in_ty,
in_elem,
ret_ty,
out_elem
);
}

macro_rules! arith_binary {
Expand Down Expand Up @@ -590,6 +508,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
);
}
};
// TODO(antoyo): don't use target specific builtins here.
// Not sure how easy it would be to avoid theme here.
let builtin_name =
match (signed, is_add, in_len, elem_width) {
(true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.
Expand Down

0 comments on commit 3b35940

Please sign in to comment.