Skip to content

Commit

Permalink
Merge branch 'upstream-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
Datadog Syncup Service committed Jun 8, 2024
2 parents 4d63e8d + 8d2f9e5 commit 7e4b16c
Show file tree
Hide file tree
Showing 44 changed files with 4,504 additions and 216 deletions.
18 changes: 16 additions & 2 deletions src/hotspot/cpu/s390/assembler_s390.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -986,6 +986,9 @@ class Assembler : public AbstractAssembler {
#define BCR_ZOPC (unsigned int)(7 << 8)
#define BALR_ZOPC (unsigned int)(5 << 8)
#define BASR_ZOPC (unsigned int)(13 << 8)
#define BCT_ZOPC (unsigned int)(70 << 24)
#define BCTR_ZOPC (unsigned int)(6 << 8)
// BCTG is emitted as a 48-bit instruction (see z_bctg / emit_48): the opcode byte 227 (0xe3)
// sits at bit 40 and the extended opcode 70 (0x46) in the low byte. The constant therefore
// needs a 64-bit type; an (unsigned int) cast would truncate it to 70 and lose the 0xe3 byte.
#define BCTG_ZOPC (unsigned long)(227L << 40 | 70)
#define BCTGR_ZOPC (unsigned long)(0xb946 << 16)
// Absolute
#define BC_ZOPC (unsigned int)(71 << 24)
Expand Down Expand Up @@ -1887,7 +1890,14 @@ class Assembler : public AbstractAssembler {
//inline void z_brcl(branch_condition i1, int64_t i2); // branch i1 ? pc = pc + i2_imm32
inline void z_brcl(branch_condition i1, address a); // branch i1 ? pc = a
inline void z_brcl(branch_condition i1, Label& L); // branch i1 ? pc = Label
inline void z_bctgr(Register r1, Register r2); // branch on count r1 -= 1; (r1!=0) ? pc = r2 ; r1 is int64

// branch on count instructions: decrement r1, then branch if the result is not zero
inline void z_bct( Register r1, int64_t d2, Register x2, Register b2); // branch on count r1 -= 1; (r1!=0) ? pc = (d2_uimm12+x2+b2) ; r1 is int32
inline void z_bct( Register r1, const Address &a); // branch on count r1 -= 1; (r1!=0) ? pc = a, i.e. (disp+index+base) ; r1 is int32
inline void z_bctr( Register r1, Register r2); // branch on count r1 -= 1; (r1!=0) ? pc = r2 ; r1 is int32
inline void z_bctgr(Register r1, Register r2); // branch on count r1 -= 1; (r1!=0) ? pc = r2 ; r1 is int64
inline void z_bctg( Register r1, const Address &a); // branch on count r1 -= 1; (r1!=0) ? pc = a, i.e. (disp+index+base) ; r1 is int64
inline void z_bctg( Register r1, int64_t d2, Register x2, Register b2); // branch on count r1 -= 1; (r1!=0) ? pc = (d2_imm20+x2+b2) ; r1 is int64

// branch unconditional / always
inline void z_br(Register r2); // branch to r2, nop if r2 == Z_R0
Expand Down Expand Up @@ -3061,6 +3071,10 @@ class Assembler : public AbstractAssembler {
inline void z_braz(Label& L);
inline void z_brnp(Label& L);

// Branch on count, displacement + base form: no index register is used
// (the definitions pass Z_R0 as x2 to the four-argument overloads).
inline void z_bct( Register r1, int64_t d2, Register b2);
inline void z_bctg(Register r1, int64_t d2, Register b2);

inline void z_btrue( Label& L);
inline void z_bfalse(Label& L);

Expand Down
15 changes: 12 additions & 3 deletions src/hotspot/cpu/s390/assembler_s390.inline.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -724,7 +724,14 @@ inline void Assembler::z_bcr( branch_condition m1, Register r2) { emit_16( BCR_Z
inline void Assembler::z_brc( branch_condition i1, int64_t i2) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); }
inline void Assembler::z_brc( branch_condition i1, address a) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
inline void Assembler::z_brcl(branch_condition i1, address a) { emit_48( BRCL_ZOPC | uimm4(i1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
inline void Assembler::z_bctgr(Register r1, Register r2) { emit_32( BCTGR_ZOPC | reg( r1, 24, 32) | reg( r2, 28, 32)); };

// branch on count
// BCT, Address form: unpack displacement, index and base from the Address operand
// and forward to the (d2, x2, b2) overload.
inline void Assembler::z_bct(Register r1, const Address &a) {
  z_bct(r1, a.disp(), a.indexOrR0(), a.baseOrR0());
}
// BCT: emit the 32-bit instruction word built from the opcode, r1 (at bit 8)
// and the (d2, x2, b2) operand as encoded by rxmask_32.
inline void Assembler::z_bct(Register r1, int64_t d2, Register x2, Register b2) {
  emit_32(BCT_ZOPC | reg(r1, 8, 32) | rxmask_32(d2, x2, b2));
}
// BCTR: emit the 16-bit register-register form, r1 at bit 8 and r2 at bit 12.
inline void Assembler::z_bctr(Register r1, Register r2) {
  emit_16(BCTR_ZOPC | reg(r1, 8, 16) | reg(r2, 12, 16));
}
// BCTGR: emit the 32-bit register-register form, r1 at bit 24 and r2 at bit 28.
inline void Assembler::z_bctgr(Register r1, Register r2) {
  emit_32(BCTGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32));
}
// BCTG, Address form: unpack displacement, index and base from the Address operand
// and forward to the (d2, x2, b2) overload.
inline void Assembler::z_bctg(Register r1, const Address &a) {
  z_bctg(r1, a.disp(), a.indexOrR0(), a.baseOrR0());
}
// BCTG: emit the 48-bit instruction built from the opcode, r1 (at bit 8)
// and the (d2, x2, b2) operand as encoded by rxymask_48.
inline void Assembler::z_bctg(Register r1, int64_t d2, Register x2, Register b2) {
  emit_48(BCTG_ZOPC | reg(r1, 8, 48) | rxymask_48(d2, x2, b2));
}

inline void Assembler::z_basr( Register r1, Register r2) { emit_16( BASR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
inline void Assembler::z_brasl(Register r1, address a) { emit_48( BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
Expand Down Expand Up @@ -1396,6 +1403,8 @@ inline void Assembler::z_brno( Label& L) { z_brc(bcondNotOrdered, target(L)); }
inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }

// BCT without an index register: forwards to the full form with Z_R0 as x2.
inline void Assembler::z_bct(Register r1, int64_t d2, Register b2) {
  z_bct(r1, d2, Z_R0, b2);
}
// BCTG without an index register: forwards to the full form with Z_R0 as x2.
inline void Assembler::z_bctg(Register r1, int64_t d2, Register b2) {
  z_bctg(r1, d2, Z_R0, b2);
}

// Instruction len bits must be stored right-justified in argument.
inline unsigned int Assembler::instr_len(unsigned char len_bits) {
Expand Down
105 changes: 84 additions & 21 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4491,13 +4491,21 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
XMMRegister vec1, XMMRegister vec2, bool is_char,
KRegister mask, bool expand_ary2) {
// for expand_ary2, limit is the (smaller) size of the second array.
ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;

assert((!expand_ary2) || ((expand_ary2) && (UseAVX == 2)),
"Expansion only implemented for AVX2");

int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);

Address::ScaleFactor scaleFactor = expand_ary2 ? Address::times_2 : Address::times_1;
int scaleIncr = expand_ary2 ? 8 : 16;

if (is_array_equ) {
// Check the input args
cmpoop(ary1, ary2);
Expand Down Expand Up @@ -4533,14 +4541,20 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register

if (UseAVX >= 2) {
// With AVX2, use 32-byte vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
Label COMPARE_WIDE_VECTORS, COMPARE_WIDE_VECTORS_16, COMPARE_TAIL, COMPARE_TAIL_16;

// Compare 32-byte vectors
andl(result, 0x0000001f); // tail count (in bytes)
andl(limit, 0xffffffe0); // vector count (in bytes)
jcc(Assembler::zero, COMPARE_TAIL);
if (expand_ary2) {
andl(result, 0x0000000f); // tail count (in bytes)
andl(limit, 0xfffffff0); // vector count (in bytes)
jcc(Assembler::zero, COMPARE_TAIL);
} else {
andl(result, 0x0000001f); // tail count (in bytes)
andl(limit, 0xffffffe0); // vector count (in bytes)
jcc(Assembler::zero, COMPARE_TAIL_16);
}

lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary1, Address(ary1, limit, scaleFactor));
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);

Expand Down Expand Up @@ -4583,25 +4597,59 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
}//if (VM_Version::supports_avx512vlbw())
#endif //_LP64
bind(COMPARE_WIDE_VECTORS);
vmovdqu(vec1, Address(ary1, limit, Address::times_1));
vmovdqu(vec2, Address(ary2, limit, Address::times_1));
vmovdqu(vec1, Address(ary1, limit, scaleFactor));
if (expand_ary2) {
vpmovzxbw(vec2, Address(ary2, limit, Address::times_1), Assembler::AVX_256bit);
} else {
vmovdqu(vec2, Address(ary2, limit, Address::times_1));
}
vpxor(vec1, vec2);

vptest(vec1, vec1);
jcc(Assembler::notZero, FALSE_LABEL);
addptr(limit, 32);
addptr(limit, scaleIncr * 2);
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

testl(result, result);
jcc(Assembler::zero, TRUE_LABEL);

vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
vmovdqu(vec1, Address(ary1, result, scaleFactor, -32));
if (expand_ary2) {
vpmovzxbw(vec2, Address(ary2, result, Address::times_1, -16), Assembler::AVX_256bit);
} else {
vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
}
vpxor(vec1, vec2);

vptest(vec1, vec1);
jccb(Assembler::notZero, FALSE_LABEL);
jmpb(TRUE_LABEL);
jcc(Assembler::notZero, FALSE_LABEL);
jmp(TRUE_LABEL);

bind(COMPARE_TAIL_16); // limit is zero
movl(limit, result);

// Compare 16-byte chunks
andl(result, 0x0000000f); // tail count (in bytes)
andl(limit, 0xfffffff0); // vector count (in bytes)
jcc(Assembler::zero, COMPARE_TAIL);

lea(ary1, Address(ary1, limit, scaleFactor));
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);

bind(COMPARE_WIDE_VECTORS_16);
movdqu(vec1, Address(ary1, limit, scaleFactor));
if (expand_ary2) {
vpmovzxbw(vec2, Address(ary2, limit, Address::times_1), Assembler::AVX_128bit);
} else {
movdqu(vec2, Address(ary2, limit, Address::times_1));
}
pxor(vec1, vec2);

ptest(vec1, vec1);
jcc(Assembler::notZero, FALSE_LABEL);
addptr(limit, scaleIncr);
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS_16);

bind(COMPARE_TAIL); // limit is zero
movl(limit, result);
Expand Down Expand Up @@ -4646,19 +4694,34 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
}

// Compare 4-byte vectors
andl(limit, 0xfffffffc); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_CHAR);
if (expand_ary2) {
testl(result, result);
jccb(Assembler::zero, TRUE_LABEL);
} else {
andl(limit, 0xfffffffc); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_CHAR);
}

lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary1, Address(ary1, limit, scaleFactor));
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);

bind(COMPARE_VECTORS);
movl(chr, Address(ary1, limit, Address::times_1));
cmpl(chr, Address(ary2, limit, Address::times_1));
jccb(Assembler::notEqual, FALSE_LABEL);
addptr(limit, 4);
jcc(Assembler::notZero, COMPARE_VECTORS);
if (expand_ary2) {
// There are no "vector" operations for bytes to shorts
movzbl(chr, Address(ary2, limit, Address::times_1));
cmpw(Address(ary1, limit, Address::times_2), chr);
jccb(Assembler::notEqual, FALSE_LABEL);
addptr(limit, 1);
jcc(Assembler::notZero, COMPARE_VECTORS);
jmp(TRUE_LABEL);
} else {
movl(chr, Address(ary1, limit, Address::times_1));
cmpl(chr, Address(ary2, limit, Address::times_1));
jccb(Assembler::notEqual, FALSE_LABEL);
addptr(limit, 4);
jcc(Assembler::notZero, COMPARE_VECTORS);
}

// Compare trailing char (final 2 bytes), if any
bind(COMPARE_CHAR);
Expand Down
7 changes: 4 additions & 3 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,11 @@
void count_positives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);
void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);

void arrays_hashcode(Register str1, Register cnt1, Register result,
Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
Expand Down
Loading

0 comments on commit 7e4b16c

Please sign in to comment.