From 6dc0f542bc03ba43b94bc855241a69746768da9c Mon Sep 17 00:00:00 2001 From: TadaoYamaoka Date: Sat, 18 Dec 2021 17:46:22 +0900 Subject: [PATCH] =?UTF-8?q?=E9=A3=9B=E8=BB=8A=E3=81=A8=E8=A7=92=E3=81=AE?= =?UTF-8?q?=E5=88=A9=E3=81=8D=E3=82=92=E3=83=93=E3=83=83=E3=83=88=E6=BC=94?= =?UTF-8?q?=E7=AE=97=E3=81=A7=E6=B1=82=E3=82=81=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build_onnx/build_onnx.vcxproj | 6 +- cppshogi/bitboard.cpp | 139 +-------------- cppshogi/bitboard.hpp | 319 ++++++++++++++++++++++++++++------ cppshogi/common.hpp | 6 + cppshogi/init.cpp | 219 +++++++++++------------ cppshogi/position.cpp | 2 +- test/test.cpp | 27 ++- test/test.vcxproj | 12 +- 8 files changed, 407 insertions(+), 323 deletions(-) diff --git a/build_onnx/build_onnx.vcxproj b/build_onnx/build_onnx.vcxproj index f50b0984..dcd62b07 100644 --- a/build_onnx/build_onnx.vcxproj +++ b/build_onnx/build_onnx.vcxproj @@ -170,7 +170,7 @@ Level3 true - _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true @@ -185,7 +185,7 @@ true true true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true @@ -202,7 +202,7 @@ true true true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true diff --git a/cppshogi/bitboard.cpp b/cppshogi/bitboard.cpp index c9ab5dc6..7dbdeac1 100644 --- a/cppshogi/bitboard.cpp +++ b/cppshogi/bitboard.cpp @@ -106,133 +106,6 @@ const Bitboard SetMaskBB[SquareNum] = { Bitboard( 0, UINT64_C(1) << 17) // 80, SQ99 }; -// 各マスのrookが利きを調べる必要があるマスの数 -const int RookBlockBits[SquareNum] = { - 14, 13, 13, 13, 13, 13, 13, 13, 14, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 13, 12, 12, 
12, 12, 12, 12, 12, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 13, - 14, 13, 13, 13, 13, 13, 13, 13, 14 -}; - -// 各マスのbishopが利きを調べる必要があるマスの数 -const int BishopBlockBits[SquareNum] = { - 7, 6, 6, 6, 6, 6, 6, 6, 7, - 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 8, 8, 8, 8, 8, 6, 6, - 6, 6, 8, 10, 10, 10, 8, 6, 6, - 6, 6, 8, 10, 12, 10, 8, 6, 6, - 6, 6, 8, 10, 10, 10, 8, 6, 6, - 6, 6, 8, 8, 8, 8, 8, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 7 -}; - -// Magic Bitboard で利きを求める際のシフト量 -// RookShiftBits[17], RookShiftBits[53] はマジックナンバーが見つからなかったため、 -// シフト量を 1 つ減らす。(テーブルサイズを 2 倍にする。) -// この方法は issei_y さんに相談したところ、教えて頂いた方法。 -// PEXT Bitboardを使用する際はシフト量を減らす必要が無い。 -const int RookShiftBits[SquareNum] = { - 50, 51, 51, 51, 51, 51, 51, 51, 50, -#if defined HAVE_BMI2 - 51, 52, 52, 52, 52, 52, 52, 52, 51, -#else - 51, 52, 52, 52, 52, 52, 52, 52, 50, // [17]: 51 -> 50 -#endif - 51, 52, 52, 52, 52, 52, 52, 52, 51, - 51, 52, 52, 52, 52, 52, 52, 52, 51, - 51, 52, 52, 52, 52, 52, 52, 52, 51, -#if defined HAVE_BMI2 - 51, 52, 52, 52, 52, 52, 52, 52, 51, -#else - 51, 52, 52, 52, 52, 52, 52, 52, 50, // [53]: 51 -> 50 -#endif - 51, 52, 52, 52, 52, 52, 52, 52, 51, - 51, 52, 52, 52, 52, 52, 52, 52, 51, - 50, 51, 51, 51, 51, 51, 51, 51, 50 -}; - -// Magic Bitboard で利きを求める際のシフト量 -const int BishopShiftBits[SquareNum] = { - 57, 58, 58, 58, 58, 58, 58, 58, 57, - 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 58, 56, 56, 56, 56, 56, 58, 58, - 58, 58, 56, 54, 54, 54, 56, 58, 58, - 58, 58, 56, 54, 52, 54, 56, 58, 58, - 58, 58, 56, 54, 54, 54, 56, 58, 58, - 58, 58, 56, 56, 56, 56, 56, 58, 58, - 58, 58, 58, 58, 58, 58, 58, 58, 58, - 57, 58, 58, 58, 58, 58, 58, 58, 57 -}; - -#if defined HAVE_BMI2 -#else -const u64 RookMagic[SquareNum] = { - UINT64_C(0x140000400809300), UINT64_C(0x1320000902000240), UINT64_C(0x8001910c008180), - UINT64_C(0x40020004401040), UINT64_C(0x40010000d01120), UINT64_C(0x80048020084050), - UINT64_C(0x40004000080228), UINT64_C(0x400440000a2a0a), 
UINT64_C(0x40003101010102), - UINT64_C(0x80c4200012108100), UINT64_C(0x4010c00204000c01), UINT64_C(0x220400103250002), - UINT64_C(0x2600200004001), UINT64_C(0x40200052400020), UINT64_C(0xc00100020020008), - UINT64_C(0x9080201000200004), UINT64_C(0x2200201000080004), UINT64_C(0x80804c0020200191), - UINT64_C(0x45383000009100), UINT64_C(0x30002800020040), UINT64_C(0x40104000988084), - UINT64_C(0x108001000800415), UINT64_C(0x14005000400009), UINT64_C(0xd21001001c00045), - UINT64_C(0xc0003000200024), UINT64_C(0x40003000280004), UINT64_C(0x40021000091102), - UINT64_C(0x2008a20408000d00), UINT64_C(0x2000100084010040), UINT64_C(0x144080008008001), - UINT64_C(0x50102400100026a2), UINT64_C(0x1040020008001010), UINT64_C(0x1200200028005010), - UINT64_C(0x4280030030020898), UINT64_C(0x480081410011004), UINT64_C(0x34000040800110a), - UINT64_C(0x101000010c0021), UINT64_C(0x9210800080082), UINT64_C(0x6100002000400a7), - UINT64_C(0xa2240800900800c0), UINT64_C(0x9220082001000801), UINT64_C(0x1040008001140030), - UINT64_C(0x40002220040008), UINT64_C(0x28000124008010c), UINT64_C(0x40008404940002), - UINT64_C(0x40040800010200), UINT64_C(0x90000809002100), UINT64_C(0x2800080001000201), - UINT64_C(0x1400020001000201), UINT64_C(0x180081014018004), UINT64_C(0x1100008000400201), - UINT64_C(0x80004000200201), UINT64_C(0x420800010000201), UINT64_C(0x2841c00080200209), - UINT64_C(0x120002401040001), UINT64_C(0x14510000101000b), UINT64_C(0x40080000808001), - UINT64_C(0x834000188048001), UINT64_C(0x4001210000800205), UINT64_C(0x4889a8007400201), - UINT64_C(0x2080044080200062), UINT64_C(0x80004002861002), UINT64_C(0xc00842049024), - UINT64_C(0x8040000202020011), UINT64_C(0x400404002c0100), UINT64_C(0x2080028202000102), - UINT64_C(0x8100040800590224), UINT64_C(0x2040009004800010), UINT64_C(0x40045000400408), - UINT64_C(0x2200240020802008), UINT64_C(0x4080042002200204), UINT64_C(0x4000b0000a00a2), - UINT64_C(0xa600000810100), UINT64_C(0x1410000d001180), UINT64_C(0x2200101001080), - 
UINT64_C(0x100020014104e120), UINT64_C(0x2407200100004810), UINT64_C(0x80144000a0845050), - UINT64_C(0x1000200060030c18), UINT64_C(0x4004200020010102), UINT64_C(0x140600021010302) -}; - -const u64 BishopMagic[SquareNum] = { - UINT64_C(0x20101042c8200428), UINT64_C(0x840240380102), UINT64_C(0x800800c018108251), - UINT64_C(0x82428010301000), UINT64_C(0x481008201000040), UINT64_C(0x8081020420880800), - UINT64_C(0x804222110000), UINT64_C(0xe28301400850), UINT64_C(0x2010221420800810), - UINT64_C(0x2600010028801824), UINT64_C(0x8048102102002), UINT64_C(0x4000248100240402), - UINT64_C(0x49200200428a2108), UINT64_C(0x460904020844), UINT64_C(0x2001401020830200), - UINT64_C(0x1009008120), UINT64_C(0x4804064008208004), UINT64_C(0x4406000240300ca0), - UINT64_C(0x222001400803220), UINT64_C(0x226068400182094), UINT64_C(0x95208402010d0104), - UINT64_C(0x4000807500108102), UINT64_C(0xc000200080500500), UINT64_C(0x5211000304038020), - UINT64_C(0x1108100180400820), UINT64_C(0x10001280a8a21040), UINT64_C(0x100004809408a210), - UINT64_C(0x202300002041112), UINT64_C(0x4040a8000460408), UINT64_C(0x204020021040201), - UINT64_C(0x8120013180404), UINT64_C(0xa28400800d020104), UINT64_C(0x200c201000604080), - UINT64_C(0x1082004000109408), UINT64_C(0x100021c00c410408), UINT64_C(0x880820905004c801), - UINT64_C(0x1054064080004120), UINT64_C(0x30c0a0224001030), UINT64_C(0x300060100040821), - UINT64_C(0x51200801020c006), UINT64_C(0x2100040042802801), UINT64_C(0x481000820401002), - UINT64_C(0x40408a0450000801), UINT64_C(0x810104200000a2), UINT64_C(0x281102102108408), - UINT64_C(0x804020040280021), UINT64_C(0x2420401200220040), UINT64_C(0x80010144080c402), - UINT64_C(0x80104400800002), UINT64_C(0x1009048080400081), UINT64_C(0x100082000201008c), - UINT64_C(0x10001008080009), UINT64_C(0x2a5006b80080004), UINT64_C(0xc6288018200c2884), - UINT64_C(0x108100104200a000), UINT64_C(0x141002030814048), UINT64_C(0x200204080010808), - UINT64_C(0x200004013922002), UINT64_C(0x2200000020050815), 
UINT64_C(0x2011010400040800), - UINT64_C(0x1020040004220200), UINT64_C(0x944020104840081), UINT64_C(0x6080a080801c044a), - UINT64_C(0x2088400811008020), UINT64_C(0xc40aa04208070), UINT64_C(0x4100800440900220), - UINT64_C(0x48112050), UINT64_C(0x818200d062012a10), UINT64_C(0x402008404508302), - UINT64_C(0x100020101002), UINT64_C(0x20040420504912), UINT64_C(0x2004008118814), - UINT64_C(0x1000810650084024), UINT64_C(0x1002a03002408804), UINT64_C(0x2104294801181420), - UINT64_C(0x841080240500812), UINT64_C(0x4406009000004884), UINT64_C(0x80082004012412), - UINT64_C(0x80090880808183), UINT64_C(0x300120020400410), UINT64_C(0x21a090100822002) -}; -#endif - const Bitboard FileMask[FileNum] = { File1Mask, File2Mask, File3Mask, File4Mask, File5Mask, File6Mask, File7Mask, File8Mask, File9Mask }; @@ -248,17 +121,9 @@ const Bitboard InFrontMask[ColorNum][RankNum] = { // これらは一度値を設定したら二度と変更しない。 // 本当は const 化したい。 -#if defined HAVE_BMI2 -Bitboard RookAttack[495616]; -#else -Bitboard RookAttack[512000]; -#endif -int RookAttackIndex[SquareNum]; -Bitboard RookBlockMask[SquareNum]; -Bitboard BishopAttack[20224]; -int BishopAttackIndex[SquareNum]; -Bitboard BishopBlockMask[SquareNum]; Bitboard LanceAttack[ColorNum][SquareNum][128]; +Bitboard RookAttackRankToMask[SquareNum][2]; +Bitboard256 BishopAttackToMask[SquareNum][2]; Bitboard KingAttack[SquareNum]; Bitboard GoldAttack[ColorNum][SquareNum]; diff --git a/cppshogi/bitboard.hpp b/cppshogi/bitboard.hpp index 81690a66..a7a39e73 100644 --- a/cppshogi/bitboard.hpp +++ b/cppshogi/bitboard.hpp @@ -212,6 +212,55 @@ class Bitboard { return !(this->p(1) & (this->p(1) - 1)); #endif } + // byte単位で入れ替えたBitboardを返す。 + // 飛車の利きの右方向と角の利きの右上、右下方向を求める時に使う。 + Bitboard byteReverse() const { +#if defined (HAVE_SSE4) + const __m128i shuffle = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + Bitboard b0; + b0.m_ = _mm_shuffle_epi8(m_, shuffle); + return b0; +#else + Bitboard b0; + b0.p_[0] = bswap64(p_[1]); + b0.p_[1] = 
bswap64(p_[0]); + return b0; +#endif + } + // SSE2のunpackを実行して返す。 + static void unpack(const Bitboard hiIn, const Bitboard loIn, Bitboard& hiOut, Bitboard& loOut) { +#if defined (HAVE_SSE2) || defined (HAVE_SSE4) + hiOut.m_ = _mm_unpackhi_epi64(loIn.m_, hiIn.m_); + loOut.m_ = _mm_unpacklo_epi64(loIn.m_, hiIn.m_); +#else + hiOut.p_[0] = loIn.p_[1]; + hiOut.p_[1] = hiIn.p_[1]; + + loOut.p_[0] = loIn.p_[0]; + loOut.p_[1] = hiIn.p_[0]; +#endif + } + // 2組のBitboardを、それぞれ64bitのhi×2とlo×2と見たときに(unpackするとそうなる) + // 128bit整数とみなして1引き算したBitboardを返す。 + static void decrement(const Bitboard hiIn, const Bitboard loIn, Bitboard& hiOut, Bitboard& loOut) + { +#if defined (HAVE_SSE42) + // loが0の時だけ1減算するときにhiからの桁借りが生じるので、 + // hi += (lo == 0) ? -1 : 0; + // みたいな処理で良い。 + hiOut.m_ = _mm_add_epi64(hiIn.m_, _mm_cmpeq_epi64(loIn.m_, _mm_setzero_si128())); + + // 1減算する + loOut.m_ = _mm_add_epi64(loIn.m_, _mm_set1_epi64x(-1LL)); +#else + // bool型はtrueだと(暗黙の型変換で)1だとみなされる。 + hiOut.p_[0] = hiIn.p_[0] - (loIn.p_[0] == 0); + hiOut.p_[1] = hiIn.p_[1] - (loIn.p_[1] == 0); + + loOut.p_[0] = loIn.p_[0] - 1; + loOut.p_[1] = loIn.p_[1] - 1; +#endif + } // for debug void printBoard() const { @@ -239,6 +288,155 @@ class Bitboard { u64 p_[2]; // p_[0] : 先手から見て、1一から7九までを縦に並べたbit. 63bit使用. right と呼ぶ。 // p_[1] : 先手から見て、8一から1九までを縦に並べたbit. 18bit使用. left と呼ぶ。 #endif + + friend class Bitboard256; +}; + +// Bitboard 2つを256bit registerで扱う。 +// Qugiyの角の利きに使用する。 +// cf. 
https://www.apply.computer-shogi.org/wcsc31/appeal/Qugiy/appeal_210518.pdf +// やねうら王の実装を参考にした +class Bitboard256 { +public: + Bitboard256() {} +#if defined (HAVE_AVX2) + Bitboard256(const Bitboard256& bb) { _mm256_store_si256(&this->m_, bb.m_); } + + // 同じBitboardを2つに複製し、それをBitboard256とする。 + Bitboard256(const Bitboard& b1) { m_ = _mm256_broadcastsi128_si256(b1.m_); } + + // 2つのBitboardを合わせたBitboard256を作る。 + Bitboard256(const Bitboard& b1, const Bitboard& b2) { + // m = _mm256_set_epi64x(b2.p[1],b2.p[0],b1.p[1],b1.p[0]); + m_ = _mm256_castsi128_si256(b1.m_); // 256bitにcast(上位は0)。これはcompiler向けの命令。 + m_ = _mm256_inserti128_si256(m_, b2.m_, 1); // 上位128bitにb2.mを代入 + } +#else + Bitboard256(const Bitboard& b1, const Bitboard& b2) { p_[0] = b1.p_[0]; p_[1] = b1.p_[1]; p_[2] = b2.p_[0]; p_[3] = b2.p_[1]; } + Bitboard256(const Bitboard& b1) { p_[0] = p_[2] = b1.p_[0]; p_[1] = p_[3] = b1.p_[1]; } +#endif + Bitboard256 operator &= (const Bitboard256& rhs) { +#if defined (HAVE_AVX2) + _mm256_store_si256(&this->m_, _mm256_and_si256(this->m_, rhs.m_)); +#else + this->p_[0] &= rhs.p_[0]; + this->p_[1] &= rhs.p_[1]; + this->p_[2] &= rhs.p_[2]; + this->p_[3] &= rhs.p_[3]; +#endif + return *this; + } + Bitboard256 operator |= (const Bitboard256& rhs) { +#if defined (HAVE_AVX2) + _mm256_store_si256(&this->m_, _mm256_or_si256(this->m_, rhs.m_)); +#else + this->p_[0] |= rhs.p_[0]; + this->p_[1] |= rhs.p_[1]; + this->p_[2] |= rhs.p_[2]; + this->p_[3] |= rhs.p_[3]; +#endif + return *this; + } + Bitboard256 operator ^= (const Bitboard256& rhs) { +#if defined (HAVE_AVX2) + _mm256_store_si256(&this->m_, _mm256_xor_si256(this->m_, rhs.m_)); +#else + this->p_[0] ^= rhs.p_[0]; + this->p_[1] ^= rhs.p_[1]; + this->p_[2] ^= rhs.p_[2]; + this->p_[3] ^= rhs.p_[3]; +#endif + return *this; + } + Bitboard256 operator & (const Bitboard256& rhs) const { return Bitboard256(*this) &= rhs; } + Bitboard256 operator | (const Bitboard256& rhs) const { return Bitboard256(*this) |= rhs; } + Bitboard256 operator 
^ (const Bitboard256& rhs) const { return Bitboard256(*this) ^= rhs; } + // byte単位で入れ替えたBitboardを返す。 + // 角の利きの右上、右下方向を求める時に使う。 + Bitboard256 byteReverse() const { +#if defined (HAVE_AVX2) + const __m256i shuffle = _mm256_set_epi8 + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + Bitboard256 b0; + b0.m_ = _mm256_shuffle_epi8(m_, shuffle); + return b0; +#else // scalar fallback: reverse each 128-bit half on its own, matching the in-lane AVX2 shuffle above + Bitboard256 b0; + b0.p_[0] = bswap64(p_[1]); + b0.p_[1] = bswap64(p_[0]); + b0.p_[2] = bswap64(p_[3]); + b0.p_[3] = bswap64(p_[2]); + return b0; +#endif + } + // 保持している2つの盤面を重ね合わせた(OR)Bitboardを返す。 + Bitboard merge() const + { +#if defined (HAVE_AVX2) + Bitboard b; + b.m_ = _mm_or_si128(_mm256_castsi256_si128(m_), _mm256_extracti128_si256(m_, 1)); + return b; +#else + Bitboard b; + b.p_[0] = p_[0] | p_[2]; + b.p_[1] = p_[1] | p_[3]; + return b; +#endif + } + // SSE2のunpackを実行して返す。 + static void unpack(const Bitboard256 hiIn, const Bitboard256 loIn, Bitboard256& hiOut, Bitboard256& loOut) { +#if defined (HAVE_AVX2) + hiOut.m_ = _mm256_unpackhi_epi64(loIn.m_, hiIn.m_); + loOut.m_ = _mm256_unpacklo_epi64(loIn.m_, hiIn.m_); +#else + hiOut.p_[0] = loIn.p_[1]; + hiOut.p_[1] = hiIn.p_[1]; + hiOut.p_[2] = loIn.p_[3]; + hiOut.p_[3] = hiIn.p_[3]; + + loOut.p_[0] = loIn.p_[0]; + loOut.p_[1] = hiIn.p_[0]; + loOut.p_[2] = loIn.p_[2]; + loOut.p_[3] = hiIn.p_[2]; +#endif + } + // 2組のBitboard256を、それぞれ64bitのhi×2とlo×2と見たときに(unpackするとそうなる) + // 128bit整数とみなして1引き算したBitboardを返す。 + static void decrement(const Bitboard256 hiIn, const Bitboard256 loIn, Bitboard256& hiOut, Bitboard256& loOut) + { +#if defined (HAVE_AVX2) + + // loが0の時だけ1減算するときにhiからの桁借りが生じるので、 + // hi += (lo == 0) ? 
-1 : 0; + // みたいな処理で良い。 + hiOut.m_ = _mm256_add_epi64(hiIn.m_, _mm256_cmpeq_epi64(loIn.m_, _mm256_setzero_si256())); + + // 1減算する + loOut.m_ = _mm256_add_epi64(loIn.m_, _mm256_set1_epi64x(-1LL)); +#else + // bool型はtrueだと(暗黙の型変換で)1だとみなされる。 + hiOut.p_[0] = hiIn.p_[0] - (loIn.p_[0] == 0); + hiOut.p_[1] = hiIn.p_[1] - (loIn.p_[1] == 0); + hiOut.p_[2] = hiIn.p_[2] - (loIn.p_[2] == 0); + hiOut.p_[3] = hiIn.p_[3] - (loIn.p_[3] == 0); + + loOut.p_[0] = loIn.p_[0] - 1; + loOut.p_[1] = loIn.p_[1] - 1; + loOut.p_[2] = loIn.p_[2] - 1; + loOut.p_[3] = loIn.p_[3] - 1; +#endif + } + +private: +#if defined (HAVE_AVX2) // m_ is only touched on the HAVE_AVX2 code paths of this class + union { + u64 p_[4]; + __m256i m_; + }; +#else + u64 p_[4]; +#endif }; inline Bitboard setMaskBB(const Square sq) { return SetMaskBB[sq]; } @@ -247,16 +445,6 @@ inline Bitboard setMaskBB(const Square sq) { return SetMaskBB[sq]; } inline Bitboard allOneBB() { return Bitboard(UINT64_C(0x7fffffffffffffff), UINT64_C(0x000000000003ffff)); } inline Bitboard allZeroBB() { return Bitboard(0, 0); } -extern const int RookBlockBits[SquareNum]; -extern const int BishopBlockBits[SquareNum]; -extern const int RookShiftBits[SquareNum]; -extern const int BishopShiftBits[SquareNum]; -#if defined HAVE_BMI2 -#else -extern const u64 RookMagic[SquareNum]; -extern const u64 BishopMagic[SquareNum]; -#endif - // 指定した位置の属する file の bit を shift し、 // index を求める為に使用する。 const int Slide[SquareNum] = { @@ -379,20 +567,10 @@ template inline Bitboard inFrontMask() { : /*R == Rank9 ?*/ InFrontOfRank9White)); } -// メモリ節約の為、1次元配列にして無駄が無いようにしている。 -#if defined HAVE_BMI2 -extern Bitboard RookAttack[495616]; -#else -extern Bitboard RookAttack[512000]; -#endif -extern int RookAttackIndex[SquareNum]; -// メモリ節約の為、1次元配列にして無駄が無いようにしている。 -extern Bitboard BishopAttack[20224]; -extern int BishopAttackIndex[SquareNum]; -extern Bitboard RookBlockMask[SquareNum]; -extern Bitboard BishopBlockMask[SquareNum]; // メモリ節約をせず、無駄なメモリを持っている。 extern Bitboard LanceAttack[ColorNum][SquareNum][128]; 
+extern Bitboard RookAttackRankToMask[SquareNum][2]; +extern Bitboard256 BishopAttackToMask[SquareNum][2]; extern Bitboard KingAttack[SquareNum]; extern Bitboard GoldAttack[ColorNum][SquareNum]; @@ -416,36 +594,6 @@ extern Bitboard HorseCheckTable[ColorNum][SquareNum]; extern Bitboard Neighbor5x5Table[SquareNum]; // 25 近傍 -#if defined HAVE_BMI2 -// PEXT bitboard. -inline u64 occupiedToIndex(const Bitboard& block, const Bitboard& mask) { - return _pext_u64(block.merge(), mask.merge()); -} - -inline Bitboard rookAttack(const Square sq, const Bitboard& occupied) { - const Bitboard block(occupied & RookBlockMask[sq]); - return RookAttack[RookAttackIndex[sq] + occupiedToIndex(block, RookBlockMask[sq])]; -} -inline Bitboard bishopAttack(const Square sq, const Bitboard& occupied) { - const Bitboard block(occupied & BishopBlockMask[sq]); - return BishopAttack[BishopAttackIndex[sq] + occupiedToIndex(block, BishopBlockMask[sq])]; -} -#else -// magic bitboard. -// magic number を使って block の模様から利きのテーブルへのインデックスを算出 -inline u64 occupiedToIndex(const Bitboard& block, const u64 magic, const int shiftBits) { - return (block.merge() * magic) >> shiftBits; -} - -inline Bitboard rookAttack(const Square sq, const Bitboard& occupied) { - const Bitboard block(occupied & RookBlockMask[sq]); - return RookAttack[RookAttackIndex[sq] + occupiedToIndex(block, RookMagic[sq], RookShiftBits[sq])]; -} -inline Bitboard bishopAttack(const Square sq, const Bitboard& occupied) { - const Bitboard block(occupied & BishopBlockMask[sq]); - return BishopAttack[BishopAttackIndex[sq] + occupiedToIndex(block, BishopMagic[sq], BishopShiftBits[sq])]; -} -#endif // todo: 香車の筋がどこにあるか先に分かっていれば、Bitboard の片方の変数だけを調べれば良くなる。 inline Bitboard lanceAttack(const Color c, const Square sq, const Bitboard& occupied) { const int part = Bitboard::part(sq); @@ -458,6 +606,73 @@ inline Bitboard rookAttackFile(const Square sq, const Bitboard& occupied) { const int index = (occupied.p(part) >> Slide[sq]) & 127; return 
LanceAttack[Black][sq][index] | LanceAttack[White][sq][index]; } +// 飛車の横だけの利き +// cf. https://www.apply.computer-shogi.org/wcsc31/appeal/Qugiy/appeal_210518.pdf +inline Bitboard rookAttackRank(const Square sq, const Bitboard& occupied) { + Bitboard hi, lo, t1, t0; + + const Bitboard mask_lo = RookAttackRankToMask[sq][0]; + const Bitboard mask_hi = RookAttackRankToMask[sq][1]; + + // occupiedを逆順にする + Bitboard rocc = occupied.byteReverse(); + + // roccとoccを2枚並べて、その上位u64をhi、下位u64をloに集める。 + // occ側は(先手から見て)左方向への利き、roccは右方向への利き。 + Bitboard::unpack(rocc, occupied, hi, lo); + + // 飛車の横方向の利きでmask + hi &= mask_hi; + lo &= mask_lo; + + // 1減算することにより、利きが通るマスまでが変化する。 + Bitboard::decrement(hi, lo, t1, t0); + + // 減算して変化したマスを抽出してmask + t1 = (t1 ^ hi) & mask_hi; + t0 = (t0 ^ lo) & mask_lo; + + // unpackしていたものを元の状態に戻す(unpackの逆変換はunpack) + Bitboard::unpack(t1, t0, hi, lo); + + // byte_reverseして元の状態に戻して、重ね合わせる。 + // hiの方には、右方向の利き、loは左方向の利きが得られている。 + return hi.byteReverse() | lo; +} +inline Bitboard rookAttack(const Square sq, const Bitboard& occupied) { + return rookAttackRank(sq, occupied) | rookAttackFile(sq, occupied); +} +// 角の利き +// cf. 
https://www.apply.computer-shogi.org/wcsc31/appeal/Qugiy/appeal_210518.pdf +inline Bitboard bishopAttack(const Square sq, const Bitboard& occupied) { + const Bitboard256 mask_lo = BishopAttackToMask[sq][0]; + const Bitboard256 mask_hi = BishopAttackToMask[sq][1]; + + // occupiedを2枚並べたBitboard256を用意する。 + const Bitboard256 occ2(occupied); + + // occupiedを(byte単位で)左右反転させたBitboardを2枚並べたBitboard256を用意する。 + const Bitboard256 rocc2(occupied.byteReverse()); + + Bitboard256 hi, lo, t1, t0; + Bitboard256::unpack(rocc2, occ2, hi, lo); + + hi &= mask_hi; + lo &= mask_lo; + + Bitboard256::decrement(hi, lo, t1, t0); + + // xorで変化した升を抽出して、step effectでmaskすれば完成 + t1 = (t1 ^ hi) & mask_hi; + t0 = (t0 ^ lo) & mask_lo; + + // unpackしていたものを元の状態に戻す(unpackの逆変換はunpack) + Bitboard256::unpack(t1, t0, hi, lo); + + // byte_reverseして元の状態に戻して、重ね合わせる。 + // hiの方には、右方向の利き、loは左方向の利きが得られている。 + return (hi.byteReverse() | lo).merge(); +} inline Bitboard goldAttack(const Color c, const Square sq) { return GoldAttack[c][sq]; } inline Bitboard silverAttack(const Color c, const Square sq) { return SilverAttack[c][sq]; } inline Bitboard knightAttack(const Color c, const Square sq) { return KnightAttack[c][sq]; } diff --git a/cppshogi/common.hpp b/cppshogi/common.hpp index 5b2b51d4..214d0158 100644 --- a/cppshogi/common.hpp +++ b/cppshogi/common.hpp @@ -145,6 +145,9 @@ FORCE_INLINE int firstOneFromMSB(const u64 b) { FORCE_INLINE int msb(const u64 b) { return 63 - firstOneFromMSB(b); } +FORCE_INLINE u64 bswap64(const u64 b) { + return _byteswap_uint64(b); +} #elif defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) FORCE_INLINE int firstOneFromLSB(const u64 b) { return __builtin_ctzll(b); @@ -158,6 +161,9 @@ FORCE_INLINE int firstOneFromMSB(const u64 b) { FORCE_INLINE int msb(const u64 b) { return 63 - __builtin_clzll(b); } +FORCE_INLINE u64 bswap64(const u64 b) { + return __builtin_bswap64(b); +} #else // firstOneFromLSB() で使用する table const int BitTable[64] = { diff --git a/cppshogi/init.cpp 
b/cppshogi/init.cpp index f1f30f3f..31e0f777 100644 --- a/cppshogi/init.cpp +++ b/cppshogi/init.cpp @@ -26,33 +26,6 @@ #include "search.hpp" namespace { - // square のマスにおける、障害物を調べる必要がある場所を調べて Bitboard で返す。 - Bitboard rookBlockMaskCalc(const Square square) { - Bitboard result = squareFileMask(square) ^ squareRankMask(square); - if (makeFile(square) != File9) result &= ~fileMask(); - if (makeFile(square) != File1) result &= ~fileMask(); - if (makeRank(square) != Rank9) result &= ~rankMask(); - if (makeRank(square) != Rank1) result &= ~rankMask(); - return result; - } - - // square のマスにおける、障害物を調べる必要がある場所を調べて Bitboard で返す。 - Bitboard bishopBlockMaskCalc(const Square square) { - const Rank rank = makeRank(square); - const File file = makeFile(square); - Bitboard result = allZeroBB(); - for (Square sq = SQ11; sq < SquareNum; ++sq) { - const Rank r = makeRank(sq); - const File f = makeFile(sq); - if (abs(rank - r) == abs(file - f)) - result.setBit(sq); - } - result &= ~(rankMask() | rankMask() | fileMask() | fileMask()); - result.clearBit(square); - - return result; - } - // square のマスにおける、障害物を調べる必要がある場所を Bitboard で返す。 // lance の前方だけを調べれば良さそうだけど、Rank2 ~ Rank8 の状態をそのまま index に使いたいので、 // 縦方向全て(端を除く)の occupied を全て調べる。 @@ -60,30 +33,29 @@ namespace { return squareFileMask(square) & ~(rankMask() | rankMask()); } - // Rook or Bishop の利きの範囲を調べて bitboard で返す。 - // occupied 障害物があるマスが 1 の bitboard - Bitboard attackCalc(const Square square, const Bitboard& occupied, const bool isBishop) { - const SquareDelta deltaArray[2][4] = {{DeltaN, DeltaS, DeltaE, DeltaW}, {DeltaNE, DeltaSE, DeltaSW, DeltaNW}}; - Bitboard result = allZeroBB(); - for (SquareDelta delta : deltaArray[isBishop]) { - for (Square sq = square + delta; - isInSquare(sq) && abs(makeRank(sq - delta) - makeRank(sq)) <= 1; - sq += delta) - { - result.setBit(sq); - if (occupied.isSet(sq)) - break; - } - } - - return result; - } - // lance の利きを返す。 - // 香車の利きは常にこれを使っても良いけど、もう少し速くする為に、テーブル化する為だけに使う。 // occupied 障害物があるマスが 1 の 
bitboard Bitboard lanceAttackCalc(const Color c, const Square square, const Bitboard& occupied) { - return rookAttack(square, occupied) & inFrontMask(c, makeRank(square)); + File file = makeFile(square); + Bitboard bb{ 0, 0 }; + // 上方向 + for (Rank rank = makeRank(square); rank > Rank1;) { + rank += DeltaN; + const Square sq = makeSquare(file, rank); + bb |= setMaskBB(sq); + if (occupied.isSet(sq)) + break; + } + // 下方向 + for (Rank rank = makeRank(square); rank < Rank9;) { + rank += DeltaS; + const Square sq = makeSquare(file, rank); + bb |= setMaskBB(sq); + if (occupied.isSet(sq)) + break; + } + + return bb & inFrontMask(c, makeRank(square)); } // index, bits の情報を元にして、occupied の 1 のbit を いくつか 0 にする。 @@ -103,34 +75,6 @@ namespace { return result; } - void initAttacks(const bool isBishop) - { - auto* attacks = (isBishop ? BishopAttack : RookAttack ); - auto* attackIndex = (isBishop ? BishopAttackIndex : RookAttackIndex); - auto* blockMask = (isBishop ? BishopBlockMask : RookBlockMask ); - auto* shift = (isBishop ? BishopShiftBits : RookShiftBits ); -#if defined HAVE_BMI2 -#else - auto* magic = (isBishop ? BishopMagic : RookMagic ); -#endif - int index = 0; - for (Square sq = SQ11; sq < SquareNum; ++sq) { - blockMask[sq] = (isBishop ? bishopBlockMaskCalc(sq) : rookBlockMaskCalc(sq)); - attackIndex[sq] = index; - - const int num1s = (isBishop ? 
BishopBlockBits[sq] : RookBlockBits[sq]); - for (int i = 0; i < (1 << num1s); ++i) { - const Bitboard occupied = indexToOccupied(i, num1s, blockMask[sq]); -#if defined HAVE_BMI2 - attacks[index + occupiedToIndex(occupied & blockMask[sq], blockMask[sq])] = attackCalc(sq, occupied, isBishop); -#else - attacks[index + occupiedToIndex(occupied, magic[sq], shift[sq])] = attackCalc(sq, occupied, isBishop); -#endif - } - index += 1 << (64 - shift[sq]); - } - } - // LanceBlockMask, LanceAttack の値を設定する。 void initLanceAttacks() { for (Color c = Black; c < ColorNum; ++c) { @@ -147,6 +91,80 @@ namespace { } } + void initRookAttacks() { + for (File file = File1; file < FileNum; ++file) { + for (Rank rank = Rank1; rank < RankNum; ++rank) { + Bitboard left{ 0, 0 }, right{ 0, 0 }; + + // SQのマスから左方向 + for (File file2 = (File)(file + 1); file2 < FileNum; ++file2) + left |= setMaskBB(makeSquare(file2, rank)); + + // SQのマスから右方向 + for (File file2 = (File)(file - 1); file2 >= File1; --file2) + right |= setMaskBB(makeSquare(file2, rank)); + + Bitboard rightRev = right.byteReverse(); + + Bitboard hi, lo; + Bitboard::unpack(rightRev, left, hi, lo); + + RookAttackRankToMask[makeSquare(file, rank)][0] = lo; + RookAttackRankToMask[makeSquare(file, rank)][1] = hi; + } + } + } + + void initBishopAttacks() { + // 4方向 + constexpr SquareDelta bishopDelta[4] = { + DeltaNW, // 左上 + DeltaSW, // 左下 + DeltaNE, // 右上 + DeltaSE, // 右下 + }; + for (File file = File1; file < FileNum; ++file) { + for (Rank rank = Rank1; rank < RankNum; ++rank) { + // 対象升から + const Square sq = makeSquare(file, rank); + + // 角の左上、左下、右上、右下それぞれへのstep effect + Bitboard bishopToBB[4]; + + // 4方向の利きをループで求める + for (int i = 0; i < 4; ++i) + { + Bitboard bb{ 0, 0 }; + + const auto delta = bishopDelta[i]; + // 壁に突き当たるまで進む + Square sq2 = sq; + while (true) { + if ((delta == DeltaNW || delta == DeltaNE) && makeRank(sq2) == Rank1) break; + if ((delta == DeltaSW || delta == DeltaSE) && makeRank(sq2) == Rank9) break; + if ((delta == DeltaNW 
|| delta == DeltaSW) && makeFile(sq2) == File9) break; + if ((delta == DeltaNE || delta == DeltaSE) && makeFile(sq2) == File1) break; + sq2 += delta; + bb |= setMaskBB(sq2); + } + + bishopToBB[i] = bb; + } + + // 右上、右下はbyte reverseしておかないとうまく求められない。(先手の香の利きがうまく求められないのと同様) + + bishopToBB[2] = bishopToBB[2].byteReverse(); + bishopToBB[3] = bishopToBB[3].byteReverse(); + + for (int i = 0; i < 2; ++i) + BishopAttackToMask[sq][i] = Bitboard256( + Bitboard(bishopToBB[0].p(i), bishopToBB[2].p(i)), + Bitboard(bishopToBB[1].p(i), bishopToBB[3].p(i)) + ); + } + } + } + void initKingAttacks() { for (Square sq = SQ11; sq < SquareNum; ++sq) KingAttack[sq] = rookAttack(sq, allOneBB()) | bishopAttack(sq, allOneBB()); @@ -415,14 +433,14 @@ namespace { } void initTable() { - initAttacks(false); - initAttacks(true); + initLanceAttacks(); + initRookAttacks(); + initBishopAttacks(); initKingAttacks(); initGoldAttacks(); initSilverAttacks(); initPawnAttacks(); initKnightAttacks(); - initLanceAttacks(); initSquareRelation(); initAttackToEdge(); initBetweenBB(); @@ -432,46 +450,3 @@ void initTable() { Book::init(); } - -#if defined FIND_MAGIC -// square の位置の rook, bishop それぞれのMagic Bitboard に使用するマジックナンバーを見つける。 -// isBishop : true なら bishop, false なら rook のマジックナンバーを見つける。 -u64 findMagic(const Square square, const bool isBishop) { - Bitboard occupied[1<<14]; - Bitboard attack[1<<14]; - Bitboard attackUsed[1<<14]; - Bitboard mask = (isBishop ? bishopBlockMaskCalc(square) : rookBlockMaskCalc(square)); - int num1s = (isBishop ? 
BishopBlockBits[square] : RookBlockBits[square]); - - // n bit の全ての数字 (利きのあるマスの全ての 0 or 1 の組み合わせ) - for (int i = 0; i < (1 << num1s); ++i) { - occupied[i] = indexToOccupied(i, num1s, mask); - attack[i] = attackCalc(square, occupied[i], isBishop); - } - - for (u64 k = 0; k < UINT64_C(100000000); ++k) { - const u64 magic = g_mt64bit.randomFewBits(); - bool fail = false; - - // これは無くても良いけど、少しマジックナンバーが見つかるのが早くなるはず。 - if (count1s((mask.merge() * magic) & UINT64_C(0xfff0000000000000)) < 6) - continue; - - std::fill(std::begin(attackUsed), std::end(attackUsed), allZeroBB()); - - for (int i = 0; !fail && i < (1 << num1s); ++i) { - const int shiftBits = (isBishop ? BishopShiftBits[square] : RookShiftBits[square]); - const u64 index = occupiedToIndex(occupied[i], magic, shiftBits); - if (attackUsed[index] == allZeroBB()) - attackUsed[index] = attack[i]; - else if (attackUsed[index] != attack[i]) - fail = true; - } - if (!fail) - return magic; - } - - std::cout << "/***Failed***/\t"; - return 0; -} -#endif // #if defined FIND_MAGIC diff --git a/cppshogi/position.cpp b/cppshogi/position.cpp index 3d09b573..51f715ce 100644 --- a/cppshogi/position.cpp +++ b/cppshogi/position.cpp @@ -423,7 +423,7 @@ void Position::doMove(const Move move, StateInfo& newSt, const CheckInfo& ci, co st_->checkersBB |= rookAttackFile(from, occupiedBB()) & bbOf(us); break; case DirecRank: - st_->checkersBB |= attacksFrom(ksq) & bbOf(Rook, Dragon, us); + st_->checkersBB |= rookAttackRank(ksq, occupiedBB()) & bbOf(Rook, Dragon, us); break; case DirecDiagNESW: case DirecDiagNWSE: st_->checkersBB |= attacksFrom(ksq) & bbOf(Bishop, Horse, us); diff --git a/test/test.cpp b/test/test.cpp index 4548fa3a..b928de9c 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -495,7 +495,7 @@ int main() } #endif -#if 1 +#if 0 #include "dfpn.h" // DfPnのPV表示テスト int main() @@ -1603,4 +1603,27 @@ int main(int argc, char* argv[]) std::_Exit(0); return 0; } -#endif \ No newline at end of file +#endif + +#if 1 +int main() +{ + 
initTable(); + Position pos; + pos.set("lnsgkgsnl/7b1/ppppppppp/9/P2R3P1/6P2/1PPPPPN1P/1B5R1/LNSGKGS1L b - 1"); + + const Bitboard occ = pos.occupiedBB(); + occ.printBoard(); + + Bitboard bb; + + bb = rookAttack(SQ28, occ); + bb.printBoard(); + + bb = rookAttack(SQ65, occ); + bb.printBoard(); + + bb = lanceAttack(Black, SQ19, occ); + bb.printBoard(); +} +#endif diff --git a/test/test.vcxproj b/test/test.vcxproj index cf5fa048..6bd242e7 100644 --- a/test/test.vcxproj +++ b/test/test.vcxproj @@ -134,7 +134,7 @@ Level3 Disabled - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreadedDebug AdvancedVectorExtensions2 stdcpp17 @@ -153,7 +153,7 @@ MaxSpeed true true - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded AdvancedVectorExtensions2 Speed @@ -176,7 +176,7 @@ MaxSpeed true true - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded AdvancedVectorExtensions2 Speed @@ -199,7 +199,7 @@ MaxSpeed true true - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded AdvancedVectorExtensions2 Speed @@ -222,7 +222,7 @@ MaxSpeed true true - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded AdvancedVectorExtensions2 Speed @@ -245,7 +245,7 @@ Disabled true true - HAVE_SSE4;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + HAVE_SSE4;HAVE_SSE42;HAVE_BMI2;HAVE_AVX2;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded AdvancedVectorExtensions2 Disabled