From 9535155ad8a5c3254900329aeba4f16910ab5278 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 12 Nov 2023 20:47:00 +0100 Subject: [PATCH] Add tuning results for 4 devices (#518) --- doc/tuning.md | 7 ++++++- src/database/kernels/copy/copy_16.hpp | 8 ++++++-- src/database/kernels/copy/copy_32.hpp | 6 ++++++ src/database/kernels/copy/copy_3232.hpp | 8 +++++++- src/database/kernels/copy/copy_64.hpp | 12 +++++++++--- src/database/kernels/copy/copy_6464.hpp | 8 +++++++- .../kernels/gemm_routine/gemm_routine_16.hpp | 6 +++++- .../kernels/gemm_routine/gemm_routine_32.hpp | 14 ++++++++++---- .../kernels/gemm_routine/gemm_routine_3232.hpp | 8 +++++++- .../kernels/gemm_routine/gemm_routine_64.hpp | 12 +++++++++--- .../kernels/gemm_routine/gemm_routine_6464.hpp | 10 ++++++++-- src/database/kernels/invert/invert_16.hpp | 4 ++++ src/database/kernels/invert/invert_32.hpp | 10 ++++++++-- src/database/kernels/invert/invert_3232.hpp | 8 +++++++- src/database/kernels/invert/invert_64.hpp | 8 +++++++- src/database/kernels/invert/invert_6464.hpp | 6 ++++++ src/database/kernels/pad/pad_16.hpp | 4 ++++ src/database/kernels/pad/pad_32.hpp | 12 +++++++++--- src/database/kernels/pad/pad_3232.hpp | 8 +++++++- src/database/kernels/pad/pad_64.hpp | 12 +++++++++--- src/database/kernels/pad/pad_6464.hpp | 10 ++++++++-- .../kernels/padtranspose/padtranspose_16.hpp | 4 ++++ .../kernels/padtranspose/padtranspose_32.hpp | 6 ++++++ .../kernels/padtranspose/padtranspose_3232.hpp | 8 +++++++- .../kernels/padtranspose/padtranspose_64.hpp | 8 +++++++- .../kernels/padtranspose/padtranspose_6464.hpp | 8 +++++++- src/database/kernels/transpose/transpose_16.hpp | 4 ++++ src/database/kernels/transpose/transpose_32.hpp | 10 ++++++++-- src/database/kernels/transpose/transpose_3232.hpp | 14 ++++++++++---- src/database/kernels/transpose/transpose_64.hpp | 8 +++++++- src/database/kernels/transpose/transpose_6464.hpp | 10 ++++++++-- .../kernels/trsv_routine/trsv_routine_32.hpp | 6 ++++++ .../kernels/trsv_routine/trsv_routine_3232.hpp | 6 ++++++ .../kernels/trsv_routine/trsv_routine_64.hpp | 6 ++++++ .../kernels/trsv_routine/trsv_routine_6464.hpp | 6 ++++++ src/database/kernels/xaxpy/xaxpy_16.hpp | 8 ++++++-- src/database/kernels/xaxpy/xaxpy_32.hpp | 8 +++++++- src/database/kernels/xaxpy/xaxpy_3232.hpp | 6 ++++++ src/database/kernels/xaxpy/xaxpy_64.hpp | 10 ++++++++-- src/database/kernels/xaxpy/xaxpy_6464.hpp | 10 ++++++++-- src/database/kernels/xdot/xdot_16.hpp | 6 +++++- src/database/kernels/xdot/xdot_32.hpp | 8 +++++++- src/database/kernels/xdot/xdot_3232.hpp | 12 +++++++++--- src/database/kernels/xdot/xdot_64.hpp | 12 +++++++++--- src/database/kernels/xdot/xdot_6464.hpp | 12 +++++++++--- src/database/kernels/xgemm/xgemm_16.hpp | 4 ++++ src/database/kernels/xgemm/xgemm_32.hpp | 8 +++++++- src/database/kernels/xgemm/xgemm_3232.hpp | 8 +++++++- src/database/kernels/xgemm/xgemm_64.hpp | 10 ++++++++-- src/database/kernels/xgemm/xgemm_6464.hpp | 6 ++++++ .../kernels/xgemm_direct/xgemm_direct_16.hpp | 4 ++++ .../kernels/xgemm_direct/xgemm_direct_32.hpp | 12 +++++++++--- .../kernels/xgemm_direct/xgemm_direct_3232.hpp | 8 +++++++- .../kernels/xgemm_direct/xgemm_direct_64.hpp | 12 +++++++++--- .../kernels/xgemm_direct/xgemm_direct_6464.hpp | 8 +++++++- src/database/kernels/xgemv/xgemv_16.hpp | 4 ++++ src/database/kernels/xgemv/xgemv_32.hpp | 6 ++++++ src/database/kernels/xgemv/xgemv_3232.hpp | 6 ++++++ src/database/kernels/xgemv/xgemv_64.hpp | 6 ++++++ src/database/kernels/xgemv/xgemv_6464.hpp | 6 ++++++ src/database/kernels/xgemv_fast/xgemv_fast_16.hpp | 4 ++++ src/database/kernels/xgemv_fast/xgemv_fast_32.hpp | 10 ++++++++-- .../kernels/xgemv_fast/xgemv_fast_3232.hpp | 6 ++++++ src/database/kernels/xgemv_fast/xgemv_fast_64.hpp | 6 ++++++ .../kernels/xgemv_fast/xgemv_fast_6464.hpp | 6 ++++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp | 4 ++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp | 8 +++++++- .../kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp | 8 +++++++- .../kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp | 8 +++++++- .../kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp | 10 ++++++++-- src/database/kernels/xger/xger_16.hpp | 6 +++++- src/database/kernels/xger/xger_32.hpp | 12 +++++++++--- src/database/kernels/xger/xger_3232.hpp | 10 ++++++++-- src/database/kernels/xger/xger_64.hpp | 12 +++++++++--- src/database/kernels/xger/xger_6464.hpp | 10 ++++++++-- 75 files changed, 513 insertions(+), 92 deletions(-) diff --git a/doc/tuning.md b/doc/tuning.md index 32169c38..e1ef9f39 100644 --- a/doc/tuning.md +++ b/doc/tuning.md @@ -38,6 +38,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - Tesla P100 16GB - SM 6.1: - GeForce MX 150 + - GeForce GTX 1060 6GB - GeForce GTX 1070 - GeForce GTX 1070 Ti - GeForce GTX 1080 @@ -71,6 +72,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - GeForce GTX 3080 Ti - GeForce GTX 3090 - SM 8.9: + - GeForce RTX 4060 - GeForce GTX 4060 Ti - GeForce GTX 4070 Laptop - GeForce GTX 4070 Ti @@ -108,6 +110,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - gfx906: - Radeon VII - gfx90c: + - Ryzen 5600G APU - Ryzen 5700G APU - gfx1010: - Radeon RX 5700 @@ -125,7 +128,9 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - Radeon 680M - Ryzen 4600G APU - gfx1100: - - Radeon RX 7900 XTX + - Radeon RX 7900 XTX + - gfx1101: + - Radeon RX 7800 XT - gfx1102: - Radeon RX 7600 - Other: diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp index 84fb93fa..e9c1dcd3 100644 --- a/src/database/kernels/copy/copy_16.hpp +++ b/src/database/kernels/copy/copy_16.hpp @@ -29,7 +29,7 @@ const DatabaseEntry CopyHalf = { { kDeviceNameDefault , Params{ 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 8, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 8, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -61,6 +61,10 @@ const DatabaseEntry CopyHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 32, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -76,7 +80,7 @@ const DatabaseEntry CopyHalf = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index dce93e08..3f87489c 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry CopySingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +238,7 @@ const DatabaseEntry CopySingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -275,6 +280,7 @@ const DatabaseEntry CopySingle = { { kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 3eac5e53..231372ef 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -94,6 +94,10 @@ const DatabaseEntry CopyComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -228,10 +232,11 @@ const DatabaseEntry CopyComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -269,6 +274,7 @@ const DatabaseEntry CopyComplexSingle = { { kDeviceNameDefault , Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index f4785d57..8275495f 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -54,7 +54,7 @@ const DatabaseEntry CopyDouble = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 8, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +86,10 @@ const DatabaseEntry CopyDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 16, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 16, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 16, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -193,10 +197,11 @@ const DatabaseEntry CopyDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,12 +239,13 @@ const DatabaseEntry CopyDouble = { { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index d3f89499..2d3584b4 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -86,6 +86,10 @@ const DatabaseEntry CopyComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -193,10 +197,11 @@ const DatabaseEntry CopyComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 32, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 16, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry CopyComplexDouble = { { kDeviceNameDefault , Params{ 8, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_16.hpp b/src/database/kernels/gemm_routine/gemm_routine_16.hpp index a8892254..7fbd2dd1 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_16.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_16.hpp @@ -56,6 +56,10 @@ const DatabaseEntry GemmRoutineHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -71,7 +75,7 @@ const DatabaseEntry GemmRoutineHalf = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp index c33e498d..cd6f988d 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_32.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp @@ -61,6 +61,10 @@ const DatabaseEntry GemmRoutineSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 704, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 704, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -76,7 +80,7 @@ const DatabaseEntry GemmRoutineSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -162,10 +166,11 @@ const DatabaseEntry GemmRoutineSingle = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1792, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1408, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -203,15 +208,16 @@ const DatabaseEntry GemmRoutineSingle = { { kDeviceNameDefault , Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 1728, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1920, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1728, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1408, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1344, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_3232.hpp b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp index 6ea9b2c7..e38e7cb9 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_3232.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp @@ -61,6 +61,10 @@ const DatabaseEntry GemmRoutineComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -75,7 +79,7 @@ const DatabaseEntry GemmRoutineComplexSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -148,6 +152,7 @@ const DatabaseEntry GemmRoutineComplexSingle = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1408, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1088, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -189,6 +194,7 @@ const DatabaseEntry GemmRoutineComplexSingle = { { kDeviceNameDefault , Params{ 1728, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_64.hpp b/src/database/kernels/gemm_routine/gemm_routine_64.hpp index c9b20b07..105fed40 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_64.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_64.hpp @@ -29,7 +29,7 @@ const DatabaseEntry GemmRoutineDouble = { } }, { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 1088, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -61,6 +61,10 @@ const DatabaseEntry GemmRoutineDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -75,7 +79,7 @@ const DatabaseEntry GemmRoutineDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1216, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -117,6 +121,7 @@ const DatabaseEntry GemmRoutineDouble = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 832, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -158,6 +163,7 @@ const DatabaseEntry GemmRoutineDouble = { { kDeviceNameDefault , Params{ 1920, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -166,7 +172,7 @@ const DatabaseEntry GemmRoutineDouble = { { kDeviceNameDefault , Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1664, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_6464.hpp b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp index 433bc84a..b2569061 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_6464.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp @@ -61,6 +61,10 @@ const DatabaseEntry GemmRoutineComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -117,10 +121,11 @@ const DatabaseEntry GemmRoutineComplexDouble = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 448, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1984, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -158,6 +163,7 @@ const DatabaseEntry GemmRoutineComplexDouble = { { kDeviceNameDefault , Params{ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1536, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1600, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -166,7 +172,7 @@ const DatabaseEntry GemmRoutineComplexDouble = { { kDeviceNameDefault , Params{ 1472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 1088, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/invert/invert_16.hpp b/src/database/kernels/invert/invert_16.hpp index a12c1434..bd1ac93c 100644 --- a/src/database/kernels/invert/invert_16.hpp +++ b/src/database/kernels/invert/invert_16.hpp @@ -56,6 +56,10 @@ const DatabaseEntry InvertHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp index f2689737..67d94a03 100644 --- a/src/database/kernels/invert/invert_32.hpp +++ b/src/database/kernels/invert/invert_32.hpp @@ -57,6 +57,10 @@ const DatabaseEntry InvertSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -71,7 +75,7 @@ const DatabaseEntry InvertSingle = { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx90c", { - { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } @@ -148,9 +152,10 @@ const DatabaseEntry InvertSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -188,6 +193,7 @@ const DatabaseEntry InvertSingle = { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp index 48b10d64..e1d66a96 100644 --- a/src/database/kernels/invert/invert_3232.hpp +++ b/src/database/kernels/invert/invert_3232.hpp @@ -57,6 +57,10 @@ const DatabaseEntry InvertComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -71,7 +75,7 @@ const DatabaseEntry InvertComplexSingle = { { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx90c", { - { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } @@ -147,6 +151,7 @@ const DatabaseEntry InvertComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -187,6 +192,7 @@ const DatabaseEntry InvertComplexSingle = { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp index 7e2d0724..f5cf0bd7 100644 --- a/src/database/kernels/invert/invert_64.hpp +++ b/src/database/kernels/invert/invert_64.hpp @@ -57,6 +57,10 @@ const DatabaseEntry InvertDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -116,6 +120,7 @@ const DatabaseEntry InvertDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -156,12 +161,13 @@ const DatabaseEntry InvertDouble = { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp index b04c70c6..a674ed9d 100644 --- a/src/database/kernels/invert/invert_6464.hpp +++ b/src/database/kernels/invert/invert_6464.hpp @@ -61,6 +61,10 @@ const DatabaseEntry InvertComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -120,6 +124,7 @@ const DatabaseEntry InvertComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -160,6 +165,7 @@ const DatabaseEntry InvertComplexDouble = { { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp index c025a27e..fcf3698e 100644 --- a/src/database/kernels/pad/pad_16.hpp +++ b/src/database/kernels/pad/pad_16.hpp @@ -61,6 +61,10 @@ const DatabaseEntry PadHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 23d6a586..39dbb0e8 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry PadSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -109,7 +113,7 @@ const DatabaseEntry PadSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -233,10 +237,11 @@ const DatabaseEntry PadSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 32, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -274,12 +279,13 @@ const DatabaseEntry PadSingle = { { kDeviceNameDefault , Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index 422b880f..889abd29 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -94,6 +94,10 @@ const DatabaseEntry PadComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -230,6 +234,7 @@ const DatabaseEntry PadComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -271,12 +276,13 @@ const DatabaseEntry PadComplexSingle = { { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index 55db4ab7..e38a1394 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -86,6 +86,10 @@ const DatabaseEntry PadDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry PadDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -193,10 +197,11 @@ const DatabaseEntry PadDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,12 +239,13 @@ const DatabaseEntry PadDouble = { { kDeviceNameDefault , Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 336af9d1..d95f9b73 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -54,7 +54,7 @@ const DatabaseEntry PadComplexDouble = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 8, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +86,10 @@ const DatabaseEntry PadComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -193,10 +197,11 @@ const DatabaseEntry PadComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry PadComplexDouble = { { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp index 779956ef..78364ce4 100644 --- a/src/database/kernels/padtranspose/padtranspose_16.hpp +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -61,6 +61,10 @@ const DatabaseEntry PadtransposeHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index cfa8e087..44416202 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry PadtransposeSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -232,6 +236,7 @@ const DatabaseEntry PadtransposeSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -273,6 +278,7 @@ const DatabaseEntry PadtransposeSingle = { { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index 1faaefcb..feb01ca9 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -94,6 +94,10 @@ const DatabaseEntry PadtransposeComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -230,6 +234,7 @@ const DatabaseEntry PadtransposeComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -271,12 +276,13 @@ const DatabaseEntry PadtransposeComplexSingle = { { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index 54fcbf8e..7ee67335 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -86,6 +86,10 @@ const DatabaseEntry PadtransposeDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry PadtransposeDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -193,6 +197,7 @@ const DatabaseEntry PadtransposeDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry PadtransposeDouble = { { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index 38616037..4ddfb3a6 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -86,6 +86,10 @@ const DatabaseEntry PadtransposeComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -193,10 +197,11 @@ const DatabaseEntry PadtransposeComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry PadtransposeComplexDouble = { { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp index 94a41677..e2174dc7 100644 --- a/src/database/kernels/transpose/transpose_16.hpp +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -61,6 +61,10 @@ const DatabaseEntry TransposeHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 1, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index 619dcdc8..6851e2ca 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry TransposeSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -232,10 +236,11 @@ const DatabaseEntry TransposeSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 4, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -273,6 +278,7 @@ const DatabaseEntry TransposeSingle = { { kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 4, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -304,7 +310,7 @@ const DatabaseEntry TransposeSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index a0c1f02e..37e045b9 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -62,7 +62,7 @@ const DatabaseEntry TransposeComplexSingle = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 8, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -94,6 +94,10 @@ const DatabaseEntry TransposeComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -109,7 +113,7 @@ const DatabaseEntry TransposeComplexSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -229,10 +233,11 @@ const DatabaseEntry TransposeComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -270,12 +275,13 @@ const DatabaseEntry TransposeComplexSingle = { { kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index 73d5aff9..abc76ba4 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -86,6 +86,10 @@ const DatabaseEntry TransposeDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -193,10 +197,11 @@ const DatabaseEntry TransposeDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry TransposeDouble = { { kDeviceNameDefault , Params{ 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index 0b8e3af4..70117adf 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -86,6 +86,10 @@ const DatabaseEntry TransposeComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry TransposeComplexDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 4, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -184,10 +188,11 @@ const DatabaseEntry TransposeComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -225,6 +230,7 @@ const DatabaseEntry TransposeComplexDouble = { { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 16, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/trsv_routine/trsv_routine_32.hpp b/src/database/kernels/trsv_routine/trsv_routine_32.hpp index 900913e1..83f2fac9 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_32.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_32.hpp @@ -61,6 +61,10 @@ const DatabaseEntry TrsvRoutineSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -144,6 +148,7 @@ const DatabaseEntry TrsvRoutineSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -184,6 +189,7 @@ const DatabaseEntry TrsvRoutineSingle = { { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp index 51bf8307..4dab3b43 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp @@ -61,6 +61,10 @@ const DatabaseEntry TrsvRoutineComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -143,6 +147,7 @@ const DatabaseEntry TrsvRoutineComplexSingle = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -183,6 +188,7 @@ const DatabaseEntry TrsvRoutineComplexSingle = { { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/trsv_routine/trsv_routine_64.hpp b/src/database/kernels/trsv_routine/trsv_routine_64.hpp index 79d93b61..b7a60056 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_64.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_64.hpp @@ -61,6 +61,10 @@ const DatabaseEntry TrsvRoutineDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -112,6 +116,7 @@ const DatabaseEntry TrsvRoutineDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -152,6 +157,7 @@ const DatabaseEntry TrsvRoutineDouble = { { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp index 84c650cb..ec46fab9 100644 --- a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp +++ b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp @@ -61,6 +61,10 @@ const DatabaseEntry TrsvRoutineComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -112,6 +116,7 @@ const DatabaseEntry TrsvRoutineComplexDouble = { } }, { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -152,6 +157,7 @@ const DatabaseEntry TrsvRoutineComplexDouble = { { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp index 46b21bc5..be407f9c 100644 --- a/src/database/kernels/xaxpy/xaxpy_16.hpp +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -29,7 +29,7 @@ const DatabaseEntry XaxpyHalf = { { kDeviceNameDefault , Params{ 4, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { - { kDeviceNameDefault , Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 2, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -61,6 +61,10 @@ const DatabaseEntry XaxpyHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -76,7 +80,7 @@ const DatabaseEntry XaxpyHalf = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index 5469dcad..d8644c87 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry XaxpySingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 4, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -233,6 +237,7 @@ const DatabaseEntry XaxpySingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -274,12 +279,13 @@ const DatabaseEntry XaxpySingle = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 4, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 4, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index 98979c08..54f33bbe 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -94,6 +94,10 @@ const DatabaseEntry XaxpyComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 256, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -230,6 +234,7 @@ const DatabaseEntry XaxpyComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 2, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -271,6 +276,7 @@ const DatabaseEntry XaxpyComplexSingle = { { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 1024, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 2, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index 4e55760d..2072241b 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -86,6 +86,10 @@ const DatabaseEntry XaxpyDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry XaxpyDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -193,10 +197,11 @@ const DatabaseEntry XaxpyDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 1024, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 512, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 2, 1024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry XaxpyDouble = { { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 512, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 1568e0b8..89eb37d3 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -86,6 +86,10 @@ const DatabaseEntry XaxpyComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry XaxpyComplexDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -193,10 +197,11 @@ const DatabaseEntry XaxpyComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 1024, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 256, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 512, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,6 +239,7 @@ const DatabaseEntry XaxpyComplexDouble = { { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 4, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 4, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 4, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp index 5a3a5922..e668a4e0 100644 --- a/src/database/kernels/xdot/xdot_16.hpp +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -61,6 +61,10 @@ const DatabaseEntry XdotHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -75,7 +79,7 @@ const DatabaseEntry XdotHalf = { { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx90c", { - { Name{"AMD Radeon(TM) Graphics "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon(TM) Graphics "}, Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index c451ba60..f38c6464 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -92,6 +92,10 @@ const DatabaseEntry XdotSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -107,7 +111,7 @@ const DatabaseEntry XdotSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 128, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -225,6 +229,7 @@ const DatabaseEntry XdotSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -266,6 +271,7 @@ const DatabaseEntry XdotSingle = { { kDeviceNameDefault , Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 512, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index 41514d9c..b8896f4f 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -61,7 +61,7 @@ const DatabaseEntry XdotComplexSingle = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -93,6 +93,10 @@ const DatabaseEntry XdotComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -108,7 +112,7 @@ const DatabaseEntry XdotComplexSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 32, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -223,10 +227,11 @@ const DatabaseEntry XdotComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1024, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1024, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -264,6 +269,7 @@ const DatabaseEntry XdotComplexSingle = { { kDeviceNameDefault , Params{ 256, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index d6fc6804..d15a113d 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -85,6 +85,10 @@ const DatabaseEntry XdotDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -99,7 +103,7 @@ const DatabaseEntry XdotDouble = { { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx90c", { - { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } @@ -180,10 +184,11 @@ const DatabaseEntry XdotDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 256, 512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -221,6 +226,7 @@ const DatabaseEntry XdotDouble = { { kDeviceNameDefault , Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -236,7 +242,7 @@ const DatabaseEntry XdotDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 256, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index 1babcb22..9b87e243 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -85,6 +85,10 @@ const DatabaseEntry XdotComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -99,8 +103,8 @@ const DatabaseEntry XdotComplexDouble = { { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx90c", { - { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -180,10 +184,11 @@ const DatabaseEntry XdotComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 512, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -221,6 +226,7 @@ const DatabaseEntry XdotComplexDouble = { { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp index 80a345cb..38cea51a 100644 --- a/src/database/kernels/xgemm/xgemm_16.hpp +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -53,6 +53,10 @@ const DatabaseEntry XgemmHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 0, 1, 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 1, 32, 2, 16, 8, 64, 16, 8, 128, 1, 1, 0, 1, 4, 8 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 8, 64, 16, 8, 128, 1, 1, 0, 1, 4, 8 } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index f7627222..25ed65f7 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -94,6 +94,10 @@ const DatabaseEntry XgemmSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 32, 0, 1, 1, 0, 4, 4 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 32, 0, 1, 1, 0, 4, 4 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 1, 32, 2, 16, 16, 64, 32, 8, 128, 0, 1, 1, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 64, 32, 8, 128, 0, 1, 1, 0, 1, 2 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 1, 32, 2, 16, 16, 64, 32, 8, 128, 0, 1, 1, 0, 1, 2 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 64, 32, 8, 128, 0, 1, 1, 0, 1, 2 } }, @@ -233,10 +237,11 @@ const DatabaseEntry XgemmSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 32, 16, 64, 16, 8, 128, 1, 1, 0, 1, 2, 8 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 0, 1, 32, 2, 16, 16, 128, 32, 8, 128, 1, 1, 1, 1, 4, 4 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 0, 1, 32, 2, 16, 16, 128, 32, 8, 128, 1, 1, 1, 1, 4, 4 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, { Name{"Tesla P4 "}, Params{ 0, 1, 32, 2, 16, 16, 64, 16, 8, 64, 0, 1, 1, 0, 4, 2 } }, - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 4 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 0, 1, 32, 2, 16, 16, 64, 8, 8, 32, 1, 1, 0, 0, 4, 4 } }, @@ -274,6 +279,7 @@ const DatabaseEntry XgemmSingle = { { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 8, 8, 16, 0, 0, 0, 0, 2, 2 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 0, 1, 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 0, 1, 32, 2, 32, 16, 128, 8, 8, 64, 0, 0, 0, 0, 4, 8 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 8, 1, 1, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 8 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 0, 1, 16, 2, 8, 8, 32, 8, 16, 128, 1, 0, 1, 1, 4, 8 } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index f8a9931a..88d9bbd6 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -90,6 +90,10 @@ const DatabaseEntry XgemmComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 0, 1, 32, 2, 8, 8, 32, 8, 16, 64, 1, 0, 1, 1, 4, 4 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 32, 8, 16, 64, 1, 0, 1, 1, 4, 4 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 1, 16, 2, 8, 8, 16, 32, 16, 64, 1, 0, 1, 1, 1, 1 } }, + { kDeviceNameDefault , Params{ 0, 1, 16, 2, 8, 8, 16, 32, 16, 64, 1, 0, 1, 1, 1, 1 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 1, 16, 2, 32, 32, 64, 16, 8, 128, 0, 0, 1, 1, 2, 2 } }, { kDeviceNameDefault , Params{ 0, 1, 16, 2, 32, 32, 64, 16, 8, 128, 0, 0, 1, 1, 2, 2 } }, @@ -226,10 +230,11 @@ const DatabaseEntry XgemmComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 16, 2, 16, 8, 128, 16, 32, 64, 1, 1, 1, 1, 1, 2 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 8, 16, 32, 16, 8, 64, 1, 1, 0, 0, 1, 1 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 0, 1, 32, 2, 32, 32, 64, 32, 8, 64, 1, 1, 0, 1, 2, 1 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 0, 1, 16, 2, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 1, 1 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, { Name{"Tesla P4 "}, Params{ 0, 1, 32, 2, 32, 32, 64, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 32, 32, 64, 8, 8, 64, 1, 1, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 4 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 0, 1, 32, 2, 8, 8, 32, 16, 16, 64, 0, 0, 0, 0, 4, 4 } }, @@ -267,6 +272,7 @@ const DatabaseEntry XgemmComplexSingle = { { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 0, 1, 16, 2, 8, 8, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 0, 1, 16, 2, 8, 8, 128, 16, 16, 64, 1, 0, 1, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 16, 1, 1, 16, 16, 64, 4, 4, 32, 0, 0, 0, 0, 1, 8 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 0, 1, 16, 2, 16, 8, 64, 8, 16, 64, 1, 1, 1, 1, 1, 4 } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index fe43feb5..6811fd30 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -82,6 +82,10 @@ const DatabaseEntry XgemmDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 0, 1, 32, 2, 16, 8, 32, 8, 8, 16, 0, 1, 0, 1, 1, 2 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 8, 32, 8, 8, 16, 0, 1, 0, 1, 1, 2 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 16, 1, 1, 0, 0, 2, 2 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 16, 1, 1, 0, 0, 2, 2 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 1, 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 2 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 2 } }, @@ -189,10 +193,11 @@ const DatabaseEntry XgemmDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 16, 2, 16, 16, 16, 16, 16, 64, 0, 0, 1, 0, 1, 4 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 16, 1, 1, 8, 8, 16, 4, 4, 16, 0, 0, 0, 0, 2, 4 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 0, 1, 32, 2, 8, 8, 64, 8, 8, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, { Name{"Tesla P4 "}, Params{ 1, 2, 1, 1, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 32, 32, 64, 0, 0, 0, 0, 2, 2 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 0, 1, 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 2, 4 } }, @@ -230,12 +235,13 @@ const DatabaseEntry XgemmDouble = { { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 8, 1, 1, 8, 8, 16, 32, 32, 64, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 1, 1, 16, 16, 32, 8, 8, 8, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 16, 1, 1, 16, 16, 32, 8, 8, 8, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 16, 1, 1, 8, 8, 16, 4, 4, 16, 0, 0, 0, 0, 2, 4 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 1, 16, 1, 1, 8, 8, 16, 4, 4, 16, 0, 0, 0, 0, 2, 4 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 1, 4, 1, 1, 32, 32, 128, 16, 16, 64, 0, 0, 0, 0, 1, 2 } }, - { kDeviceNameDefault , Params{ 1, 1, 1, 1, 16, 16, 128, 16, 16, 16, 0, 0, 0, 0, 4, 1 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 1, 32, 32, 32, 4, 4, 32, 0, 0, 0, 0, 1, 8 } }, } }, { "default", { { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 16, 8, 8, 16, 1, 1, 0, 0, 2, 2 } }, diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index ff715ef6..51a97e06 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemmComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 16, 1, 1, 0, 0, 2, 1 } }, { kDeviceNameDefault , Params{ 0, 1, 32, 2, 16, 16, 32, 8, 8, 16, 1, 1, 0, 0, 2, 1 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 0, 1, 32, 2, 8, 8, 32, 16, 16, 16, 1, 1, 0, 0, 2, 1 } }, + { kDeviceNameDefault , Params{ 0, 1, 32, 2, 8, 8, 32, 16, 16, 16, 1, 1, 0, 0, 2, 1 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 0, 1, 16, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 1, 4, 1 } }, { kDeviceNameDefault , Params{ 0, 1, 16, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 1, 4, 1 } }, @@ -187,6 +191,7 @@ const DatabaseEntry XgemmComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, { Name{"GeForce GTX 1080 "}, Params{ 0, 1, 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 0, 1, 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 2, 1, 1, 8, 8, 32, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"TITAN X (Pascal) "}, Params{ 0, 1, 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"Tesla P4 "}, Params{ 1, 4, 1, 1, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, @@ -228,6 +233,7 @@ const DatabaseEntry XgemmComplexDouble = { { kDeviceNameDefault , Params{ 1, 1, 1, 1, 8, 8, 32, 32, 32, 32, 0, 0, 0, 0, 1, 1 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 16, 1, 1, 16, 16, 32, 8, 8, 8, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 2, 1, 1, 8, 8, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 0, 1, 32, 2, 8, 8, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp index 472619a0..e3379d33 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -53,6 +53,10 @@ const DatabaseEntry XgemmDirectHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 4, 32, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index 741acd71..3f28d8da 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -76,6 +76,10 @@ const DatabaseEntry XgemmDirectSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 16, 8, 8, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 16, 8, 8, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, @@ -91,7 +95,7 @@ const DatabaseEntry XgemmDirectSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 8, 16, 8, 16, 8, 1, 0, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 4, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 16, 16, 8, 1, 0, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -197,10 +201,11 @@ const DatabaseEntry XgemmDirectSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 8, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 16, 8, 16, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 16, 8, 8, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 32, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 8, 8, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 16, 8, 16, 8, 1, 0, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 4, 2, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 2, 8, 8, 16, 16, 1, 1, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, @@ -238,12 +243,13 @@ const DatabaseEntry XgemmDirectSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 32, 32, 8, 8, 1, 0, 2, 4, 64, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index e246eb50..fbadfd62 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -75,6 +75,10 @@ const DatabaseEntry XgemmDirectComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 8, 16, 8, 16, 1, 0, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 16, 8, 16, 1, 0, 2, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 8, 8, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, @@ -191,10 +195,11 @@ const DatabaseEntry XgemmDirectComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 8, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 8, 16, 16, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 8, 16, 8, 8, 1, 0, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 2, 8, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, @@ -232,6 +237,7 @@ const DatabaseEntry XgemmDirectComplexSingle = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 16, 8, 16, 8, 1, 0, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index c5f7afca..f0defc53 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -36,7 +36,7 @@ const DatabaseEntry XgemmDirectDouble = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, @@ -68,6 +68,10 @@ const DatabaseEntry XgemmDirectDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, @@ -82,7 +86,7 @@ const DatabaseEntry XgemmDirectDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 32, 32, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -153,10 +157,11 @@ const DatabaseEntry XgemmDirectDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 8, 8, 16, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 32, 8, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 2, 32, 32, 8, 8, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, @@ -194,6 +199,7 @@ const DatabaseEntry XgemmDirectDouble = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index 0b981c12..c3fbdc0e 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -68,6 +68,10 @@ const DatabaseEntry XgemmDirectComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, @@ -154,10 +158,11 @@ const DatabaseEntry XgemmDirectComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 16, 32, 16, 16, 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 16, 16, 32, 8, 8, 1, 0, 1, 4, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, @@ -195,6 +200,7 @@ const DatabaseEntry XgemmDirectComplexDouble = { { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 2, 8, 8, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 2, 16, 8, 8, 16, 1, 1, 2, 2, 32, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_16.hpp b/src/database/kernels/xgemv/xgemv_16.hpp index 196e88b3..e11a0578 100644 --- a/src/database/kernels/xgemv/xgemv_16.hpp +++ b/src/database/kernels/xgemv/xgemv_16.hpp @@ -53,6 +53,10 @@ const DatabaseEntry XgemvHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index e3677851..c71610d3 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -89,6 +89,10 @@ const DatabaseEntry XgemvSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -226,6 +230,7 @@ const DatabaseEntry XgemvSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -267,6 +272,7 @@ const DatabaseEntry XgemvSingle = { { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index 4b6e2933..29d02255 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -89,6 +89,10 @@ const DatabaseEntry XgemvComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -219,6 +223,7 @@ const DatabaseEntry XgemvComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -260,6 +265,7 @@ const DatabaseEntry XgemvComplexSingle = { { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index e0faa06d..b18eccbc 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemvDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -186,6 +190,7 @@ const DatabaseEntry XgemvDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -227,6 +232,7 @@ const DatabaseEntry XgemvDouble = { { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp index 96c546f4..a9530c9b 100644 --- a/src/database/kernels/xgemv/xgemv_6464.hpp +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemvComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -170,6 +174,7 @@ const DatabaseEntry XgemvComplexDouble = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -210,6 +215,7 @@ const DatabaseEntry XgemvComplexDouble = { { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp index bbffb58e..dcb1a365 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp @@ -53,6 +53,10 @@ const DatabaseEntry XgemvFastHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index b95b3efb..c9a6f4a8 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -89,6 +89,10 @@ const DatabaseEntry XgemvFastSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -226,10 +230,11 @@ const DatabaseEntry XgemvFastSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -267,12 +272,13 @@ const DatabaseEntry XgemvFastSingle = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp index 48bf07d5..bc9a5817 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -89,6 +89,10 @@ const DatabaseEntry XgemvFastComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -205,6 +209,7 @@ const DatabaseEntry XgemvFastComplexSingle = { { Name{"GeForce GTX 1070 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 2, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -245,6 +250,7 @@ const DatabaseEntry XgemvFastComplexSingle = { { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index 569d354e..3ab00178 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemvFastDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -186,6 +190,7 @@ const DatabaseEntry XgemvFastDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 1, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -227,6 +232,7 @@ const DatabaseEntry XgemvFastDouble = { { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp index 4ec42ab2..4e62c703 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -81,6 +81,10 @@ const DatabaseEntry XgemvFastComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -166,6 +170,7 @@ const DatabaseEntry XgemvFastComplexDouble = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -206,6 +211,7 @@ const DatabaseEntry XgemvFastComplexDouble = { { kDeviceNameDefault , Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp index b294f4fd..03f02b4b 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp @@ -53,6 +53,10 @@ const DatabaseEntry XgemvFastRotHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index 8b6b1b71..2ca35768 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -76,6 +76,10 @@ const DatabaseEntry XgemvFastRotSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -199,10 +203,11 @@ const DatabaseEntry XgemvFastRotSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -240,6 +245,7 @@ const DatabaseEntry XgemvFastRotSingle = { { kDeviceNameDefault , Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp index 0317cc3c..ff012452 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -76,6 +76,10 @@ const DatabaseEntry XgemvFastRotComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -183,9 +187,10 @@ const DatabaseEntry XgemvFastRotComplexSingle = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -223,6 +228,7 @@ const DatabaseEntry XgemvFastRotComplexSingle = { { kDeviceNameDefault , Params{ 2, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 4, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index d99a221c..2844e04f 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -68,6 +68,10 @@ const DatabaseEntry XgemvFastRotDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -154,10 +158,11 @@ const DatabaseEntry XgemvFastRotDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -195,6 +200,7 @@ const DatabaseEntry XgemvFastRotDouble = { { kDeviceNameDefault , Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 2, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 2, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp index e8abac22..d5b158d3 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -68,6 +68,10 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 2, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 2, 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -141,9 +145,10 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { "SM6.1", { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -181,12 +186,13 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { kDeviceNameDefault , Params{ 1, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 1, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 1, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp index 4cb11786..94d9eaf5 100644 --- a/src/database/kernels/xger/xger_16.hpp +++ b/src/database/kernels/xger/xger_16.hpp @@ -61,6 +61,10 @@ const DatabaseEntry XgerHalf = { { Name{"Radeon RX 7900 XTX "}, Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -76,7 +80,7 @@ const DatabaseEntry XgerHalf = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index 743030cd..2e8c0b40 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -62,7 +62,7 @@ const DatabaseEntry XgerSingle = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 64, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -94,6 +94,10 @@ const DatabaseEntry XgerSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -226,10 +230,11 @@ const DatabaseEntry XgerSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 16, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 64, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 256, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -267,12 +272,13 @@ const DatabaseEntry XgerSingle = { { kDeviceNameDefault , Params{ 4, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4080 "}, Params{ 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4090 "}, Params{ 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "default", { { kDeviceNameDefault , Params{ 4, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index f3d17def..f662c4a8 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -94,6 +94,10 @@ const DatabaseEntry XgerComplexSingle = { { Name{"Radeon RX 7900 XTX "}, Params{ 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 4, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -109,7 +113,7 @@ const DatabaseEntry XgerComplexSingle = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -223,10 +227,11 @@ const DatabaseEntry XgerComplexSingle = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 128, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 256, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 8, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -264,6 +269,7 @@ const DatabaseEntry XgerComplexSingle = { { kDeviceNameDefault , Params{ 4, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index 6e20569b..b9638c9d 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -54,7 +54,7 @@ const DatabaseEntry XgerDouble = { { "default", { { Name{"AMD Radeon Pro 450 Compute Engine "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"AMD Radeon Pro 580 Compute Engine "}, Params{ 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "gfx1010:xnack-", { { Name{"AMD Radeon RX 5700 "}, Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -86,6 +86,10 @@ const DatabaseEntry XgerDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 128, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 128, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry XgerDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -180,10 +184,11 @@ const DatabaseEntry XgerDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 512, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -221,6 +226,7 @@ const DatabaseEntry XgerDouble = { { kDeviceNameDefault , Params{ 4, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 7a91b52d..bd76b22e 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -86,6 +86,10 @@ const DatabaseEntry XgerComplexDouble = { { Name{"Radeon RX 7900 XTX "}, Params{ 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, + { "gfx1101", { + { Name{"AMD Radeon RX 7800 XT "}, Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, { "gfx1102", { { Name{"AMD Radeon RX 7600 "}, Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -101,7 +105,7 @@ const DatabaseEntry XgerComplexDouble = { } }, { "gfx90c", { { Name{"AMD Radeon(TM) Graphics "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -180,10 +184,11 @@ const DatabaseEntry XgerComplexDouble = { { Name{"GeForce GTX 1070 Ti "}, Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 "}, Params{ 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"GeForce GTX 1080 Ti "}, Params{ 4, 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"NVIDIA GeForce GTX 1060 6GB "}, Params{ 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce MX150 "}, Params{ 512, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"TITAN X (Pascal) "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"Tesla P4 "}, Params{ 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 256, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM7.0", { { Name{"Quadro GV100 "}, Params{ 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -221,6 +226,7 @@ const DatabaseEntry XgerComplexDouble = { { kDeviceNameDefault , Params{ 4, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, { "SM8.9", { + { Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 4, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 32, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 4, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },