diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts index f06b2f1ef3d20..fa787f9f0299e 100644 --- a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts +++ b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts @@ -111,6 +111,8 @@ export function activate(context: vscode.ExtensionContext) { serverOptions, clientOptions); const semanticHighlightingFeature = new semanticHighlighting.SemanticHighlightingFeature(); + context.subscriptions.push( + vscode.Disposable.from(semanticHighlightingFeature)); clangdClient.registerFeature(semanticHighlightingFeature); console.log('Clang Language Server is now active!'); context.subscriptions.push(clangdClient.start()); @@ -133,9 +135,10 @@ export function activate(context: vscode.ExtensionContext) { vscode.window.showTextDocument(doc); })); const status = new FileStatus(); + context.subscriptions.push(vscode.Disposable.from(status)); context.subscriptions.push(vscode.window.onDidChangeActiveTextEditor( () => { status.updateStatus(); })); - clangdClient.onDidChangeState(({newState}) => { + context.subscriptions.push(clangdClient.onDidChangeState(({newState}) => { if (newState == vscodelc.State.Running) { // clangd starts or restarts after crash. clangdClient.onNotification( @@ -150,7 +153,7 @@ export function activate(context: vscode.ExtensionContext) { status.clear(); semanticHighlightingFeature.dispose(); } - }) + })); // An empty place holder for the activate command, otherwise we'll get an // "command is not registered" error. context.subscriptions.push(vscode.commands.registerCommand( diff --git a/clang/.gitattributes b/clang/.gitattributes deleted file mode 100644 index b48a3e3911adb..0000000000000 --- a/clang/.gitattributes +++ /dev/null @@ -1,3 +0,0 @@ -# Windows line ending tests -test/Lexer/minimize_source_to_dependency_directives_invalid_error.c text eol=crlf -test/FixIt/fixit-newline-style.c text eol=crlf diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index bf2c6e2b68d70..6c504c7701a87 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -5025,12 +5025,17 @@ class OMPMapClause final : public OMPMappableExprListClause, } child_range used_children() { + if (MapType == OMPC_MAP_to || MapType == OMPC_MAP_tofrom) + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); return child_range(child_iterator(), child_iterator()); } const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } + static bool classof(const OMPClause *T) { return T->getClauseKind() == OMPC_map; } diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp index b6fe4f2b81b64..6cae9663f47c6 100644 --- a/clang/test/Analysis/cfg-openmp.cpp +++ b/clang/test/Analysis/cfg-openmp.cpp @@ -9,7 +9,8 @@ void xxx(int argc) { // CHECK-NEXT: 4: int rd; // CHECK-NEXT: 5: int lin; // CHECK-NEXT: 6: int step; - int x, cond, fp, rd, lin, step; +// CHECK-NEXT: 7: int map; + int x, cond, fp, rd, lin, step, map; // CHECK-NEXT: [[#ATOM:]]: x // CHECK-NEXT: [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#ATOM+2]]: argc @@ -219,10 +220,10 @@ void xxx(int argc) { : argc) if(cond) firstprivate(fp) reduction(-:rd) argc = x; // CHECK-NEXT: [[#TPF:]]: -// CHECK-SAME: [B1.[[#TPF+13]]] -// CHECK-NEXT: [[#TPF+1]]: [B1.[[#TPF+13]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPF+2]]: [B1.[[#TPF+12]]] -// CHECK-NEXT: [[#TPF+3]]: [B1.[[#TPF+12]]] = [B1.[[#TPF+1]]] +// CHECK-SAME: [B1.[[#TPF+14]]] +// CHECK-NEXT: [[#TPF+1]]: [B1.[[#TPF+14]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TPF+2]]: [B1.[[#TPF+13]]] +// CHECK-NEXT: [[#TPF+3]]: [B1.[[#TPF+13]]] = [B1.[[#TPF+1]]] // CHECK-NEXT: [[#TPF+4]]: cond // CHECK-NEXT: [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) @@ -231,19 +232,20 @@ void xxx(int argc) { // CHECK-NEXT: [[#TPF+9]]: lin // CHECK-NEXT: [[#TPF+10]]: step // CHECK-NEXT: [[#TPF+11]]: [B1.[[#TPF+10]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPF+12]]: argc -// CHECK-NEXT: [[#TPF+13]]: x -// CHECK-NEXT: [[#TPF+14]]: #pragma omp target parallel for if(parallel: cond) firstprivate(fp) reduction(max: rd) linear(lin: step) +// CHECK-NEXT: [[#TPF+12]]: map +// CHECK-NEXT: [[#TPF+13]]: argc +// CHECK-NEXT: [[#TPF+14]]: x +// CHECK-NEXT: [[#TPF+15]]: #pragma omp target parallel for if(parallel: cond) firstprivate(fp) reduction(max: rd) linear(lin: step) map(tofrom: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TPF+3]]]; -#pragma omp target parallel for if(parallel:cond) firstprivate(fp) reduction(max:rd) linear(lin: step) +#pragma omp target parallel for if(parallel:cond) firstprivate(fp) reduction(max:rd) linear(lin: step) map(map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TPFS:]]: -// CHECK-SAME: [B1.[[#TPFS+13]]] -// CHECK-NEXT: [[#TPFS+1]]: [B1.[[#TPFS+13]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPFS+2]]: [B1.[[#TPFS+12]]] -// CHECK-NEXT: [[#TPFS+3]]: [B1.[[#TPFS+12]]] = [B1.[[#TPFS+1]]] +// CHECK-SAME: [B1.[[#TPFS+14]]] +// CHECK-NEXT: [[#TPFS+1]]: [B1.[[#TPFS+14]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TPFS+2]]: [B1.[[#TPFS+13]]] +// CHECK-NEXT: [[#TPFS+3]]: [B1.[[#TPFS+13]]] = [B1.[[#TPFS+1]]] // CHECK-NEXT: [[#TPFS+4]]: cond // CHECK-NEXT: [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) @@ -252,29 +254,31 @@ void xxx(int argc) { // CHECK-NEXT: [[#TPFS+9]]: lin // CHECK-NEXT: [[#TPFS+10]]: step // CHECK-NEXT: [[#TPFS+11]]: [B1.[[#TPFS+10]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPFS+12]]: argc -// CHECK-NEXT: [[#TPFS+13]]: x -// CHECK-NEXT: [[#TPFS+14]]: #pragma omp target parallel for simd if(target: cond) firstprivate(fp) reduction(*: rd) linear(lin: step) +// CHECK-NEXT: [[#TPFS+12]]: map +// CHECK-NEXT: [[#TPFS+13]]: argc +// CHECK-NEXT: [[#TPFS+14]]: x +// CHECK-NEXT: [[#TPFS+15]]: #pragma omp target parallel for simd if(target: cond) firstprivate(fp) reduction(*: rd) linear(lin: step) map(tofrom: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TPFS+3]]]; -#pragma omp target parallel for simd if(target:cond) firstprivate(fp) reduction(*:rd) linear(lin: step) +#pragma omp target parallel for simd if(target:cond) firstprivate(fp) reduction(*:rd) linear(lin: step) map(tofrom:map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TP:]]: -// CHECK-SAME: [B1.[[#TP+10]]] -// CHECK-NEXT: [[#TP+1]]: [B1.[[#TP+10]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TP+2]]: [B1.[[#TP+9]]] -// CHECK-NEXT: [[#TP+3]]: [B1.[[#TP+9]]] = [B1.[[#TP+1]]] +// CHECK-SAME: [B1.[[#TP+11]]] +// CHECK-NEXT: [[#TP+1]]: [B1.[[#TP+11]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TP+2]]: [B1.[[#TP+10]]] +// CHECK-NEXT: [[#TP+3]]: [B1.[[#TP+10]]] = [B1.[[#TP+1]]] // CHECK-NEXT: [[#TP+4]]: cond // CHECK-NEXT: [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) // CHECK-NEXT: [[#TP+7]]: fp // CHECK-NEXT: [[#TP+8]]: rd -// CHECK-NEXT: [[#TP+9]]: argc -// CHECK-NEXT: [[#TP+10]]: x -// CHECK-NEXT: [[#TP+11]]: #pragma omp target parallel if(cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TP+9]]: map +// CHECK-NEXT: [[#TP+10]]: argc +// CHECK-NEXT: [[#TP+11]]: x +// CHECK-NEXT: [[#TP+12]]: #pragma omp target parallel if(cond) firstprivate(fp) reduction(+: rd) map(to: map) // CHECK-NEXT: [B1.[[#TP+3]]]; -#pragma omp target parallel if(cond) firstprivate(fp) reduction(+:rd) +#pragma omp target parallel if(cond) firstprivate(fp) reduction(+:rd) map(to:map) argc = x; // CHECK-NEXT: [[#TSIMD:]]: // CHECK-SAME: [B1.[[#TSIMD+13]]] @@ -291,10 +295,10 @@ void xxx(int argc) { // CHECK-NEXT: [[#TSIMD+11]]: [B1.[[#TSIMD+10]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TSIMD+12]]: argc // CHECK-NEXT: [[#TSIMD+13]]: x -// CHECK-NEXT: [[#TSIMD+14]]: #pragma omp target simd if(cond) firstprivate(fp) reduction(+: rd) linear(lin: step) +// CHECK-NEXT: [[#TSIMD+14]]: #pragma omp target simd if(cond) firstprivate(fp) reduction(+: rd) linear(lin: step) map(alloc: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TSIMD+3]]]; -#pragma omp target simd if(cond) firstprivate(fp) reduction(+:rd) linear(lin: step) +#pragma omp target simd if(cond) firstprivate(fp) reduction(+:rd) linear(lin: step) map(alloc:map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TTD:]]: @@ -309,10 +313,10 @@ void xxx(int argc) { // CHECK-NEXT: [[#TTD+8]]: rd // CHECK-NEXT: [[#TTD+9]]: argc // CHECK-NEXT: [[#TTD+10]]: x -// CHECK-NEXT: [[#TTD+11]]: #pragma omp target teams distribute if(cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TTD+11]]: #pragma omp target teams distribute if(cond) firstprivate(fp) reduction(+: rd) map(release: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TTD+3]]]; -#pragma omp target teams distribute if(cond) firstprivate(fp) reduction(+:rd) +#pragma omp target teams distribute if(cond) firstprivate(fp) reduction(+:rd) map(release:map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TTDPF:]]: @@ -327,10 +331,10 @@ void xxx(int argc) { // CHECK-NEXT: [[#TTDPF+8]]: rd // CHECK-NEXT: [[#TTDPF+9]]: argc // CHECK-NEXT: [[#TTDPF+10]]: x -// CHECK-NEXT: [[#TTDPF+11]]: #pragma omp target teams distribute parallel for if(cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TTDPF+11]]: #pragma omp target teams distribute parallel for if(cond) firstprivate(fp) reduction(+: rd) map(delete: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TTDPF+3]]]; -#pragma omp target teams distribute parallel for if(cond) firstprivate(fp) reduction(+:rd) +#pragma omp target teams distribute parallel for if(cond) firstprivate(fp) reduction(+:rd) map(delete:map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TTDPFS:]]: @@ -345,45 +349,47 @@ void xxx(int argc) { // CHECK-NEXT: [[#TTDPFS+8]]: rd // CHECK-NEXT: [[#TTDPFS+9]]: argc // CHECK-NEXT: [[#TTDPFS+10]]: x -// CHECK-NEXT: [[#TTDPFS+11]]: #pragma omp target teams distribute parallel for simd if(parallel: cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TTDPFS+11]]: #pragma omp target teams distribute parallel for simd if(parallel: cond) firstprivate(fp) reduction(+: rd) map(from: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TTDPFS+3]]]; -#pragma omp target teams distribute parallel for simd if(parallel:cond) firstprivate(fp) reduction(+:rd) +#pragma omp target teams distribute parallel for simd if(parallel:cond) firstprivate(fp) reduction(+:rd) map(from:map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TTDS:]]: -// CHECK-SAME: [B1.[[#TTDS+10]]] -// CHECK-NEXT: [[#TTDS+1]]: [B1.[[#TTDS+10]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDS+2]]: [B1.[[#TTDS+9]]] -// CHECK-NEXT: [[#TTDS+3]]: [B1.[[#TTDS+9]]] = [B1.[[#TTDS+1]]] +// CHECK-SAME: [B1.[[#TTDS+11]]] +// CHECK-NEXT: [[#TTDS+1]]: [B1.[[#TTDS+11]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDS+2]]: [B1.[[#TTDS+10]]] +// CHECK-NEXT: [[#TTDS+3]]: [B1.[[#TTDS+10]]] = [B1.[[#TTDS+1]]] // CHECK-NEXT: [[#TTDS+4]]: cond // CHECK-NEXT: [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) // CHECK-NEXT: [[#TTDS+7]]: fp // CHECK-NEXT: [[#TTDS+8]]: rd -// CHECK-NEXT: [[#TTDS+9]]: argc -// CHECK-NEXT: [[#TTDS+10]]: x -// CHECK-NEXT: [[#TTDS+11]]: #pragma omp target teams distribute simd if(cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TTDS+9]]: map +// CHECK-NEXT: [[#TTDS+10]]: argc +// CHECK-NEXT: [[#TTDS+11]]: x +// CHECK-NEXT: [[#TTDS+12]]: #pragma omp target teams distribute simd if(cond) firstprivate(fp) reduction(+: rd) map(tofrom: map) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: [B1.[[#TTDS+3]]]; -#pragma omp target teams distribute simd if(cond) firstprivate(fp) reduction(+:rd) +#pragma omp target teams distribute simd if(cond) firstprivate(fp) reduction(+:rd) map(map) for (int i = 0; i < 10; ++i) argc = x; // CHECK-NEXT: [[#TT:]]: -// CHECK-SAME: [B1.[[#TT+10]]] -// CHECK-NEXT: [[#TT+1]]: [B1.[[#TT+10]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TT+2]]: [B1.[[#TT+9]]] -// CHECK-NEXT: [[#TT+3]]: [B1.[[#TT+9]]] = [B1.[[#TT+1]]] +// CHECK-SAME: [B1.[[#TT+11]]] +// CHECK-NEXT: [[#TT+1]]: [B1.[[#TT+11]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TT+2]]: [B1.[[#TT+10]]] +// CHECK-NEXT: [[#TT+3]]: [B1.[[#TT+10]]] = [B1.[[#TT+1]]] // CHECK-NEXT: [[#TT+4]]: cond // CHECK-NEXT: [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-NEXT: [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) // CHECK-NEXT: [[#TT+7]]: fp // CHECK-NEXT: [[#TT+8]]: rd -// CHECK-NEXT: [[#TT+9]]: argc -// CHECK-NEXT: [[#TT+10]]: x -// CHECK-NEXT: [[#TT+11]]: #pragma omp target teams if(cond) firstprivate(fp) reduction(+: rd) +// CHECK-NEXT: [[#TT+9]]: map +// CHECK-NEXT: [[#TT+10]]: argc +// CHECK-NEXT: [[#TT+11]]: x +// CHECK-NEXT: [[#TT+12]]: #pragma omp target teams if(cond) firstprivate(fp) reduction(+: rd) map(tofrom: map) // CHECK-NEXT: [B1.[[#TT+3]]]; -#pragma omp target teams if(cond) firstprivate(fp) reduction(+:rd) +#pragma omp target teams if(cond) firstprivate(fp) reduction(+:rd) map(tofrom:map) argc = x; // CHECK-NEXT: [[#TU:]]: cond // CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int) diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c new file mode 100644 index 0000000000000..c4a4cf3d97526 --- /dev/null +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c @@ -0,0 +1,16 @@ +// Test CF+LF are properly handled along with quoted, multi-line #error +// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#ifndef TEST +#error "message \ + more message \ + even more" +#endif + +#ifdef OTHER +#include +#endif + +// CHECK: #ifdef OTHER +// CHECK-NEXT: #include +// CHECK-NEXT: #endif diff --git a/clang/test/OpenMP/target_data_messages.c b/clang/test/OpenMP/target_data_messages.c index 9497ddba026de..7dd48f7e507e5 100644 --- a/clang/test/OpenMP/target_data_messages.c +++ b/clang/test/OpenMP/target_data_messages.c @@ -4,6 +4,13 @@ void foo() { } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target data map(map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + int main(int argc, char **argv) { int a; #pragma omp target data // expected-error {{expected at least one 'map' or 'use_device_ptr' clause for '#pragma omp target data'}} diff --git a/clang/test/OpenMP/target_enter_data_map_messages.c b/clang/test/OpenMP/target_enter_data_map_messages.c index cd082c63f85fa..c2701737b706f 100644 --- a/clang/test/OpenMP/target_enter_data_map_messages.c +++ b/clang/test/OpenMP/target_enter_data_map_messages.c @@ -4,6 +4,13 @@ // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - -x c++ %s -Wuninitialized +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target enter data map(to: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + int main(int argc, char **argv) { int r; diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp index 15f7b4fe072e4..56c93915a0704 100644 --- a/clang/test/OpenMP/target_map_messages.cpp +++ b/clang/test/OpenMP/target_map_messages.cpp @@ -20,6 +20,14 @@ void foo(int arg) { } #else +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target map(tofrom: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + + struct SREF { int &a; int b; diff --git a/clang/test/OpenMP/target_parallel_for_map_messages.cpp b/clang/test/OpenMP/target_parallel_for_map_messages.cpp index 0f02350e29fa6..3eba4ba82bc32 100644 --- a/clang/test/OpenMP/target_parallel_for_map_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target parallel for map(tofrom: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_parallel_for_simd_map_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_map_messages.cpp index 6f7dc528d8481..92dbcf7f6f6da 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_map_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target parallel for simd map(map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_parallel_map_messages.cpp b/clang/test/OpenMP/target_parallel_map_messages.cpp index a7a4e1cd9c201..362401e59b2cc 100644 --- a/clang/test/OpenMP/target_parallel_map_messages.cpp +++ b/clang/test/OpenMP/target_parallel_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target parallel map(tofrom: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_simd_map_messages.cpp b/clang/test/OpenMP/target_simd_map_messages.cpp index a93d20eb35fee..7e85cc094a369 100644 --- a/clang/test/OpenMP/target_simd_map_messages.cpp +++ b/clang/test/OpenMP/target_simd_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target simd map(to: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_teams_distribute_map_messages.cpp b/clang/test/OpenMP/target_teams_distribute_map_messages.cpp index f14233f49c055..f1ad2ecaa07cc 100644 --- a/clang/test/OpenMP/target_teams_distribute_map_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target teams distribute map(map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp index c67b1835345a5..11aaaf2e88549 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target teams distribute parallel for map(tofrom: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp index 908184805a8c0..03d0c237b929e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target teams distribute parallel for simd map(to: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_teams_distribute_simd_map_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_map_messages.cpp index 313bd7400a7d7..ef49201104d6f 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_map_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_map_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target teams distribute simd map(map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note 2 {{declared here}} extern S1 a; class S2 { diff --git a/clang/test/OpenMP/target_teams_map_messages.cpp b/clang/test/OpenMP/target_teams_map_messages.cpp index b4af6cb4ab1a5..86106b5d00a08 100644 --- a/clang/test/OpenMP/target_teams_map_messages.cpp +++ b/clang/test/OpenMP/target_teams_map_messages.cpp @@ -21,6 +21,14 @@ void foo(int arg) { {} } #else + +void xxx(int argc) { + int map; // expected-note {{initialize the variable 'map' to silence this warning}} +#pragma omp target teams map(tofrom: map) // expected-warning {{variable 'map' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + template struct SA { static int ss; diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 92da621fabbf9..39eabd926903e 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -89,13 +89,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function argument #" + Twine(i)); } } @@ -121,13 +116,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << EVT(VT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function return #" + Twine(i)); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index be133b19c2695..f8a54a61aac22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -24,15 +24,7 @@ def CC_SI : CallingConv<[ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, - SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, - SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, - SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, - SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, - SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, - SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, - SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, - SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, - SGPR104, SGPR105 + SGPR40, SGPR41, SGPR42, SGPR43 ]>>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. @@ -64,15 +56,7 @@ def RetCC_SI_Shader : CallingConv<[ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, - SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, - SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, - SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, - SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, - SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, - SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, - SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, - SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, - SGPR104, SGPR105 + SGPR40, SGPR41, SGPR42, SGPR43 ]>>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index c3be8c4e9a907..d304f98ceaed1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4183,8 +4183,10 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) { ConstantInt *Op1C = (Call.getNumArgOperands() == 1) ? nullptr : dyn_cast(Call.getOperand(1)); + // Bail out if the allocation size is zero. if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue())) return; + if (isMallocLikeFn(&Call, TLI) && Op0C) { Call.addAttribute(AttributeList::ReturnIndex, Attribute::getWithDereferenceableOrNullBytes( diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index dad38e38cabda..3a01d9cb4f4fa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -988,12 +988,14 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // If one of the GEPs has all zero indices, recurse. - if (GEPLHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. + if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices()) return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0), ICmpInst::getSwappedPredicate(Cond), I); // If the other GEP has all zero indices, recurse. - if (GEPRHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. + if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices()) return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 71883b5692d3b..6a7f90a11a59d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -739,7 +739,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Value *LHS, Value *RHS, Instruction *CxtI) const; /// Maximum size of array considered when transforming. - uint64_t MaxArraySizeForCombine; + uint64_t MaxArraySizeForCombine = 0; private: /// Performs a few simplifications for operators which are associative diff --git a/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll new file mode 100644 index 0000000000000..4352e411d277b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll @@ -0,0 +1,138 @@ +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s + +; CHECK: s_add_i32 s0, s0, s1 +; CHECK: s_add_i32 s1, s0, s2 +; CHECK: s_add_i32 s2, s1, s3 +; CHECK: s_add_i32 s3, s2, s4 +; CHECK: s_add_i32 s4, s3, s5 +; CHECK: s_add_i32 s5, s4, s6 +; CHECK: s_add_i32 s6, s5, s7 +; CHECK: s_add_i32 s7, s6, s8 +; CHECK: s_add_i32 s8, s7, s9 +; CHECK: s_add_i32 s9, s8, s10 +; CHECK: s_add_i32 s10, s9, s11 +; CHECK: s_add_i32 s11, s10, s12 +; CHECK: s_add_i32 s12, s11, s13 +; CHECK: s_add_i32 s13, s12, s14 +; CHECK: s_add_i32 s14, s13, s15 +; CHECK: s_add_i32 s15, s14, s16 +; CHECK: s_add_i32 s16, s15, s17 +; CHECK: s_add_i32 s17, s16, s18 +; CHECK: s_add_i32 s18, s17, s19 +; CHECK: s_add_i32 s19, s18, s20 +; CHECK: s_add_i32 s20, s19, s21 +; CHECK: s_add_i32 s21, s20, s22 +; CHECK: s_add_i32 s22, s21, s23 +; CHECK: s_add_i32 s23, s22, s24 +; CHECK: s_add_i32 s24, s23, s25 +; CHECK: s_add_i32 s25, s24, s26 +; CHECK: s_add_i32 s26, s25, s27 +; CHECK: s_add_i32 s27, s26, s28 +; CHECK: s_add_i32 s28, s27, s29 +define amdgpu_gs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } @_amdgpu_gs_sgpr_limit_i32 (i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg) { +.entry: + %30 = add i32 %0, %1 + %31 = add i32 %30, %2 + %32 = add i32 %31, %3 + %33 = add i32 %32, %4 + %34 = add i32 %33, %5 + %35 = add i32 %34, %6 + %36 = add i32 %35, %7 + %37 = add i32 %36, %8 + %38 = add i32 %37, %9 + %39 = add i32 %38, %10 + %40 = add i32 %39, %11 + %41 = add i32 %40, %12 + %42 = add i32 %41, %13 + %43 = add i32 %42, %14 + %44 = add i32 %43, %15 + %45 = add i32 %44, %16 + %46 = add i32 %45, %17 + %47 = add i32 %46, %18 + %48 = add i32 %47, %19 + %49 = add i32 %48, %20 + %50 = add i32 %49, %21 + %51 = add i32 %50, %22 + %52 = add i32 %51, %23 + %53 = add i32 %52, %24 + %54 = add i32 %53, %25 + %55 = add i32 %54, %26 + %56 = add i32 %55, %27 + %57 = add i32 %56, %28 + %58 = add i32 %57, %29 + %59 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } undef, i32 %30, 0 + %60 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %59, i32 %31, 1 + %61 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %60, i32 %32, 2 + %62 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %61, i32 %33, 3 + %63 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %62, i32 %34, 4 + %64 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %63, i32 %35, 5 + %65 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %64, i32 %36, 6 + %66 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %65, i32 %37, 7 + %67 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %66, i32 %38, 8 + %68 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %67, i32 %39, 9 + %69 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %68, i32 %40, 10 + %70 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %69, i32 %41, 11 + %71 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %70, i32 %42, 12 + %72 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %71, i32 %43, 13 + %73 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %72, i32 %44, 14 + %74 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %73, i32 %45, 15 + %75 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %74, i32 %46, 16 + %76 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %75, i32 %47, 17 + %77 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %76, i32 %48, 18 + %78 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %77, i32 %49, 19 + %79 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %78, i32 %50, 20 + %80 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %79, i32 %51, 21 + %81 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %80, i32 %52, 22 + %82 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %81, i32 %53, 23 + %83 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %82, i32 %54, 24 + %84 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %83, i32 %55, 25 + %85 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %84, i32 %56, 26 + %86 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %85, i32 %57, 27 + %87 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %86, i32 %58, 28 + ret { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %87 +} + +; CHECK: s_xor_b64 s[0:1], s[0:1], s[2:3] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[4:5] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[6:7] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[8:9] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[10:11] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[12:13] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[14:15] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[16:17] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[18:19] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[20:21] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[22:23] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[24:25] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[26:27] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[28:29] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[30:31] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[32:33] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[34:35] +define amdgpu_gs void @_amdgpu_gs_sgpr_limit_i64(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, <4 x i32> inreg %addr) { +.entry: + %19 = xor i64 %0, %1 + %20 = xor i64 %19, %2 + %21 = xor i64 %20, %3 + %22 = xor i64 %21, %4 + %23 = xor i64 %22, %5 + %24 = xor i64 %23, %6 + %25 = xor i64 %24, %7 + %26 = xor i64 %25, %8 + %27 = xor i64 %26, %9 + %28 = xor i64 %27, %10 + %29 = xor i64 %28, %11 + %30 = xor i64 %29, %12 + %31 = xor i64 %30, %13 + %32 = xor i64 %31, %14 + %33 = xor i64 %32, %15 + %34 = xor i64 %33, %16 + %35 = xor i64 %34, %17 + %36 = bitcast i64 %35 to <2 x i32> + call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %36, <4 x i32> %addr, i32 4, i32 0, i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll b/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll new file mode 100644 index 0000000000000..72c6cfee28df6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll @@ -0,0 +1,101 @@ +; RUN: not llc -march=amdgcn -mcpu=verde -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s + +;CHECK: LLVM ERROR: unable to allocate function argument +define amdgpu_gs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } @_amdgpu_gs_sgpr_i32 (i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg) { +.entry: + %46 = add i32 %0, %1 + %47 = add i32 %46, %2 + %48 = add i32 %47, %3 + %49 = add i32 %48, %4 + %50 = add i32 %49, %5 + %51 = add i32 %50, %6 + %52 = add i32 %51, %7 + %53 = add i32 %52, %8 + %54 = add i32 %53, %9 + %55 = add i32 %54, %10 + %56 = add i32 %55, %11 + %57 = add i32 %56, %12 + %58 = add i32 %57, %13 + %59 = add i32 %58, %14 + %60 = add i32 %59, %15 + %61 = add i32 %60, %16 + %62 = add i32 %61, %17 + %63 = add i32 %62, %18 + %64 = add i32 %63, %19 + %65 = add i32 %64, %20 + %66 = add i32 %65, %21 + %67 = add i32 %66, %22 + %68 = add i32 %67, %23 + %69 = add i32 %68, %24 + %70 = add i32 %69, %25 + %71 = add i32 %70, %26 + %72 = add i32 %71, %27 + %73 = add i32 %72, %28 + %74 = add i32 %73, %29 + %75 = add i32 %74, %30 + %76 = add i32 %75, %31 + %77 = add i32 %76, %32 + %78 = add i32 %77, %33 + %79 = add i32 %78, %34 + %80 = add i32 %79, %35 + %81 = add i32 %80, %36 + %82 = add i32 %81, %37 + %83 = add i32 %82, %38 + %84 = add i32 %83, %39 + %85 = add i32 %84, %40 + %86 = add i32 %85, %41 + %87 = add i32 %86, %42 + %88 = add i32 %87, %43 + %89 = add i32 %88, %44 + %90 = add i32 %89, %45 + %91 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } undef, i32 %46, 0 + %92 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %91, i32 %47, 1 + %93 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %92, i32 %48, 2 + %94 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %93, i32 %49, 3 + %95 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %94, i32 %50, 4 + %96 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %95, i32 %51, 5 + %97 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %96, i32 %52, 6 + %98 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %97, i32 %53, 7 + %99 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %98, i32 %54, 8 + %100 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %99, i32 %55, 9 + %101 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %100, i32 %56, 10 + %102 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %101, i32 %57, 11 + %103 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %102, i32 %58, 12 + %104 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %103, i32 %59, 13 + %105 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %104, i32 %60, 14 + %106 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %105, i32 %61, 15 + %107 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %106, i32 %62, 16 + %108 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %107, i32 %63, 17 + %109 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %108, i32 %64, 18 + %110 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %109, i32 %65, 19 + %111 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %110, i32 %66, 20 + %112 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %111, i32 %67, 21 + %113 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %112, i32 %68, 22 + %114 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %113, i32 %69, 23 + %115 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %114, i32 %70, 24 + %116 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %115, i32 %71, 25 + %117 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %116, i32 %72, 26 + %118 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %117, i32 %73, 27 + %119 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %118, i32 %74, 28 + %120 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %119, i32 %75, 29 + %121 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %120, i32 %76, 30 + %122 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %121, i32 %77, 31 + %123 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %122, i32 %78, 32 + %124 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %123, i32 %79, 33 + %125 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %124, i32 %80, 34 + %126 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %125, i32 %81, 35 + %127 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %126, i32 %82, 36 + %128 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %127, i32 %83, 37 + %129 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %128, i32 %84, 38 + %130 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %129, i32 %85, 39 + %131 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %130, i32 %86, 40 + %132 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %131, i32 %87, 41 + %133 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %132, i32 %88, 42 + %134 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %133, i32 %89, 43 + %135 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %134, i32 %90, 44 + ret { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %135 +} + +declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/sgpr-limit.ll deleted file mode 100644 index 364cfd880db77..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/sgpr-limit.ll +++ /dev/null @@ -1,265 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s - -; CHECK: s_add_i32 s0, s0, s1 -; CHECK: s_add_i32 s1, s0, s2 -; CHECK: s_add_i32 s2, s1, s3 -; CHECK: s_add_i32 s3, s2, s4 -; CHECK: s_add_i32 s4, s3, s5 -; CHECK: s_add_i32 s5, s4, s6 -; CHECK: s_add_i32 s6, s5, s7 -; CHECK: s_add_i32 s7, s6, s8 -; CHECK: s_add_i32 s8, s7, s9 -; CHECK: s_add_i32 s9, s8, s10 -; CHECK: s_add_i32 s10, s9, s11 -; CHECK: s_add_i32 s11, s10, s12 -; CHECK: s_add_i32 s12, s11, s13 -; CHECK: s_add_i32 s13, s12, s14 -; CHECK: s_add_i32 s14, s13, s15 -; CHECK: s_add_i32 s15, s14, s16 -; CHECK: s_add_i32 s16, s15, s17 -; CHECK: s_add_i32 s17, s16, s18 -; CHECK: s_add_i32 s18, s17, s19 -; CHECK: s_add_i32 s19, s18, s20 -; CHECK: s_add_i32 s20, s19, s21 -; CHECK: s_add_i32 s21, s20, s22 -; CHECK: s_add_i32 s22, s21, s23 -; CHECK: s_add_i32 s23, s22, s24 -; CHECK: s_add_i32 s24, s23, s25 -; CHECK: s_add_i32 s25, s24, s26 -; CHECK: s_add_i32 s26, s25, s27 -; CHECK: s_add_i32 s27, s26, s28 -; CHECK: s_add_i32 s28, s27, s29 -; CHECK: s_add_i32 s29, s28, s30 -; CHECK: s_add_i32 s30, s29, s31 -; CHECK: s_add_i32 s31, s30, s32 -; CHECK: s_add_i32 s32, s31, s33 -; CHECK: s_add_i32 s33, s32, s34 -; CHECK: s_add_i32 s34, s33, s35 -; CHECK: s_add_i32 s35, s34, s36 -; CHECK: s_add_i32 s36, s35, s37 -; CHECK: s_add_i32 s37, s36, s38 -; CHECK: s_add_i32 s38, s37, s39 -; CHECK: s_add_i32 s39, s38, s40 -; CHECK: s_add_i32 s40, s39, s41 -; CHECK: s_add_i32 s41, s40, s42 -; CHECK: s_add_i32 s42, s41, s43 -; CHECK: s_add_i32 s43, s42, s44 -; CHECK: s_add_i32 s44, s43, s45 -; CHECK: s_add_i32 s45, s44, s46 -; CHECK: s_add_i32 s46, s45, s47 -; CHECK: s_add_i32 s47, s46, s48 -; CHECK: s_add_i32 s48, s47, s49 -; CHECK: s_add_i32 s49, s48, s50 -; CHECK: s_add_i32 s50, s49, s51 -; CHECK: s_add_i32 s51, s50, s52 -; CHECK: s_add_i32 s52, s51, s53 -; CHECK: s_add_i32 s53, s52, s54 -; CHECK: s_add_i32 s54, s53, s55 -; CHECK: s_add_i32 s55, s54, s56 -; CHECK: s_add_i32 s56, s55, s57 -; CHECK: s_add_i32 s57, s56, s58 -; CHECK: s_add_i32 s58, s57, s59 -; CHECK: s_add_i32 s59, s58, s60 -; CHECK: s_add_i32 s60, s59, s61 -; CHECK: s_add_i32 s61, s60, s62 -; CHECK: s_add_i32 s62, s61, s63 -define amdgpu_gs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } @_amdgpu_gs_sgpr_limit_i32 (i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, <4 x i32> inreg) { -.entry: - %65 = add i32 %0, %1 - %66 = add i32 %65, %2 - %67 = add i32 %66, %3 - %68 = add i32 %67, %4 - %69 = add i32 %68, %5 - %70 = add i32 %69, %6 - %71 = add i32 %70, %7 - %72 = add i32 %71, %8 - %73 = add i32 %72, %9 - %74 = add i32 %73, %10 - %75 = add i32 %74, %11 - %76 = add i32 %75, %12 - %77 = add i32 %76, %13 - %78 = add i32 %77, %14 - %79 = add i32 %78, %15 - %80 = add i32 %79, %16 - %81 = add i32 %80, %17 - %82 = add i32 %81, %18 - %83 = add i32 %82, %19 - %84 = add i32 %83, %20 - %85 = add i32 %84, %21 - %86 = add i32 %85, %22 - %87 = add i32 %86, %23 - %88 = add i32 %87, %24 - %89 = add i32 %88, %25 - %90 = add i32 %89, %26 - %91 = add i32 %90, %27 - %92 = add i32 %91, %28 - %93 = add i32 %92, %29 - %94 = add i32 %93, %30 - %95 = add i32 %94, %31 - %96 = add i32 %95, %32 - %97 = add i32 %96, %33 - %98 = add i32 %97, %34 - %99 = add i32 %98, %35 - %100 = add i32 %99, %36 - %101 = add i32 %100, %37 - %102 = add i32 %101, %38 - %103 = add i32 %102, %39 - %104 = add i32 %103, %40 - %105 = add i32 %104, %41 - %106 = add i32 %105, %42 - %107 = add i32 %106, %43 - %108 = add i32 %107, %44 - %109 = add i32 %108, %45 - %110 = add i32 %109, %46 - %111 = add i32 %110, %47 - %112 = add i32 %111, %48 - %113 = add i32 %112, %49 - %114 = add i32 %113, %50 - %115 = add i32 %114, %51 - %116 = add i32 %115, %52 - %117 = add i32 %116, %53 - %118 = add i32 %117, %54 - %119 = add i32 %118, %55 - %120 = add i32 %119, %56 - %121 = add i32 %120, %57 - %122 = add i32 %121, %58 - %123 = add i32 %122, %59 - %124 = add i32 %123, %60 - %125 = add i32 %124, %61 - %126 = add i32 %125, %62 - %127 = add i32 %126, %63 -%128 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } undef, i32 %65, 0 -%129 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %128, i32 %66, 1 -%130 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %129, i32 %67, 2 -%131 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %130, i32 %68, 3 -%132 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %131, i32 %69, 4 -%133 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %132, i32 %70, 5 -%134 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %133, i32 %71, 6 -%135 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %134, i32 %72, 7 -%136 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %135, i32 %73, 8 -%137 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %136, i32 %74, 9 -%138 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %137, i32 %75, 10 -%139 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %138, i32 %76, 11 -%140 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %139, i32 %77, 12 -%141 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %140, i32 %78, 13 -%142 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %141, i32 %79, 14 -%143 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %142, i32 %80, 15 -%144 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %143, i32 %81, 16 -%145 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %144, i32 %82, 17 -%146 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %145, i32 %83, 18 -%147 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %146, i32 %84, 19 -%148 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %147, i32 %85, 20 -%149 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %148, i32 %86, 21 -%150 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %149, i32 %87, 22 -%151 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %150, i32 %88, 23 -%152 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %151, i32 %89, 24 -%153 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %152, i32 %90, 25 -%154 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %153, i32 %91, 26 -%155 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %154, i32 %92, 27 -%156 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %155, i32 %93, 28 -%157 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %156, i32 %94, 29 -%158 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %157, i32 %95, 30 -%159 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %158, i32 %96, 31 -%160 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %159, i32 %97, 32 -%161 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %160, i32 %98, 33 -%162 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %161, i32 %99, 34 -%163 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %162, i32 %100, 35 -%164 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %163, i32 %101, 36 -%165 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %164, i32 %102, 37 -%166 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %165, i32 %103, 38 -%167 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %166, i32 %104, 39 -%168 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %167, i32 %105, 40 -%169 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %168, i32 %106, 41 -%170 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %169, i32 %107, 42 -%171 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %170, i32 %108, 43 -%172 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %171, i32 %109, 44 -%173 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %172, i32 %110, 45 -%174 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %173, i32 %111, 46 -%175 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %174, i32 %112, 47 -%176 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %175, i32 %113, 48 -%177 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %176, i32 %114, 49 -%178 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %177, i32 %115, 50 -%179 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %178, i32 %116, 51 -%180 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %179, i32 %117, 52 -%181 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %180, i32 %118, 53 -%182 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %181, i32 %119, 54 -%183 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %182, i32 %120, 55 -%184 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %183, i32 %121, 56 -%185 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %184, i32 %122, 57 -%186 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %185, i32 %123, 58 -%187 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %186, i32 %124, 59 -%188 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %187, i32 %125, 60 -%189 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %188, i32 %126, 61 -%190 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %189, i32 %127, 62 - ret { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %190 -} - -; CHECK: s_xor_b64 s[0:1], s[0:1], s[2:3] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[4:5] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[6:7] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[8:9] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[10:11] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[12:13] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[14:15] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[16:17] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[18:19] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[20:21] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[22:23] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[24:25] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[26:27] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[28:29] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[30:31] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[32:33] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[34:35] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[36:37] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[38:39] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[40:41] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[42:43] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[44:45] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[46:47] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[48:49] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[50:51] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[52:53] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[54:55] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[56:57] -; CHECK: s_xor_b64 s[0:1], s[0:1], s[58:59] -define amdgpu_gs void @_amdgpu_gs_sgpr_limit_i64 (i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, <4 x i32> inreg %addr) { -.entry: - %31 = xor i64 %0, %1 - %32 = xor i64 %31, %2 - %33 = xor i64 %32, %3 - %34 = xor i64 %33, %4 - %35 = xor i64 %34, %5 - %36 = xor i64 %35, %6 - %37 = xor i64 %36, %7 - %38 = xor i64 %37, %8 - %39 = xor i64 %38, %9 - %40 = xor i64 %39, %10 - %41 = xor i64 %40, %11 - %42 = xor i64 %41, %12 - %43 = xor i64 %42, %13 - %44 = xor i64 %43, %14 - %45 = xor i64 %44, %15 - %46 = xor i64 %45, %16 - %47 = xor i64 %46, %17 - %48 = xor i64 %47, %18 - %49 = xor i64 %48, %19 - %50 = xor i64 %49, %20 - %51 = xor i64 %50, %21 - %52 = xor i64 %51, %22 - %53 = xor i64 %52, %23 - %54 = xor i64 %53, %24 - %55 = xor i64 %54, %25 - %56 = xor i64 %55, %26 - %57 = xor i64 %56, %27 - %58 = xor i64 %57, %28 - %59 = xor i64 %58, %29 - %60 = bitcast i64 %59 to <2 x i32> - call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %60, <4 x i32> %addr, i32 4, i32 0, i32 0) - ret void -} - -declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) - diff --git a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll index e22653042a349..ac47d933ceb24 100644 --- a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll +++ b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll @@ -110,6 +110,18 @@ define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind { ret <2 x i1> %C } +; Same as above, but indices scalarized. +define <2 x i1> @test6b(<2 x i32> %X, <2 x %S*> %P) nounwind { +; CHECK-LABEL: @test6b( +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = getelementptr inbounds %S, <2 x %S*> %P, i32 0, i32 1, <2 x i32> %X + %B = getelementptr inbounds %S, <2 x %S*> %P, i32 0, i32 0 + %C = icmp eq <2 x i32*> %A, %B + ret <2 x i1> %C +} + @G = external global [3 x i8] define i8* @test7(i16 %Idx) { ; CHECK-LABEL: @test7( diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu index 4f24adace556f..3ed44f7580801 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -381,7 +381,7 @@ public: // Support for dispatch next INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) { - int lo, hi; + uint32_t lo, hi; __kmpc_impl_unpack(val, lo, hi); hi = __kmpc_impl_shfl_sync(active, hi, leader); lo = __kmpc_impl_shfl_sync(active, lo, leader); @@ -390,8 +390,8 @@ public: INLINE static uint64_t NextIter() { __kmpc_impl_lanemask_t active = __ACTIVEMASK(); - int leader = __kmpc_impl_ffs(active) - 1; - int change = __kmpc_impl_popc(active); + uint32_t leader = __kmpc_impl_ffs(active) - 1; + uint32_t change = __kmpc_impl_popc(active); __kmpc_impl_lanemask_t lane_mask_lt = __kmpc_impl_lanemask_lt(); unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt); uint64_t warp_res; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 182a4f68cc579..ee47cc4e5810a 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -49,13 +49,12 @@ EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, int32_t *LaneId, int32_t *NumLanes) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n"); uint32_t ConvergentMask = Mask; - int32_t ConvergentSize = __popc(ConvergentMask); + int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); - *LaneSource += __ffs(WorkRemaining); - *IsFinal = __popc(WorkRemaining) == 1; - uint32_t lanemask_lt; - asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask_lt)); - *LaneId = __popc(ConvergentMask & lanemask_lt); + *LaneSource += __kmpc_impl_ffs(WorkRemaining); + *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; + uint32_t lanemask_lt = __kmpc_impl_lanemask_lt(); + *LaneId = __kmpc_impl_popc(ConvergentMask & lanemask_lt); int threadId = GetLogicalThreadIdInBlock(isSPMDMode()); int sourceThreadId = (threadId & ~(WARPSIZE - 1)) + *LaneSource; @@ -123,13 +122,12 @@ EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, int32_t *LaneSource) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n"); uint32_t ConvergentMask = Mask; - int32_t ConvergentSize = __popc(ConvergentMask); + int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); - *LaneSource += __ffs(WorkRemaining); - *IsFinal = __popc(WorkRemaining) == 1; - uint32_t lanemask_lt; - asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask_lt)); - uint32_t OmpId = __popc(ConvergentMask & lanemask_lt); + *LaneSource += __kmpc_impl_ffs(WorkRemaining); + *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; + uint32_t lanemask_lt = __kmpc_impl_lanemask_lt(); + uint32_t OmpId = __kmpc_impl_popc(ConvergentMask & lanemask_lt); int threadId = GetLogicalThreadIdInBlock(isSPMDMode()); int sourceThreadId = (threadId & ~(WARPSIZE - 1)) + *LaneSource; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index e5e76d553117e..ea53f613738df 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -28,12 +28,11 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) { } EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { - int lo, hi; - asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); + uint32_t lo, hi; + __kmpc_impl_unpack(val, lo, hi); hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size); lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size); - asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); - return val; + return __kmpc_impl_pack(lo, hi); } INLINE static void gpu_regular_warp_reduce(void *reduce_data, @@ -60,18 +59,16 @@ INLINE static void gpu_irregular_warp_reduce(void *reduce_data, INLINE static uint32_t gpu_irregular_simd_reduce(void *reduce_data, kmp_ShuffleReductFctPtr shflFct) { - uint32_t lanemask_lt; - uint32_t lanemask_gt; uint32_t size, remote_id, physical_lane_id; physical_lane_id = GetThreadIdInBlock() % WARPSIZE; - asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask_lt)); + uint32_t lanemask_lt = __kmpc_impl_lanemask_lt(); uint32_t Liveness = __ACTIVEMASK(); - uint32_t logical_lane_id = __popc(Liveness & lanemask_lt) * 2; - asm("mov.u32 %0, %%lanemask_gt;" : "=r"(lanemask_gt)); + uint32_t logical_lane_id = __kmpc_impl_popc(Liveness & lanemask_lt) * 2; + uint32_t lanemask_gt = __kmpc_impl_lanemask_gt(); do { Liveness = __ACTIVEMASK(); - remote_id = __ffs(Liveness & lanemask_gt); - size = __popc(Liveness); + remote_id = __kmpc_impl_ffs(Liveness & lanemask_gt); + size = __kmpc_impl_popc(Liveness); logical_lane_id /= 2; shflFct(reduce_data, /*LaneId =*/logical_lane_id, /*Offset=*/remote_id - 1 - physical_lane_id, /*AlgoVersion=*/2); @@ -150,7 +147,7 @@ static int32_t nvptx_parallel_reduce_nowait( gpu_regular_warp_reduce(reduce_data, shflFct); else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, - /*LaneCount=*/__popc(Liveness), + /*LaneCount=*/__kmpc_impl_popc(Liveness), /*LaneId=*/GetThreadIdInBlock() % WARPSIZE); else if (!isRuntimeUninitialized) // Dispersed lanes. Only threads in L2 // parallel region may enter here; return @@ -325,7 +322,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, gpu_regular_warp_reduce(reduce_data, shflFct); else // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, - /*LaneCount=*/__popc(Liveness), + /*LaneCount=*/__kmpc_impl_popc(Liveness), /*LaneId=*/ThreadId % WARPSIZE); // When we have more than [warpsize] number of threads diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h index c1a8467964940..884982d9a9e72 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -206,9 +206,8 @@ INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } INLINE void IncParallelLevel(bool ActiveParallel) { unsigned Active = __ACTIVEMASK(); __kmpc_impl_syncwarp(Active); - unsigned LaneMaskLt; - asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt)); - unsigned Rank = __popc(Active & LaneMaskLt); + unsigned LaneMaskLt = __kmpc_impl_lanemask_lt(); + unsigned Rank = __kmpc_impl_popc(Active & LaneMaskLt); if (Rank == 0) { parallelLevel[GetWarpId()] += (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); @@ -220,9 +219,8 @@ INLINE void IncParallelLevel(bool ActiveParallel) { INLINE void DecParallelLevel(bool ActiveParallel) { unsigned Active = __ACTIVEMASK(); __kmpc_impl_syncwarp(Active); - unsigned LaneMaskLt; - asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt)); - unsigned Rank = __popc(Active & LaneMaskLt); + unsigned LaneMaskLt = __kmpc_impl_lanemask_lt(); + unsigned Rank = __kmpc_impl_popc(Active & LaneMaskLt); if (Rank == 0) { parallelLevel[GetWarpId()] -= (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index b9f930d0da5ec..8986195a363bf 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -16,12 +16,12 @@ #include "option.h" -INLINE void __kmpc_impl_unpack(int64_t val, int32_t &lo, int32_t &hi) { +INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) { asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); } -INLINE int64_t __kmpc_impl_pack(int32_t lo, int32_t hi) { - int64_t val; +INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { + uint64_t val; asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); return val; } @@ -34,9 +34,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { return res; } -INLINE int __kmpc_impl_ffs(uint32_t x) { return __ffs(x); } +INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() { + __kmpc_impl_lanemask_t res; + asm("mov.u32 %0, %%lanemask_gt;" : "=r"(res)); + return res; +} + +INLINE uint32_t __kmpc_impl_ffs(uint32_t x) { return __ffs(x); } -INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); } +INLINE uint32_t __kmpc_impl_popc(uint32_t x) { return __popc(x); } #ifndef CUDA_VERSION #error CUDA_VERSION macro is undefined, something wrong with cuda.