Skip to content

Commit f4d87c4

Browse files
authored
[flang][cuda] Add asyncId to allocate entry point (llvm#134947)
1 parent fa273e1 commit f4d87c4

38 files changed

+110
-91
lines changed

flang-rt/include/flang-rt/runtime/descriptor.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
#include <cstdio>
3030
#include <cstring>
3131

32+
/// Value used for asyncId when no specific stream is specified.
33+
// -1 is the "no specific stream" sentinel for asyncId parameters.
static constexpr std::int64_t kNoAsyncId{-1};
34+
3235
namespace Fortran::runtime {
3336

3437
class Terminator;
@@ -369,7 +372,7 @@ class Descriptor {
369372
// before calling. It (re)computes the byte strides after
370373
// allocation. Does not allocate automatic components or
371374
// perform default component initialization.
372-
RT_API_ATTRS int Allocate();
375+
RT_API_ATTRS int Allocate(std::int64_t asyncId);
373376
RT_API_ATTRS void SetByteStrides();
374377

375378
// Deallocates storage; does not call FINAL subroutines or

flang-rt/include/flang-rt/runtime/reduction-templates.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
347347
// as the element size of the source.
348348
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
349349
CFI_attribute_allocatable);
350-
if (int stat{result.Allocate()}) {
350+
if (int stat{result.Allocate(kNoAsyncId)}) {
351351
terminator.Crash(
352352
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
353353
}

flang-rt/lib/cuda/allocatable.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
5353
}
5454
// Perform the standard allocation.
5555
int stat{RTNAME(AllocatableAllocate)(
56-
desc, hasStat, errMsg, sourceFile, sourceLine)};
56+
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
5757
if (pinned) {
5858
// Set pinned according to stat. More infrastructure is needed to set it
5959
// closer to the actual allocation call.

flang-rt/lib/runtime/allocatable.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,17 @@ void RTDEF(AllocatableApplyMold)(
133133
}
134134
}
135135

136-
int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
137-
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
136+
int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
137+
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
138+
int sourceLine) {
138139
Terminator terminator{sourceFile, sourceLine};
139140
if (!descriptor.IsAllocatable()) {
140141
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
141142
} else if (descriptor.IsAllocated()) {
142143
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
143144
} else {
144-
int stat{ReturnError(terminator, descriptor.Allocate(), errMsg, hasStat)};
145+
int stat{
146+
ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
145147
if (stat == StatOk) {
146148
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
147149
if (const auto *derived{addendum->derivedType()}) {
@@ -160,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
160162
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
161163
const char *sourceFile, int sourceLine) {
162164
int stat{RTNAME(AllocatableAllocate)(
163-
alloc, hasStat, errMsg, sourceFile, sourceLine)};
165+
alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
164166
if (stat == StatOk) {
165167
Terminator terminator{sourceFile, sourceLine};
166168
DoFromSourceAssign(alloc, source, terminator);

flang-rt/lib/runtime/array-constructor.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,17 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
5050
initialAllocationSize(fromElements, to.ElementBytes())};
5151
to.GetDimension(0).SetBounds(1, allocationSize);
5252
RTNAME(AllocatableAllocate)
53-
(to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
54-
vector.sourceLine);
53+
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
54+
vector.sourceFile, vector.sourceLine);
5555
to.GetDimension(0).SetBounds(1, fromElements);
5656
vector.actualAllocationSize = allocationSize;
5757
} else {
5858
// Do not over-allocate if the final extent was known before pushing the
5959
// first value: there should be no reallocation.
6060
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
6161
RTNAME(AllocatableAllocate)
62-
(to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
63-
vector.sourceLine);
62+
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
63+
vector.sourceFile, vector.sourceLine);
6464
vector.actualAllocationSize = previousToElements;
6565
}
6666
} else {

flang-rt/lib/runtime/assign.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
9999
toDim.SetByteStride(stride);
100100
stride *= toDim.Extent();
101101
}
102-
int result{ReturnError(terminator, to.Allocate())};
102+
int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
103103
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
104104
result = ReturnError(terminator, Initialize(to, *derived, terminator));
105105
}
@@ -277,7 +277,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
277277
// entity, otherwise, the Deallocate() below will not
278278
// free the descriptor memory.
279279
newFrom.raw().attribute = CFI_attribute_allocatable;
280-
auto stat{ReturnError(terminator, newFrom.Allocate())};
280+
auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
281281
if (stat == StatOk) {
282282
if (HasDynamicComponent(from)) {
283283
// If 'from' has allocatable/automatic component, we cannot

flang-rt/lib/runtime/character.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
117117
for (int j{0}; j < rank; ++j) {
118118
result.GetDimension(j).SetBounds(1, ub[j]);
119119
}
120-
if (result.Allocate() != CFI_SUCCESS) {
120+
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
121121
terminator.Crash("Compare: could not allocate storage for result");
122122
}
123123
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -172,7 +172,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
172172
for (int j{0}; j < rank; ++j) {
173173
result.GetDimension(j).SetBounds(1, ub[j]);
174174
}
175-
if (result.Allocate() != CFI_SUCCESS) {
175+
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
176176
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
177177
}
178178
for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -226,7 +226,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
226226
for (int j{0}; j < rank; ++j) {
227227
result.GetDimension(j).SetBounds(1, ub[j]);
228228
}
229-
if (result.Allocate() != CFI_SUCCESS) {
229+
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
230230
terminator.Crash("LEN_TRIM: could not allocate storage for result");
231231
}
232232
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -408,7 +408,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
408408
for (int j{0}; j < rank; ++j) {
409409
result.GetDimension(j).SetBounds(1, ub[j]);
410410
}
411-
if (result.Allocate() != CFI_SUCCESS) {
411+
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
412412
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
413413
}
414414
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -511,7 +511,7 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
511511
for (int j{0}; j < rank; ++j) {
512512
accumulator.GetDimension(j).SetBounds(1, ub[j]);
513513
}
514-
RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS);
514+
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
515515
}
516516
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
517517
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -587,7 +587,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
587587
for (int j{0}; j < rank; ++j) {
588588
accumulator.GetDimension(j).SetBounds(1, ub[j]);
589589
}
590-
if (accumulator.Allocate() != CFI_SUCCESS) {
590+
if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
591591
terminator.Crash(
592592
"CharacterConcatenate: could not allocate storage for result");
593593
}
@@ -610,7 +610,7 @@ void RTDEF(CharacterConcatenateScalar1)(
610610
accumulator.set_base_addr(nullptr);
611611
std::size_t oldLen{accumulator.ElementBytes()};
612612
accumulator.raw().elem_len += chars;
613-
RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS);
613+
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
614614
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
615615
FreeMemory(old);
616616
}
@@ -812,7 +812,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
812812
std::size_t origBytes{string.ElementBytes()};
813813
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
814814
CFI_attribute_allocatable);
815-
if (result.Allocate() != CFI_SUCCESS) {
815+
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
816816
terminator.Crash("REPEAT could not allocate storage for result");
817817
}
818818
const char *from{string.OffsetElement()};
@@ -846,7 +846,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
846846
}
847847
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
848848
CFI_attribute_allocatable);
849-
RUNTIME_CHECK(terminator, result.Allocate() == CFI_SUCCESS);
849+
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
850850
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
851851
}
852852

flang-rt/lib/runtime/copy.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
171171
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
172172
if (toDesc.raw().base_addr != nullptr) {
173173
toDesc.set_base_addr(nullptr);
174-
RUNTIME_CHECK(terminator, toDesc.Allocate() == CFI_SUCCESS);
174+
RUNTIME_CHECK(
175+
terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
175176
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
176177
fromPtr + component->offset())};
177178
copyStack.emplace(toDesc, fromDesc);

flang-rt/lib/runtime/derived.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
5151
comp.EstablishDescriptor(allocDesc, instance, terminator);
5252
allocDesc.raw().attribute = CFI_attribute_allocatable;
5353
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
54-
stat = ReturnError(terminator, allocDesc.Allocate(), errMsg, hasStat);
54+
stat = ReturnError(
55+
terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
5556
if (stat == StatOk) {
5657
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
5758
if (const auto *derived{addendum->derivedType()}) {
@@ -151,7 +152,8 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
151152
*clone.ElementComponent<Descriptor>(at, comp.offset())};
152153
if (origDesc.IsAllocated()) {
153154
cloneDesc.ApplyMold(origDesc, origDesc.rank());
154-
stat = ReturnError(terminator, cloneDesc.Allocate(), errMsg, hasStat);
155+
stat = ReturnError(
156+
terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
155157
if (stat == StatOk) {
156158
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
157159
if (const typeInfo::DerivedType *
@@ -258,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
258260
copy.raw().attribute = CFI_attribute_allocatable;
259261
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
260262
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
261-
copy.Allocate() == CFI_SUCCESS);
263+
copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
262264
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
263265
argDescriptor = &copy;
264266
}

flang-rt/lib/runtime/descriptor.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
158158
#endif
159159
}
160160

161-
RT_API_ATTRS int Descriptor::Allocate() {
161+
RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
162162
std::size_t elementBytes{ElementBytes()};
163163
if (static_cast<std::int64_t>(elementBytes) < 0) {
164164
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate() {
170170
// Zero size allocation is possible in Fortran and the resulting
171171
// descriptor must be allocated/associated. Since std::malloc(0)
172172
// result is implementation defined, always allocate at least one byte.
173-
void *p{alloc(byteSize ? byteSize : 1, /*asyncId=*/-1)};
173+
void *p{alloc(byteSize ? byteSize : 1, asyncId)};
174174
if (!p) {
175175
return CFI_ERROR_MEM_ALLOCATION;
176176
}

0 commit comments

Comments
 (0)