Skip to content

Commit f2c3859

Browse files
committed
[OpenMP] Remove shadow pointer map and introduce consistent locking
The shadow pointer map was problematic as we scanned an entire list if an entry had shadow pointers. The new scheme stores the shadow pointers inside the entries. This allows easy access without any search. It also helps us, but also makes it necessary, to define a consistent locking scheme. The implicit locking of entries via TargetPointerResultTy makes this pretty effortless, however one has to: - Lock HDTTMap before locking an entry. - Do not lock HDTTMap while holding an entry lock. - Hold the entry lock to read or modify an entry. The changes to submitData and retrieveData have been made to ensure 2 when the LIBOMPTARGET_INFO flag is used. Most everything else happens by itself as TargetPointerResultTy acts as a lock_guard for the entry. It is a little complicated when we deal with multiple entries, especially as they can be equal. However, one can still follow the rules with reasonable effort. LookupResult are now finally also locking the entry before it is inspected. This is good even if we haven't run into a problem yet. Differential Revision: https://reviews.llvm.org/D123446
1 parent 0153ab6 commit f2c3859

File tree

4 files changed

+435
-398
lines changed

4 files changed

+435
-398
lines changed

openmp/libomptarget/include/device.h

Lines changed: 137 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <cassert>
1717
#include <cstddef>
1818
#include <cstdint>
19+
#include <cstring>
1920
#include <list>
2021
#include <map>
2122
#include <memory>
@@ -26,6 +27,7 @@
2627
#include "ExclusiveAccess.h"
2728
#include "omptarget.h"
2829
#include "rtl.h"
30+
#include "llvm/ADT/SmallSet.h"
2931
#include "llvm/ADT/SmallVector.h"
3032

3133
// Forward declarations.
@@ -43,6 +45,22 @@ enum kmp_target_offload_kind {
4345
};
4446
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4547

48+
/// Information about shadow pointers.
49+
struct ShadowPtrInfoTy {
50+
void **HstPtrAddr = nullptr;
51+
void *HstPtrVal = nullptr;
52+
void **TgtPtrAddr = nullptr;
53+
void *TgtPtrVal = nullptr;
54+
55+
bool operator==(const ShadowPtrInfoTy &Other) const {
56+
return HstPtrAddr == Other.HstPtrAddr;
57+
}
58+
};
59+
60+
inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
61+
return lhs.HstPtrAddr < rhs.HstPtrAddr;
62+
}
63+
4664
/// Map between host data and target data.
4765
struct HostDataToTargetTy {
4866
const uintptr_t HstPtrBase; // host info.
@@ -60,8 +78,7 @@ struct HostDataToTargetTy {
6078

6179
struct StatesTy {
6280
StatesTy(uint64_t DRC, uint64_t HRC)
63-
: DynRefCount(DRC), HoldRefCount(HRC),
64-
MayContainAttachedPointers(false) {}
81+
: DynRefCount(DRC), HoldRefCount(HRC) {}
6582
/// The dynamic reference count is the standard reference count as of OpenMP
6683
/// 4.5. The hold reference count is an OpenMP extension for the sake of
6784
/// OpenACC support.
@@ -80,17 +97,10 @@ struct HostDataToTargetTy {
8097
uint64_t DynRefCount;
8198
uint64_t HoldRefCount;
8299

83-
/// Boolean flag to remember if any subpart of the mapped region might be
84-
/// an attached pointer.
85-
bool MayContainAttachedPointers;
86-
87-
/// This mutex will be locked when data movement is issued. For targets that
88-
/// doesn't support async data movement, this mutex can guarantee that after
89-
/// it is released, memory region on the target is update to date. For
90-
/// targets that support async data movement, this can guarantee that data
91-
/// movement has been issued. This mutex *must* be locked right before
92-
/// releasing the mapping table lock.
93-
std::mutex UpdateMtx;
100+
/// A map of shadow pointers associated with this entry, the keys are host
101+
/// pointer addresses to identify stale entries.
102+
llvm::SmallSet<ShadowPtrInfoTy, 2> ShadowPtrInfos;
103+
94104
/// Pointer to the event corresponding to the data update of this map.
95105
/// Note: At present this event is created when the first data transfer from
96106
/// host to device is issued, and only being used for H2D. It is not used
@@ -222,16 +232,41 @@ struct HostDataToTargetTy {
222232
return ThisRefCount == 1;
223233
}
224234

225-
void setMayContainAttachedPointers() const {
226-
States->MayContainAttachedPointers = true;
235+
/// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
236+
/// the target ptr value was not already present in the existing set of shadow
237+
/// pointers. Return true if something was added.
238+
bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const {
239+
auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
240+
if (Pair.second)
241+
return true;
242+
// Check for a stale entry, if found, replace the old one.
243+
if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
244+
return false;
245+
States->ShadowPtrInfos.erase(ShadowPtrInfo);
246+
return addShadowPointer(ShadowPtrInfo);
227247
}
228-
bool getMayContainAttachedPointers() const {
229-
return States->MayContainAttachedPointers;
248+
249+
/// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
250+
/// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
251+
/// OFFLOAD_SUCCESS. The entry is locked for this operation.
252+
template <typename CBTy> int foreachShadowPointerInfo(CBTy CB) const {
253+
for (auto &It : States->ShadowPtrInfos)
254+
if (CB(It) == OFFLOAD_FAIL)
255+
return OFFLOAD_FAIL;
256+
return OFFLOAD_SUCCESS;
230257
}
231258

232-
void lock() const { States->UpdateMtx.lock(); }
259+
/// Lock this entry for exclusive access. Ensure to get exclusive access to
260+
/// HDTTMap first!
261+
void lock() const { Mtx.lock(); }
262+
263+
/// Unlock this entry to allow other threads inspecting it.
264+
void unlock() const { Mtx.unlock(); }
233265

234-
void unlock() const { States->UpdateMtx.unlock(); }
266+
private:
267+
// Mutex that needs to be held before the entry is inspected or modified. The
268+
// HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
269+
mutable std::mutex Mtx;
235270
};
236271

237272
/// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
@@ -243,6 +278,7 @@ struct HostDataToTargetMapKeyTy {
243278
uintptr_t KeyValue;
244279

245280
HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {}
281+
HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {}
246282
HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT)
247283
: KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
248284
HostDataToTargetTy *HDTT;
@@ -260,49 +296,89 @@ inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
260296
return LHS.KeyValue < RHS.KeyValue;
261297
}
262298

263-
struct LookupResult {
264-
struct {
265-
unsigned IsContained : 1;
266-
unsigned ExtendsBefore : 1;
267-
unsigned ExtendsAfter : 1;
268-
} Flags;
269-
270-
/// The corresponding map table entry which is stable.
271-
HostDataToTargetTy *Entry = nullptr;
272-
273-
LookupResult() : Flags({0, 0, 0}), Entry() {}
274-
};
275-
276299
/// This struct will be returned by \p DeviceTy::getTargetPointer which provides
277-
/// more data than just a target pointer.
300+
/// more data than just a target pointer. A TargetPointerResultTy that has a non
301+
/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
302+
/// the entry is locked. To give up ownership without destroying the TPR use the
303+
/// reset() function.
278304
struct TargetPointerResultTy {
279-
struct {
305+
struct FlagTy {
280306
/// If the map table entry is just created
281307
unsigned IsNewEntry : 1;
282308
/// If the pointer is actually a host pointer (when unified memory enabled)
283309
unsigned IsHostPointer : 1;
284310
/// If the pointer is present in the mapping table.
285311
unsigned IsPresent : 1;
286-
} Flags = {0, 0, 0};
312+
/// Flag indicating that this was the last user of the entry and the ref
313+
/// count is now 0.
314+
unsigned IsLast : 1;
315+
} Flags = {0, 0, 0, 0};
316+
317+
TargetPointerResultTy(const TargetPointerResultTy &) = delete;
318+
TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete;
319+
TargetPointerResultTy() {}
320+
321+
TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry,
322+
void *TargetPointer)
323+
: Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
324+
if (Entry)
325+
Entry->lock();
326+
}
327+
328+
TargetPointerResultTy(TargetPointerResultTy &&TPR)
329+
: Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
330+
TPR.Entry = nullptr;
331+
}
332+
333+
TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) {
334+
if (&TPR != this) {
335+
std::swap(Flags, TPR.Flags);
336+
std::swap(Entry, TPR.Entry);
337+
std::swap(TargetPointer, TPR.TargetPointer);
338+
}
339+
return *this;
340+
}
341+
342+
~TargetPointerResultTy() {
343+
if (Entry)
344+
Entry->unlock();
345+
}
287346

288347
bool isPresent() const { return Flags.IsPresent; }
289348

290349
bool isHostPointer() const { return Flags.IsHostPointer; }
291350

292-
/// The corresponding map table entry which is stable.
293-
HostDataToTargetTy *Entry = nullptr;
294-
295351
/// The corresponding target pointer
296352
void *TargetPointer = nullptr;
353+
354+
HostDataToTargetTy *getEntry() const { return Entry; }
355+
void setEntry(HostDataToTargetTy *HDTTT,
356+
HostDataToTargetTy *OwnedTPR = nullptr) {
357+
if (Entry)
358+
Entry->unlock();
359+
Entry = HDTTT;
360+
if (Entry && Entry != OwnedTPR)
361+
Entry->lock();
362+
}
363+
364+
void reset() { *this = TargetPointerResultTy(); }
365+
366+
private:
367+
/// The corresponding map table entry which is stable.
368+
HostDataToTargetTy *Entry = nullptr;
297369
};
298370

299-
/// Map for shadow pointers
300-
struct ShadowPtrValTy {
301-
void *HstPtrVal;
302-
void *TgtPtrAddr;
303-
void *TgtPtrVal;
371+
struct LookupResult {
372+
struct {
373+
unsigned IsContained : 1;
374+
unsigned ExtendsBefore : 1;
375+
unsigned ExtendsAfter : 1;
376+
} Flags;
377+
378+
LookupResult() : Flags({0, 0, 0}), TPR() {}
379+
380+
TargetPointerResultTy TPR;
304381
};
305-
typedef std::map<void *, ShadowPtrValTy> ShadowPtrListTy;
306382

307383
///
308384
struct PendingCtorDtorListsTy {
@@ -336,9 +412,7 @@ struct DeviceTy {
336412

337413
PendingCtorsDtorsPerLibrary PendingCtorsDtors;
338414

339-
ShadowPtrListTy ShadowPtrMap;
340-
341-
std::mutex PendingGlobalsMtx, ShadowMtx;
415+
std::mutex PendingGlobalsMtx;
342416

343417
DeviceTy(RTLInfoTy *RTL);
344418
// DeviceTy is not copyable
@@ -353,7 +427,8 @@ struct DeviceTy {
353427
/// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
354428
/// exclusive access to the HDTT map.
355429
LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
356-
int64_t Size);
430+
int64_t Size,
431+
HostDataToTargetTy *OwnedTPR = nullptr);
357432

358433
/// Get the target pointer based on host pointer begin and base. If the
359434
/// mapping already exists, the target pointer will be returned directly. In
@@ -365,12 +440,13 @@ struct DeviceTy {
365440
/// - Data allocation failed;
366441
/// - The user tried to do an illegal mapping;
367442
/// - Data transfer issue fails.
368-
TargetPointerResultTy
369-
getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
370-
map_var_info_t HstPtrName, bool HasFlagTo,
371-
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
372-
bool HasCloseModifier, bool HasPresentModifier,
373-
bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
443+
TargetPointerResultTy getTargetPointer(
444+
HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
445+
int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
446+
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
447+
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
448+
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
449+
bool ReleaseHDTTMap = true);
374450

375451
/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
376452
/// ensures exclusive access to the HDTT map.
@@ -388,10 +464,9 @@ struct DeviceTy {
388464
/// - \p FromDataEnd tracks the number of threads referencing the entry at
389465
/// targetDataEnd for delayed deletion purpose.
390466
[[nodiscard]] TargetPointerResultTy
391-
getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
392-
bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr,
393-
bool MustContain = false, bool ForceDelete = false,
394-
bool FromDataEnd = false);
467+
getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
468+
bool UseHoldRefCount, bool MustContain = false,
469+
bool ForceDelete = false, bool FromDataEnd = false);
395470

396471
/// Remove the \p Entry from the data map. Expect the entry's total reference
397472
/// count to be zero and the caller thread to be the last one using it. \p
@@ -436,10 +511,12 @@ struct DeviceTy {
436511
// synchronous.
437512
// Copy data from host to device
438513
int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
439-
AsyncInfoTy &AsyncInfo);
514+
AsyncInfoTy &AsyncInfo,
515+
HostDataToTargetTy *Entry = nullptr);
440516
// Copy data from device back to host
441517
int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
442-
AsyncInfoTy &AsyncInfo);
518+
AsyncInfoTy &AsyncInfo,
519+
HostDataToTargetTy *Entry = nullptr);
443520
// Copy data from current device to destination device directly
444521
int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
445522
int64_t Size, AsyncInfoTy &AsyncInfo);

openmp/libomptarget/src/api.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,13 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
116116
}
117117

118118
DeviceTy &Device = *PM->Devices[DeviceNum];
119-
bool IsLast; // not used
120-
bool IsHostPtr;
121119
// omp_target_is_present tests whether a host pointer refers to storage that
122120
// is mapped to a given device. However, due to the lack of the storage size,
123121
// only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
124122
// lengh array) is mapped instead of the referred storage.
125-
TargetPointerResultTy TPR =
126-
Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
127-
/*UpdateRefCount=*/false,
128-
/*UseHoldRefCount=*/false, IsHostPtr);
123+
TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
124+
/*UpdateRefCount=*/false,
125+
/*UseHoldRefCount=*/false);
129126
int Rc = TPR.isPresent();
130127
DP("Call to omp_target_is_present returns %d\n", Rc);
131128
return Rc;
@@ -360,13 +357,10 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
360357
return nullptr;
361358
}
362359

363-
bool IsLast = false;
364-
bool IsHostPtr = false;
365360
auto &Device = *PM->Devices[DeviceNum];
366-
TargetPointerResultTy TPR =
367-
Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
368-
/*UpdateRefCount=*/false,
369-
/*UseHoldRefCount=*/false, IsHostPtr);
361+
TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
362+
/*UpdateRefCount=*/false,
363+
/*UseHoldRefCount=*/false);
370364
if (!TPR.isPresent()) {
371365
DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n",
372366
DPxPTR(Ptr), DeviceNum);

0 commit comments

Comments
 (0)