16
16
#include < cassert>
17
17
#include < cstddef>
18
18
#include < cstdint>
19
+ #include < cstring>
19
20
#include < list>
20
21
#include < map>
21
22
#include < memory>
26
27
#include " ExclusiveAccess.h"
27
28
#include " omptarget.h"
28
29
#include " rtl.h"
30
+ #include " llvm/ADT/SmallSet.h"
29
31
#include " llvm/ADT/SmallVector.h"
30
32
31
33
// Forward declarations.
@@ -43,6 +45,22 @@ enum kmp_target_offload_kind {
43
45
};
44
46
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t ;
45
47
48
+ // / Information about shadow pointers.
49
+ struct ShadowPtrInfoTy {
50
+ void **HstPtrAddr = nullptr ;
51
+ void *HstPtrVal = nullptr ;
52
+ void **TgtPtrAddr = nullptr ;
53
+ void *TgtPtrVal = nullptr ;
54
+
55
+ bool operator ==(const ShadowPtrInfoTy &Other) const {
56
+ return HstPtrAddr == Other.HstPtrAddr ;
57
+ }
58
+ };
59
+
60
+ inline bool operator <(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
61
+ return lhs.HstPtrAddr < rhs.HstPtrAddr ;
62
+ }
63
+
46
64
// / Map between host data and target data.
47
65
struct HostDataToTargetTy {
48
66
const uintptr_t HstPtrBase; // host info.
@@ -60,8 +78,7 @@ struct HostDataToTargetTy {
60
78
61
79
struct StatesTy {
62
80
StatesTy (uint64_t DRC, uint64_t HRC)
63
- : DynRefCount(DRC), HoldRefCount(HRC),
64
- MayContainAttachedPointers (false ) {}
81
+ : DynRefCount(DRC), HoldRefCount(HRC) {}
65
82
// / The dynamic reference count is the standard reference count as of OpenMP
66
83
// / 4.5. The hold reference count is an OpenMP extension for the sake of
67
84
// / OpenACC support.
@@ -80,17 +97,10 @@ struct HostDataToTargetTy {
80
97
uint64_t DynRefCount;
81
98
uint64_t HoldRefCount;
82
99
83
- // / Boolean flag to remember if any subpart of the mapped region might be
84
- // / an attached pointer.
85
- bool MayContainAttachedPointers;
86
-
87
- // / This mutex will be locked when data movement is issued. For targets that
88
- // / doesn't support async data movement, this mutex can guarantee that after
89
- // / it is released, memory region on the target is update to date. For
90
- // / targets that support async data movement, this can guarantee that data
91
- // / movement has been issued. This mutex *must* be locked right before
92
- // / releasing the mapping table lock.
93
- std::mutex UpdateMtx;
100
+ // / A map of shadow pointers associated with this entry, the keys are host
101
+ // / pointer addresses to identify stale entries.
102
+ llvm::SmallSet<ShadowPtrInfoTy, 2 > ShadowPtrInfos;
103
+
94
104
// / Pointer to the event corresponding to the data update of this map.
95
105
// / Note: At present this event is created when the first data transfer from
96
106
// / host to device is issued, and only being used for H2D. It is not used
@@ -222,16 +232,41 @@ struct HostDataToTargetTy {
222
232
return ThisRefCount == 1 ;
223
233
}
224
234
225
- void setMayContainAttachedPointers () const {
226
- States->MayContainAttachedPointers = true ;
235
+ // / Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
236
+ // / the target ptr value was not already present in the existing set of shadow
237
+ // / pointers. Return true if something was added.
238
+ bool addShadowPointer (const ShadowPtrInfoTy &ShadowPtrInfo) const {
239
+ auto Pair = States->ShadowPtrInfos .insert (ShadowPtrInfo);
240
+ if (Pair.second )
241
+ return true ;
242
+ // Check for a stale entry, if found, replace the old one.
243
+ if ((*Pair.first ).TgtPtrVal == ShadowPtrInfo.TgtPtrVal )
244
+ return false ;
245
+ States->ShadowPtrInfos .erase (ShadowPtrInfo);
246
+ return addShadowPointer (ShadowPtrInfo);
227
247
}
228
- bool getMayContainAttachedPointers () const {
229
- return States->MayContainAttachedPointers ;
248
+
249
+ // / Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
250
+ // / \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
251
+ // / OFFLOAD_SUCCESS. The entry is locked for this operation.
252
+ template <typename CBTy> int foreachShadowPointerInfo (CBTy CB) const {
253
+ for (auto &It : States->ShadowPtrInfos )
254
+ if (CB (It) == OFFLOAD_FAIL)
255
+ return OFFLOAD_FAIL;
256
+ return OFFLOAD_SUCCESS;
230
257
}
231
258
232
- void lock () const { States->UpdateMtx .lock (); }
259
+ // / Lock this entry for exclusive access. Ensure to get exclusive access to
260
+ // / HDTTMap first!
261
+ void lock () const { Mtx.lock (); }
262
+
263
+ // / Unlock this entry to allow other threads inspecting it.
264
+ void unlock () const { Mtx.unlock (); }
233
265
234
- void unlock () const { States->UpdateMtx .unlock (); }
266
+ private:
267
+ // Mutex that needs to be held before the entry is inspected or modified. The
268
+ // HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
269
+ mutable std::mutex Mtx;
235
270
};
236
271
237
272
// / Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
@@ -243,6 +278,7 @@ struct HostDataToTargetMapKeyTy {
243
278
uintptr_t KeyValue;
244
279
245
280
HostDataToTargetMapKeyTy (void *Key) : KeyValue(uintptr_t (Key)) {}
281
+ HostDataToTargetMapKeyTy (uintptr_t Key) : KeyValue(Key) {}
246
282
HostDataToTargetMapKeyTy (HostDataToTargetTy *HDTT)
247
283
: KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
248
284
HostDataToTargetTy *HDTT;
@@ -260,49 +296,89 @@ inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
260
296
return LHS.KeyValue < RHS.KeyValue ;
261
297
}
262
298
263
- struct LookupResult {
264
- struct {
265
- unsigned IsContained : 1 ;
266
- unsigned ExtendsBefore : 1 ;
267
- unsigned ExtendsAfter : 1 ;
268
- } Flags;
269
-
270
- // / The corresponding map table entry which is stable.
271
- HostDataToTargetTy *Entry = nullptr ;
272
-
273
- LookupResult () : Flags({0 , 0 , 0 }), Entry() {}
274
- };
275
-
276
299
// / This struct will be returned by \p DeviceTy::getTargetPointer which provides
277
- // / more data than just a target pointer.
300
+ // / more data than just a target pointer. A TargetPointerResultTy that has a non
301
+ // / null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
302
+ // / the entry is locked. To give up ownership without destroying the TPR use the
303
+ // / reset() function.
278
304
struct TargetPointerResultTy {
279
- struct {
305
+ struct FlagTy {
280
306
// / If the map table entry is just created
281
307
unsigned IsNewEntry : 1 ;
282
308
// / If the pointer is actually a host pointer (when unified memory enabled)
283
309
unsigned IsHostPointer : 1 ;
284
310
// / If the pointer is present in the mapping table.
285
311
unsigned IsPresent : 1 ;
286
- } Flags = {0 , 0 , 0 };
312
+ // / Flag indicating that this was the last user of the entry and the ref
313
+ // / count is now 0.
314
+ unsigned IsLast : 1 ;
315
+ } Flags = {0 , 0 , 0 , 0 };
316
+
317
+ TargetPointerResultTy (const TargetPointerResultTy &) = delete ;
318
+ TargetPointerResultTy &operator =(const TargetPointerResultTy &TPR) = delete ;
319
+ TargetPointerResultTy () {}
320
+
321
+ TargetPointerResultTy (FlagTy Flags, HostDataToTargetTy *Entry,
322
+ void *TargetPointer)
323
+ : Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
324
+ if (Entry)
325
+ Entry->lock ();
326
+ }
327
+
328
+ TargetPointerResultTy (TargetPointerResultTy &&TPR)
329
+ : Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
330
+ TPR.Entry = nullptr ;
331
+ }
332
+
333
+ TargetPointerResultTy &operator =(TargetPointerResultTy &&TPR) {
334
+ if (&TPR != this ) {
335
+ std::swap (Flags, TPR.Flags );
336
+ std::swap (Entry, TPR.Entry );
337
+ std::swap (TargetPointer, TPR.TargetPointer );
338
+ }
339
+ return *this ;
340
+ }
341
+
342
+ ~TargetPointerResultTy () {
343
+ if (Entry)
344
+ Entry->unlock ();
345
+ }
287
346
288
347
bool isPresent () const { return Flags.IsPresent ; }
289
348
290
349
bool isHostPointer () const { return Flags.IsHostPointer ; }
291
350
292
- // / The corresponding map table entry which is stable.
293
- HostDataToTargetTy *Entry = nullptr ;
294
-
295
351
// / The corresponding target pointer
296
352
void *TargetPointer = nullptr ;
353
+
354
+ HostDataToTargetTy *getEntry () const { return Entry; }
355
+ void setEntry (HostDataToTargetTy *HDTTT,
356
+ HostDataToTargetTy *OwnedTPR = nullptr ) {
357
+ if (Entry)
358
+ Entry->unlock ();
359
+ Entry = HDTTT;
360
+ if (Entry && Entry != OwnedTPR)
361
+ Entry->lock ();
362
+ }
363
+
364
+ void reset () { *this = TargetPointerResultTy (); }
365
+
366
+ private:
367
+ // / The corresponding map table entry which is stable.
368
+ HostDataToTargetTy *Entry = nullptr ;
297
369
};
298
370
299
- // / Map for shadow pointers
300
- struct ShadowPtrValTy {
301
- void *HstPtrVal;
302
- void *TgtPtrAddr;
303
- void *TgtPtrVal;
371
+ struct LookupResult {
372
+ struct {
373
+ unsigned IsContained : 1 ;
374
+ unsigned ExtendsBefore : 1 ;
375
+ unsigned ExtendsAfter : 1 ;
376
+ } Flags;
377
+
378
+ LookupResult () : Flags({0 , 0 , 0 }), TPR() {}
379
+
380
+ TargetPointerResultTy TPR;
304
381
};
305
- typedef std::map<void *, ShadowPtrValTy> ShadowPtrListTy;
306
382
307
383
// /
308
384
struct PendingCtorDtorListsTy {
@@ -336,9 +412,7 @@ struct DeviceTy {
336
412
337
413
PendingCtorsDtorsPerLibrary PendingCtorsDtors;
338
414
339
- ShadowPtrListTy ShadowPtrMap;
340
-
341
- std::mutex PendingGlobalsMtx, ShadowMtx;
415
+ std::mutex PendingGlobalsMtx;
342
416
343
417
DeviceTy (RTLInfoTy *RTL);
344
418
// DeviceTy is not copyable
@@ -353,7 +427,8 @@ struct DeviceTy {
353
427
// / Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
354
428
// / exclusive access to the HDTT map.
355
429
LookupResult lookupMapping (HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
356
- int64_t Size);
430
+ int64_t Size,
431
+ HostDataToTargetTy *OwnedTPR = nullptr );
357
432
358
433
// / Get the target pointer based on host pointer begin and base. If the
359
434
// / mapping already exists, the target pointer will be returned directly. In
@@ -365,12 +440,13 @@ struct DeviceTy {
365
440
// / - Data allocation failed;
366
441
// / - The user tried to do an illegal mapping;
367
442
// / - Data transfer issue fails.
368
- TargetPointerResultTy
369
- getTargetPointer (void *HstPtrBegin, void *HstPtrBase, int64_t Size,
370
- map_var_info_t HstPtrName, bool HasFlagTo,
371
- bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
372
- bool HasCloseModifier, bool HasPresentModifier,
373
- bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
443
+ TargetPointerResultTy getTargetPointer (
444
+ HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
445
+ int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
446
+ bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
447
+ bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
448
+ AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr ,
449
+ bool ReleaseHDTTMap = true );
374
450
375
451
// / Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
376
452
// / ensures exclusive access to the HDTT map.
@@ -388,10 +464,9 @@ struct DeviceTy {
388
464
// / - \p FromDataEnd tracks the number of threads referencing the entry at
389
465
// / targetDataEnd for delayed deletion purpose.
390
466
[[nodiscard]] TargetPointerResultTy
391
- getTgtPtrBegin (void *HstPtrBegin, int64_t Size, bool &IsLast,
392
- bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr,
393
- bool MustContain = false , bool ForceDelete = false ,
394
- bool FromDataEnd = false );
467
+ getTgtPtrBegin (void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
468
+ bool UseHoldRefCount, bool MustContain = false ,
469
+ bool ForceDelete = false , bool FromDataEnd = false );
395
470
396
471
// / Remove the \p Entry from the data map. Expect the entry's total reference
397
472
// / count to be zero and the caller thread to be the last one using it. \p
@@ -436,10 +511,12 @@ struct DeviceTy {
436
511
// synchronous.
437
512
// Copy data from host to device
438
513
int32_t submitData (void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
439
- AsyncInfoTy &AsyncInfo);
514
+ AsyncInfoTy &AsyncInfo,
515
+ HostDataToTargetTy *Entry = nullptr );
440
516
// Copy data from device back to host
441
517
int32_t retrieveData (void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
442
- AsyncInfoTy &AsyncInfo);
518
+ AsyncInfoTy &AsyncInfo,
519
+ HostDataToTargetTy *Entry = nullptr );
443
520
// Copy data from current device to destination device directly
444
521
int32_t dataExchange (void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
445
522
int64_t Size, AsyncInfoTy &AsyncInfo);
0 commit comments