Skip to content

Commit

Permalink
[WIP] Debug and temporarily fix the deadlock issue
Browse files Browse the repository at this point in the history
The deadlock is caused by an IPPT reading a batch version that was
initialized during the insert phase but has not yet been written to.

The hack to fix this issue is (thanks to the king of ECE):
 - initialize the last_batch_obj in DoublyLinkedListExtraVHandle
 - when ReadWithVersion is called during the init phase
   - if the extraVHandle has not been created yet, create one
   - if no version can be read from the extraVHandle, return the
     last_batch_obj instead of a nullptr

Potential Solutions:
 - in the future, redirect ReadWithVersion during the init phase to
   the extraVHandle only
 - find out how to create an initial version properly and return
   that instead of the last_batch_obj
 - make use of last_batch_obj and last_batch_version the way the
   design intends
  • Loading branch information
Rayzgz committed Mar 16, 2022
1 parent 5b80eb9 commit c64bd90
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 7 deletions.
1 change: 1 addition & 0 deletions benchmark/ycsb/ycsb_priority.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ bool MWTxn_Run(PriorityTxn *txn)
ctx.txn->measure_tsc = queue_tsc;
}

// trace(TRACE_DEADLOCK "sid {} read on row {}", sid_info(ctx.txn->sid), ctx.key);
auto row = ctx.txn->Read<Ycsb::Value>(ctx.row);
row.v.resize_junk(90);
ctx.txn->Write(ctx.row, row);
Expand Down
3 changes: 3 additions & 0 deletions epoch.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class EpochCallback {
friend class EpochClient;
friend class CallTxnsWorker;
friend class DoublyLinkedListExtraVHandle;
friend class SortedArrayVHandle;
PerfLog perf;
EpochClient *client;
const char *label;
Expand Down Expand Up @@ -177,6 +178,8 @@ class EpochClient {
friend class BaseTxn;
friend class EpochCallback;
friend class DoublyLinkedListExtraVHandle;
friend class SortedArrayVHandle;


void InitializeEpoch();
void ExecuteEpoch();
Expand Down
15 changes: 14 additions & 1 deletion extravhandle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <forward_list>

#include "priority.h"
#include "sid_info.h"
#include "benchmark/ycsb/ycsb.h"

namespace felis
{
Expand All @@ -20,10 +22,16 @@ DoublyLinkedListExtraVHandle::DoublyLinkedListExtraVHandle()
: head(nullptr),
tail(nullptr),
size(0),
last_batch_obj(kIgnoreValue),
last_batch_obj((uint64_t) VarStr::New(2000)), // TODO: Shujian Hack
last_batch_version(0),
max_exec_sid(0)
{
// TODO: Shujian
// A *serious* hack for allowing priority txn to read empty batch
// by letting it read a useless last_batch_obj.
*((uint64_t *)((VarStr *)last_batch_obj)->data()) = 0;
// TODO: Shujian: end of hack

this_coreid = alloc_by_regionid = mem::ParallelPool::CurrentAffinity();
}

Expand Down Expand Up @@ -248,6 +256,10 @@ VarStr *DoublyLinkedListExtraVHandle::ReadWithVersion(uint64_t sid,
new_obj_ptr = old_obj_ptr | kReadBitMask;
}
}
int core_id = go::Scheduler::CurrentThreadPoolId() - 1;

if(core_id == 0)
trace(TRACE_IPPT "DispatchService on core {} notifies {}+{} completions");

// wait for the object to be filled
util::Impl<VHandleSyncService>().WaitForData(obj_ptr_ptr,
Expand Down Expand Up @@ -449,6 +461,7 @@ bool DoublyLinkedListExtraVHandle::WriteWithVersion(uint64_t sid, VarStr *obj)
}

volatile uintptr_t *obj_ptr_ptr = &p->object;
// trace(TRACE_DEADLOCK "writing to sid {}", sid_info(sid));
util::Impl<VHandleSyncService>().OfferData(obj_ptr_ptr, (uintptr_t) obj);
return true;
}
Expand Down
4 changes: 3 additions & 1 deletion extravhandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,13 @@ class DoublyLinkedListExtraVHandle
std::atomic<Entry *> head;
std::atomic<Entry *> tail;
uint64_t last_batch_version;
uintptr_t last_batch_obj;
std::atomic<uint64_t> max_exec_sid;

public:
static mem::ParallelSlabPool pool;
// TODO: Ray
// Change this back to private and create API for accessing this obj.
uintptr_t last_batch_obj;

static void InitPool()
{
Expand Down
1 change: 1 addition & 0 deletions log.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ static void debug(std::string_view fmt, T... args)
#define TRACE_COMPLETION // "\x7f" "Trace Completion: "
#define TRACE_PRIORITY // "\x7f" "Trace PriorityTxn: "
#define TRACE_IPPT // "\x7f" "Trace IPPT Debug: "
#define TRACE_DEADLOCK "\x7f" "Trace deadlock: "

// Debug tags
#define DBG_WORKLOAD // "\x7f" "Workload: "
Expand Down
4 changes: 3 additions & 1 deletion routine_sched.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "routine_sched.h"
#include "pwv_graph.h"
#include "priority.h"
#include "sid_info.h"

namespace felis {

Expand Down Expand Up @@ -790,7 +791,8 @@ int EpochExecutionDispatchService::TraceDependency(uint64_t key)
}
for (auto i = 0; i < q.start.load(); i++) {
if (q.q[i % max_item_percore]->sched_key == key) {
printf("found %lu in the consumed pending area of %d\n", key, core_id);
// printf("found %lu in the consumed pending area of %d\n", key, core_id);
trace(TRACE_DEADLOCK "found {} in the consumed pending area of {}", sid_info(key), core_id);
}
}

Expand Down
24 changes: 24 additions & 0 deletions vhandle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,30 @@ VarStr *SortedArrayVHandle::ReadWithVersion(uint64_t sid)
int pos;
volatile uintptr_t *addr = WithVersion(sid, pos);

// TODO: Shujian
// A *serious* hack for allowing priority txn to read empty batch
// by letting it read a useless last_batch_obj.
if (EpochClient::g_workload_client->callback.phase != EpochPhase::Execute) {
auto extra = extra_vhandle.load();
if (extra == nullptr) {
// did not exist, allocate
auto temp = new ExtraVHandle();
auto succ = extra_vhandle.compare_exchange_strong(extra, temp);
if (succ)
extra = temp;
else {
delete temp; // somebody else allocated and CASed their ptr first, just use that
extra = extra_vhandle.load(); // all to avoid an extra atomic load
}
}
auto version = extra->ReadWithVersion(sid, 0, this);
if (!version) {
return reinterpret_cast<VarStr *>(extra->last_batch_obj);
}
return version;
}
// TODO: Shujian: end of hack

// MVTO: mark row read timestamp
if (PriorityTxnService::g_row_rts) {
uint64_t new_rts_64 = sid >> 8;
Expand Down
12 changes: 8 additions & 4 deletions vhandle_sync.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "vhandle.h"
#include "vhandle_sync.h"
#include "priority.h"
#include "sid_info.h"

namespace felis {

Expand Down Expand Up @@ -139,8 +140,9 @@ bool SpinnerSlot::Spin(uint64_t sid, uint64_t ver, ulong &wait_cnt, volatile uin

if (unlikely((wait_cnt & 0x7FFFFFF) == 0)) {
int dep = dispatch.TraceDependency(ver);
printf("Deadlock on core %d? %lu (using %p) waiting for %lu (%d) node (%lu), ptr %p\n",
core_id, sid, routine, ver, dep, ver & 0xFF, ptr);
trace(TRACE_DEADLOCK "Deadlock ver {}, sid {}", sid_info(ver), sid_info(sid));
// printf("Deadlock on core %d? %lu (using %p) waiting for %lu (%d) node (%lu), ptr %p\n",
// core_id, sid, routine, ver, dep, ver & 0xFF, ptr);
sleep(600);
}

Expand Down Expand Up @@ -202,8 +204,10 @@ void SimpleSync::WaitForData(volatile uintptr_t *addr, uint64_t sid, uint64_t ve
wait_cnt++;
if (unlikely((wait_cnt & 0x7FFFFFF) == 0)) {
int dep = dispatch.TraceDependency(ver);
printf("Deadlock on core %d? %lu (using %p) waiting for %lu (%d) node (%lu)\n",
core_id, sid, routine, ver, dep, ver & 0xFF);
trace(TRACE_DEADLOCK "Deadlock ver {}, sid {}", sid_info(ver), sid_info(sid));

// printf("Deadlock on core %d? %lu (using %p) waiting for %lu (%d) node (%lu)\n",
// core_id, sid, routine, ver, dep, ver & 0xFF);
sleep(600);
}

Expand Down

0 comments on commit c64bd90

Please sign in to comment.