use a custom allocator with a single cache slot

This commit implements a wrapper around new/delete as the struct Allocator. This struct caches the most recently freed block and reuses it the next time an allocation is requested. In our domain, this works because (1) we do all our new/deletes in pseudo lock-step (we never have two 'deletes' without a 'new' in between) and (2) our allocations are all statically localised to threads, so we can get away without any locking in the allocator. This seems both trivial and over-engineered. However, it makes a significant practical difference. On my current workhorse, long-running2, this takes the single-threaded checker from 90 seconds down to 51 and the 4-threaded checker from 117 seconds down to 85. Related to GitHub #6 "Multithreaded optimisation".
Smattr · Feb 19, 2018 · 533f202 · 533f202
1 parent 98613ff
commit 533f202
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 13 deletions.
diff --git a/src/librumur/resources/footer.cc b/src/librumur/resources/footer.cc
@@ -48,6 +48,7 @@ struct ThreadData {
   std::vector<std::thread> threads;
   std::atomic_bool done;
   int exit_code;
+  std::array<Allocator<State>, THREADS> allocator;
 };
 }
 
@@ -93,11 +94,11 @@ static void explore(unsigned long thread_id, ThreadData &data, StateQueue &q, St
     // Run each applicable rule on it, generating new states.
     for (const Rule &rule : RULES) {
       try {
-        for (State *next : rule.get_iterable(*s)) {
+        for (State *next : rule.get_iterable(*s, data.allocator[thread_id])) {
 
           std::pair<size_t, bool> seen_result = seen.insert(next);
           if (!seen_result.second) {
-            delete next;
+            data.allocator[thread_id].free(next);
             continue;
           }
 

diff --git a/src/librumur/resources/header.cc b/src/librumur/resources/header.cc
@@ -306,6 +306,35 @@ struct QueueLink {
 };
 }
 
+namespace {
+/// Single-slot caching allocator for objects of type T.
+///
+/// alloc() hands out raw storage of sizeof(T) bytes, not a constructed T; the
+/// caller is expected to construct the object in it (e.g. with placement
+/// new). free() neither runs T's destructor nor returns the storage to the
+/// heap: it parks the block in a one-entry cache that the next alloc() reuses.
+///
+/// No locking is performed here, so an instance must not be shared between
+/// threads without external synchronisation.
+template<typename T>
+struct Allocator {
+
+ private:
+  // The most recently freed block, or nullptr when the cache is empty.
+  T *cached = nullptr;
+
+  // Hand a block obtained from alloc() back to the heap. Storage is created
+  // with new unsigned char[], so it must be released with the matching
+  // delete[] on an unsigned char pointer. Accepts nullptr.
+  static void release(T *t) {
+    delete[] reinterpret_cast<unsigned char*>(t);
+  }
+
+ public:
+  // Declaring the (deleted) copy constructor below suppresses the implicit
+  // default constructor, so it has to be requested explicitly.
+  Allocator() = default;
+
+  // The cache slot is an owning raw pointer. A copied Allocator would release
+  // the same block a second time in its destructor, so copying is disabled
+  // (which also suppresses the implicit move operations).
+  Allocator(const Allocator&) = delete;
+  Allocator &operator=(const Allocator&) = delete;
+
+  /// Return storage for one T: the cached block if there is one, otherwise a
+  /// fresh heap allocation. The returned memory does not hold a live T.
+  T *alloc() {
+    if (cached != nullptr) {
+      T *t = cached;
+      cached = nullptr;
+      return t;
+    }
+    return reinterpret_cast<T*>(new unsigned char[sizeof(T)]);
+  }
+
+  /// Give a block previously returned by alloc() back to this allocator.
+  /// The expected usage is that the cache is empty at this point.
+  void free(T *t) {
+    ASSERT(cached == nullptr);
+    // NOTE(review): if ASSERT can be compiled out (its definition is not
+    // visible here), an occupied slot would otherwise be overwritten and the
+    // older block leaked. Release it instead of losing track of it.
+    if (cached != nullptr && cached != t) {
+      release(cached);
+    }
+    cached = t;
+  }
+
+  /// Release the cached block, if any.
+  ~Allocator() {
+    release(cached);
+  }
+};
+}
+
 namespace {
 template<size_t SIZE_BITS, unsigned long THREAD_COUNT>
 struct StateBase : public BitBlock,
@@ -321,8 +350,8 @@ struct StateBase : public BitBlock,
   StateBase &operator=(const StateBase&) = default;
   StateBase &operator=(StateBase&&) = default;
 
-  StateBase *duplicate() const {
-    return new StateBase(this);
+  StateBase *duplicate(Allocator<StateBase> &a) const {
+    return new(a.alloc()) StateBase(this);
   }
 
   bool operator==(const StateBase &other) const {
@@ -403,11 +432,12 @@ struct RuleBase {
    private:
     const RuleBase &rule;
     STATE_T &origin;
+    Allocator<STATE_T> *allocator;
     bool end;
 
    public:
-    iterator(const RuleBase &rule_, STATE_T &origin_, bool end_ = false):
-      rule(rule_), origin(origin_), end(end_) {
+    iterator(const RuleBase &rule_, STATE_T &origin_, Allocator<STATE_T> &allocator_, bool end_ = false):
+      rule(rule_), origin(origin_), allocator(&allocator_), end(end_) {
       if (!end && !rule.guard(origin)) {
         ++*this;
       }
@@ -431,7 +461,7 @@ struct RuleBase {
 
     STATE_T *operator*() const {
       ASSERT(!end);
-      STATE_T *d = origin.duplicate();
+      STATE_T *d = origin.duplicate(*allocator);
       rule.body(*d);
       return d;
     }
@@ -441,23 +471,24 @@ struct RuleBase {
    private:
     const RuleBase &rule;
     STATE_T &origin;
+    Allocator<STATE_T> *allocator;
 
    public:
-    iterable(const RuleBase &rule_, STATE_T &origin_):
-      rule(rule_), origin(origin_) { }
+    iterable(const RuleBase &rule_, STATE_T &origin_, Allocator<STATE_T> &allocator_):
+      rule(rule_), origin(origin_), allocator(&allocator_) { }
 
     iterator begin() const {
-      return iterator(rule, origin);
+      return iterator(rule, origin, *allocator);
     }
 
     iterator end() const {
-      return iterator(rule, origin, true);
+      return iterator(rule, origin, *allocator, true);
     }
   };
 
  public:
-  iterable get_iterable(STATE_T &origin) const {
-    return iterable(*this, origin);
+  iterable get_iterable(STATE_T &origin, Allocator<STATE_T> &allocator) const {
+    return iterable(*this, origin, allocator);
   }
 };
 }