diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index f814bb0..18f386b 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -9,7 +9,7 @@ RUN apt-get install -y \
 
 # Packages for local testing
 RUN pip install --upgrade pip setuptools wheel
-RUN pip install pytest h5py numpy
+RUN pip install pytest numpy
 
 # Packages for local build
 RUN pip install cmake ninja
@@ -25,5 +25,4 @@ RUN apt-get install -y \
 RUN apt-get install -y \
     python3-dev \
     libboost-dev \
-    libeigen3-dev \
     pybind11-dev
diff --git a/.github/workflows/build-cpp-libs.yml b/.github/workflows/build-cpp-libs.yml
index c3b3dfa..05e8cd5 100644
--- a/.github/workflows/build-cpp-libs.yml
+++ b/.github/workflows/build-cpp-libs.yml
@@ -19,16 +19,16 @@ jobs:
         include:
           - os: ubuntu-22.04
             build-type: Debug
-            system-deps: "sudo apt -y install libboost-dev libboost-math-dev libeigen3-dev"
+            system-deps: "sudo apt -y install libboost-dev libboost-math-dev"
           - os: ubuntu-22.04
             build-type: Release
-            system-deps: "sudo apt -y install libboost-dev libboost-math-dev libeigen3-dev"
+            system-deps: "sudo apt -y install libboost-dev libboost-math-dev"
           - os: macos-14
             build-type: Debug
-            system-deps: "brew install boost eigen"
+            system-deps: "brew install boost"
           - os: macos-14
             build-type: Release
-            system-deps: "brew install boost eigen"
+            system-deps: "brew install boost"
 
     steps:
 
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
index f80bc56..02fc999 100644
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -27,12 +27,12 @@ jobs:
           - os: macos-13
             arch: x86_64
             py-vers: cp38-* cp39-* cp310-* cp311-* cp312-*
-            before-all: brew install cmake ninja boost eigen
+            before-all: brew install cmake ninja boost
             extra-env: CC="$(brew --prefix llvm@15)/bin/clang" CXX="$(brew --prefix llvm@15)/bin/clang++" HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
           - os: macos-14
             arch: arm64
             py-vers: cp39-* cp310-* cp311-* cp312-*
-            before-all: brew install cmake ninja boost eigen
+            before-all: brew install cmake ninja boost
             extra-env: CC="$(brew --prefix llvm@15)/bin/clang" CXX="$(brew --prefix llvm@15)/bin/clang++" HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
 
     env:
diff --git a/.github/workflows/ctest-macos.yml b/.github/workflows/ctest-macos.yml
index baf3af5..f0a8882 100644
--- a/.github/workflows/ctest-macos.yml
+++ b/.github/workflows/ctest-macos.yml
@@ -16,7 +16,7 @@ jobs:
         uses: actions/checkout@v2
 
       - name: install system packages
-        run: brew install boost eigen
+        run: brew install boost
 
       - name: make build directory
         run: mkdir build_dir
diff --git a/.github/workflows/ctest-ubuntu.yml b/.github/workflows/ctest-ubuntu.yml
index e7cc5c0..dcd7e2f 100644
--- a/.github/workflows/ctest-ubuntu.yml
+++ b/.github/workflows/ctest-ubuntu.yml
@@ -21,7 +21,7 @@ jobs:
       - name: install system packages
         run: |
           sudo apt -y update
-          sudo apt -y install libboost-dev libeigen3-dev
+          sudo apt -y install libboost-dev
 
       - name: make build directory
         run: mkdir build_dir
diff --git a/.github/workflows/pytest-ubuntu.yml b/.github/workflows/pytest-ubuntu.yml
index c809702..7395f96 100644
--- a/.github/workflows/pytest-ubuntu.yml
+++ b/.github/workflows/pytest-ubuntu.yml
@@ -38,7 +38,7 @@ jobs:
       - name: install system packages
         run: |
           sudo apt -y update
-          sudo apt -y install libboost-dev libeigen3-dev
+          sudo apt -y install libboost-dev
 
       - name: install python dependencies
         run: |
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6e42dfb..a67d847 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,7 +7,85 @@
     "cmake.generator": "Ninja",
     "cmake.configureOnOpen": false,
     "cmake.configureSettings": {
-        //"ENABLE_TESTING": "ON",
-        //"WARNINGS_AS_ERRORS": "TRUE",
+        "ENABLE_TESTING": "ON",
+        "WARNINGS_AS_ERRORS": "TRUE"
+    },
+    "files.associations": {
+        "iosfwd": "cpp",
+        "cctype": "cpp",
+        "clocale": "cpp",
+        "cmath": "cpp",
+        "csignal": "cpp",
+        "cstdarg": "cpp",
+        "cstddef": "cpp",
+        "cstdio": "cpp",
+        "cstdlib": "cpp",
+        "cstring": "cpp",
+        "ctime": "cpp",
+        "cwchar": "cpp",
+        "cwctype": "cpp",
+        "array": "cpp",
+        "atomic": "cpp",
+        "strstream": "cpp",
+        "bit": "cpp",
+        "*.tcc": "cpp",
+        "bitset": "cpp",
+        "chrono": "cpp",
+        "codecvt": "cpp",
+        "compare": "cpp",
+        "complex": "cpp",
+        "concepts": "cpp",
+        "condition_variable": "cpp",
+        "cstdint": "cpp",
+        "deque": "cpp",
+        "forward_list": "cpp",
+        "list": "cpp",
+        "map": "cpp",
+        "set": "cpp",
+        "string": "cpp",
+        "unordered_map": "cpp",
+        "unordered_set": "cpp",
+        "vector": "cpp",
+        "exception": "cpp",
+        "algorithm": "cpp",
+        "functional": "cpp",
+        "iterator": "cpp",
+        "memory": "cpp",
+        "memory_resource": "cpp",
+        "numeric": "cpp",
+        "optional": "cpp",
+        "random": "cpp",
+        "ratio": "cpp",
+        "regex": "cpp",
+        "string_view": "cpp",
+        "system_error": "cpp",
+        "tuple": "cpp",
+        "type_traits": "cpp",
+        "utility": "cpp",
+        "fstream": "cpp",
+        "future": "cpp",
+        "initializer_list": "cpp",
+        "iomanip": "cpp",
+        "iostream": "cpp",
+        "istream": "cpp",
+        "limits": "cpp",
+        "mutex": "cpp",
+        "new": "cpp",
+        "numbers": "cpp",
+        "ostream": "cpp",
+        "semaphore": "cpp",
+        "sstream": "cpp",
+        "stdexcept": "cpp",
+        "stop_token": "cpp",
+        "streambuf": "cpp",
+        "thread": "cpp",
+        "cfenv": "cpp",
+        "cinttypes": "cpp",
+        "typeindex": "cpp",
+        "typeinfo": "cpp",
+        "valarray": "cpp",
+        "variant": "cpp",
+        "core": "cpp",
+        "geometry": "cpp"
     }
 }
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60d9b8b..197cef7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,18 @@
-# This file is part of https://github.com/PalamaraLab/threads which is released under the GPL-3.0 license.
-# See accompanying LICENSE and COPYING for copyright notice and full details.
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 cmake_minimum_required(VERSION 3.16)
 message(STATUS "Using CMake version ${CMAKE_VERSION}")
@@ -16,9 +29,7 @@ include(cmake/ProjectSettings.cmake)
 # Link this 'library' to use the warnings specified in CompilerWarnings.cmake
 add_library(project_warnings INTERFACE)
 include(cmake/CompilerWarnings.cmake)
-
-# FIXME Uncomment to enable warnings
-#set_project_warnings(project_warnings)
+set_project_warnings(project_warnings)
 
 # Sanitiser options if supported by compiler
 include(cmake/Sanitisers.cmake)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index 7ecd6aa..1abd197 100644
--- a/README.md
+++ b/README.md
@@ -8,12 +8,9 @@ module load git/2.36.0-GCCcore-11.3.0-nodocs
 module load Python/3.10.4-GCCcore-11.3.0
 module load GSL/2.7-GCC-11.3.0
 module load Boost/1.79.0-GCC-11.3.0
-module load Eigen/3.4.0-GCCcore-11.3.0
 module load pybind11/2.9.2-GCCcore-11.3.0
 ```
 
-NB: Eigen is only used for functionality in TGEN.cpp. We can consider releasing without TGEN to have fewer dependencies.
-
 Fire up and activate a new venv:
 ```
 python -m venv venv
diff --git a/cmake/CompilerWarnings.cmake b/cmake/CompilerWarnings.cmake
index c2965c8..2adab9c 100644
--- a/cmake/CompilerWarnings.cmake
+++ b/cmake/CompilerWarnings.cmake
@@ -44,7 +44,9 @@ function(set_project_warnings project_name)
       -Woverloaded-virtual # warn if you overload (not override) a virtual function
       -Wpedantic # warn if non-standard C++ is used
       -Wconversion # warn on type conversions that may lose data
-      -Wsign-conversion # warn on sign conversions
+      # TODO replace -Wno-sign-conversion with -Wsign-conversion (ticket #26)
+      -Wno-sign-conversion # disabled until int/size_t resolved
+      #-Wsign-conversion # warn on sign conversions
       -Wnull-dereference # warn if a null dereference is detected
       -Wdouble-promotion # warn if float is implicit promoted to double
       -Wformat=2 # warn on security issues around functions that format output (ie printf)
diff --git a/setup.py b/setup.py
index 1823220..70a9b2e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 # Based on https://github.com/pybind/cmake_example
 
 import os
@@ -46,7 +62,7 @@ def build_extension(self, ext):
             f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
             f"-DPYTHON_EXECUTABLE={sys.executable}",
             f"-DCMAKE_BUILD_TYPE={cfg}",
-            "-DWARNINGS_AS_ERRORS=OFF",
+            "-DWARNINGS_AS_ERRORS=ON",
             "-DENABLE_TESTING=OFF",
             "-DBoost_NO_BOOST_CMAKE=ON", # from arni: o/w boost 1.74 gets confused re. mtx
             "-DMAKE_DOCS=OFF"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5dd6146..451e5c2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,18 @@
-# This file is part of https://github.com/PalamaraLab/threads which is released under the GPL-3.0 license.
-# See accompanying LICENSE and COPYING for copyright notice and full details.
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Find Boost
 find_package(Boost REQUIRED)
@@ -14,23 +27,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     endif()
 endif()
 
-# Try to find Eigen3 version 3.4 locally installed
-find_package(Eigen3 3.4 QUIET)
-
-if(NOT Eigen3_FOUND)
-    include(FetchContent)
-    message(STATUS "Suitable local version of Eigen3 (>= 3.4) not found, fetching from repository...")
-    FetchContent_Declare(
-        Eigen3
-        GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
-        GIT_TAG        9df21dc8b4b576a7aa5c0094daa8d7e8b8be60f0  # 3.4 branch at 2024-03-15
-        GIT_SHALLOW TRUE
-    )
-    FetchContent_MakeAvailable(Eigen3)
-    set(Eigen3_VERSION "3.4")
-endif()
-message(STATUS "Found Eigen3 ${Eigen3_VERSION}")
-
 # Threads static library
 set(threads_arg_src
     Demography.cpp
@@ -77,8 +73,6 @@ target_link_libraries(threads_arg
     PRIVATE
         Boost::headers
         project_warnings
-    PUBLIC
-        Eigen3::Eigen
 )
 
 # Conditionally create python bindings
diff --git a/src/Demography.cpp b/src/Demography.cpp
index c6ca316..6c1c9c4 100644
--- a/src/Demography.cpp
+++ b/src/Demography.cpp
@@ -1,20 +1,36 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "Demography.hpp"
-#include <algorithm> // for std::sort
+
+#include <algorithm>
 #include <iostream>
 #include <math.h>
 #include <sstream>
-#include <stdexcept> // to throw errors
+#include <stdexcept>
 #include <vector>
 
-
 Demography::Demography(std::vector<double> _sizes, std::vector<double> _times)
     : times(_times), sizes(_sizes), std_times(std::vector<double>()) {
   if (times.size() != sizes.size()) {
     throw std::runtime_error("Demography times and sizes must have equal length");
   }
 
-  for (int i = 0; i < times.size(); i++) {
-    if (times[i] < 0 || sizes[i] <= 0) {
+  for (std::size_t i = 0; i < times.size(); i++) {
+    if ((times[i] < 0.0) || (sizes[i] <= 0.0)) {
       throw std::runtime_error("Demography expects non-negative times and strictly positive sizes");
     }
 
@@ -33,9 +49,9 @@ Demography::Demography(std::vector<double> _sizes, std::vector<double> _times)
   }
 
   // Compute times in standard coalescent space
-  int K = times.size();
+  std::size_t K = times.size();
   std_times.push_back(0.0);
-  for (int i = 1; i < K; i++) {
+  for (std::size_t i = 1; i < K; i++) {
     double d = (times[i] - times[i - 1]) / sizes[i - 1];
     std_times.push_back(std_times[i - 1] + d);
   }
@@ -45,27 +61,28 @@ Demography::Demography(std::vector<double> _sizes, std::vector<double> _times)
 }
 
 double Demography::std_to_gen(const double t) {
-  if (t < 0) {
-    throw std::runtime_error("Demography can only convert non-negative times to std");
+  if (t < std_times.front()) {
+    throw std::runtime_error("Demography can only convert times greater than the first entry");
   }
+
   // Find the highest i s.t. std_times[i] <= t.
-  int i =
-      std::distance(std_times.begin(), std::upper_bound(std_times.begin(), std_times.end(), t)) - 1;
+  const auto it = std::upper_bound(std_times.begin(), std_times.end(), t);
+
+  // Defensive check as the t < std_times.front check above should mean `it` is never first
+  if (it == std_times.begin()) {
+    throw std::runtime_error("Unexpected std_to_gen upper bound finding first element");
+  }
+
+  std::size_t i = static_cast<std::size_t>(it - std_times.begin() - 1);
   return times[i] + (t - std_times[i]) * sizes[i];
 }
 
-/**
- * @brief Compute the expected length of the N-th branch
- *
- * @param N
- * @return double
- */
 double Demography::expected_branch_length(const int N) {
   return std_to_gen(2. / N);
 }
 
 std::ostream& operator<<(std::ostream& os, const Demography& d) {
-  for (int i = 0; i < d.sizes.size(); i++) {
+  for (std::size_t i = 0; i < d.sizes.size(); i++) {
     std::cout << d.times[i] << " " << d.sizes[i] << " " << d.std_times[i] << std::endl;
   }
   return os;
diff --git a/src/Demography.hpp b/src/Demography.hpp
index 06efa23..f6249ee 100644
--- a/src/Demography.hpp
+++ b/src/Demography.hpp
@@ -1,30 +1,45 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_DEMOGRAPHY_HPP
 #define THREADS_ARG_DEMOGRAPHY_HPP
 
-#include <vector>
 #include <iostream>
+#include <vector>
 
-// This class is a wrapper for simple coalescence time queries under a piecewise-constant demography
+/// This class is a wrapper for simple coalescence time queries under a piecewise-constant
+/// demography
 class Demography {
 public:
-  // These are in generations
-  std::vector<double> times;
-  // These are *haploid*
-  std::vector<double> sizes;
-  // Normalised coalescence times
-  std::vector<double> std_times;
-  // Expected pairwise coalescent time
-  double expected_time = 0.0;
-
   Demography(std::vector<double> _times, std::vector<double> _sizes);
 
-  // Map a time in the standard coalescent to generations under this demography
+  /// Map a time in the standard coalescent to generations under this demography
   double std_to_gen(const double t);
-  // The expected branch length of a new branch in a tree with N leaves
+
+  /// The expected branch length of a new branch in a tree with N leaves
   double expected_branch_length(const int N);
 
-  // Output
+  /// Stream output
   friend std::ostream& operator<<(std::ostream& os, const Demography& demography);
+
+public:
+  std::vector<double> times;     ///< These are in generations
+  std::vector<double> sizes;     ///< These are *haploid*
+  std::vector<double> std_times; ///< Normalised coalescence times
+  double expected_time = 0.0;    ///< Expected pairwise coalescent time
 };
 
 #endif // THREADS_ARG_DEMOGRAPHY_HPP
diff --git a/src/HMM.cpp b/src/HMM.cpp
index bbea44a..dd48636 100644
--- a/src/HMM.cpp
+++ b/src/HMM.cpp
@@ -1,9 +1,25 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "HMM.hpp"
+
 #include <boost/math/distributions/exponential.hpp>
 #include <iostream>
 #include <vector>
 
-
 HMM::HMM(Demography demography, std::vector<double> bp_sizes, std::vector<double> cm_sizes,
          double mutation_rate, int K)
     : num_states(K) {
@@ -12,7 +28,8 @@ HMM::HMM(Demography demography, std::vector<double> bp_sizes, std::vector<double
   compute_recombination_scores(cm_sizes);
   compute_mutation_scores(bp_sizes, mutation_rate);
 
-  for (int i = 0; i < bp_sizes.size(); i++) {
+  // TODO Profile usage of std containers (ticket #25)
+  for (std::size_t i = 0; i < bp_sizes.size(); i++) {
     std::vector<double> trellis_row(num_states, 0.0);
     std::vector<unsigned short> pointer_row(num_states, 0);
     trellis.push_back(trellis_row);
@@ -21,19 +38,19 @@ HMM::HMM(Demography demography, std::vector<double> bp_sizes, std::vector<double
 }
 
 std::vector<double> HMM::compute_expected_times(Demography demography, const int K) {
-  std::vector<double> expected_times;
+  std::vector<double> result;
   double k = static_cast<double>(num_states);
   boost::math::exponential e;
 
   for (int i = 1; i <= K; i++) {
     double t = demography.std_to_gen(quantile(e, (i - 0.5) / k));
-    expected_times.push_back(t);
+    result.push_back(t);
   }
-  return expected_times;
+  return result;
 }
 
 void HMM::compute_recombination_scores(std::vector<double> cm_sizes) {
-  for (int i = 0; i < cm_sizes.size(); i++) {
+  for (std::size_t i = 0; i < cm_sizes.size(); i++) {
     non_transition_score.push_back(std::vector<double>());
     transition_score.push_back(std::vector<double>());
     for (int k = 0; k < num_states; k++) {
@@ -51,14 +68,14 @@ void HMM::compute_recombination_scores(std::vector<double> cm_sizes) {
 }
 
 void HMM::compute_mutation_scores(std::vector<double> bp_sizes, double mutation_rate) {
-  for (int i = 0; i < bp_sizes.size(); i++) {
+  for (std::size_t i = 0; i < bp_sizes.size(); i++) {
     hom_score.push_back(std::vector<double>());
     het_score.push_back(std::vector<double>());
     for (int k = 0; k < num_states; k++) {
       double t = expected_times[k];
+
       // TODO: use mean-bp sizes here as in the main algorithm
       const double l = 2. * mutation_rate * bp_sizes[i] * t;
-      const double trans = std::log1p(-std::exp(-l));
 
       // log-prob of mutating
       het_score[i].push_back(std::log1p(-std::exp(-l)));
@@ -75,17 +92,20 @@ void HMM::compute_mutation_scores(std::vector<double> bp_sizes, double mutation_
 std::vector<int> HMM::breakpoints(std::vector<bool> observations, int start) {
   // Viterbi
   // Initialize
-  int neighborhood_size = observations.size();
+  int neighborhood_size = static_cast<int>(observations.size());
   std::vector<unsigned short> z(neighborhood_size);
-  int end = start + neighborhood_size;
   for (int i = 0; i < num_states; i++) {
     double score = observations[0] ? het_score[start][i] : hom_score[start][i];
     trellis[start][i] = score;
   }
 
+  if (num_states > std::numeric_limits<unsigned short>::max()) {
+    throw std::runtime_error("Unable to store breakpoints for more than 2^16 states");
+  }
+
   // Main routine
-  double score;
-  unsigned short running_argmax;
+  double score = 0.0;
+  unsigned short running_argmax = 0;
   for (int j = 1; j < neighborhood_size; j++) {
     for (int i = 0; i < num_states; i++) {
       double running_max = 0;
@@ -98,7 +118,7 @@ std::vector<int> HMM::breakpoints(std::vector<bool> observations, int start) {
 
         if (score > running_max || k == 0) {
           running_max = score;
-          running_argmax = k;
+          running_argmax = static_cast<unsigned short>(k);
         }
       }
       trellis[j + start][i] = running_max;
@@ -113,7 +133,7 @@ std::vector<int> HMM::breakpoints(std::vector<bool> observations, int start) {
     double s = trellis[start + neighborhood_size - 1][k];
     if (s > running_max) {
       running_max = s;
-      argmax = k;
+      argmax = static_cast<unsigned short>(k);
     }
   }
 
diff --git a/src/HMM.hpp b/src/HMM.hpp
index 379ecba..0ce0201 100644
--- a/src/HMM.hpp
+++ b/src/HMM.hpp
@@ -1,32 +1,47 @@
-#ifndef DEMOGRAPHY
-#define DEMOGRAPHY
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "Demography.hpp"
-#endif // DEMOGRAPHY
 
-#include <vector>
 #include <iostream>
+#include <vector>
 
-
-// This class contains the PSMC-like algorithm used to break up segments for small-N inference
+/// This class contains the PSMC-like algorithm used to break up segments for small-N inference
 class HMM {
 public:
-  // HMM data
-  int num_states = 0;
-  std::vector<double> expected_times;
+  HMM(Demography demography, std::vector<double> bp_sizes, std::vector<double> cm_sizes,
+      double mutation_rate, int K);
+  HMM() = default;
 
-  std::vector<std::vector<double>> non_transition_score;
-  std::vector<std::vector<double>> transition_score;
-  std::vector<std::vector<double>> hom_score;
-  std::vector<std::vector<double>> het_score;
   void compute_recombination_scores(std::vector<double> cm_sizes);
   void compute_mutation_scores(std::vector<double> bp_sizes, double mutation_rate);
   std::vector<double> compute_expected_times(Demography demography, int K);
+  std::vector<int> breakpoints(std::vector<bool> observations, int start);
+
+public:
+  // HMM data
+  int num_states = 0;
+  std::vector<double> expected_times;
 
   // HMM working quantities
   std::vector<std::vector<double>> trellis;
   std::vector<std::vector<unsigned short>> pointers;
 
-  HMM(Demography demography, std::vector<double> bp_sizes, std::vector<double> cm_sizes, double mutation_rate, int K);
-  HMM() {};
-  std::vector<int> breakpoints(std::vector<bool> observations, int start);
+  std::vector<std::vector<double>> non_transition_score;
+  std::vector<std::vector<double>> transition_score;
+  std::vector<std::vector<double>> hom_score;
+  std::vector<std::vector<double>> het_score;
 };
diff --git a/src/ImputationMatcher.cpp b/src/ImputationMatcher.cpp
index 90cefd4..19e0390 100644
--- a/src/ImputationMatcher.cpp
+++ b/src/ImputationMatcher.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "ImputationMatcher.hpp"
 
 #include <algorithm>
@@ -8,33 +24,37 @@
 #include <unordered_set>
 #include <vector>
 
+// Uncomment this #define to enable a runtime check that genetic position are in order. This
+// diagnostic check is left in whilst we may have issues during development.
+// #define IMPUTATION_MATCHER_CHECK_IN_ORDER
 
 ImputationMatcher::ImputationMatcher(int _n_ref, int _n_target,
                                      const std::vector<double>& _genetic_positions,
                                      double _query_interval_size, int _neighborhood_size)
-    : num_reference(_n_ref), num_target(_n_target), genetic_positions(_genetic_positions),
-      query_interval_size(_query_interval_size), neighborhood_size(_neighborhood_size) {
+    : neighborhood_size(_neighborhood_size), num_reference(_n_ref), num_target(_n_target),
+      query_interval_size(_query_interval_size), genetic_positions(_genetic_positions) {
   if (genetic_positions.size() <= 2) {
     throw std::runtime_error("Need at least 3 sites, found " +
                              std::to_string(genetic_positions.size()));
   }
-  num_sites = genetic_positions.size();
+  num_sites = static_cast<int>(genetic_positions.size());
   num_samples = num_reference + num_target;
   sites_processed = 0;
 
-  // Check maps are strictly increasing
-  // for (int i = 0; i < num_sites - 1; i++) {
-  //   if (genetic_positions.at(i + 1) <= genetic_positions.at(i)) {
-  //     std::string prompt = "Genetic coordinates must be strictly increasing, found ";
-  //     prompt += std::to_string(genetic_positions[i + 1]) + " after " +
-  //               std::to_string(genetic_positions[i]);
-  //     throw std::runtime_error(prompt);
-  //   }
-  // }
+#ifdef IMPUTATION_MATCHER_CHECK_IN_ORDER
+  for (int i = 0; i < num_sites - 1; i++) {
+    if (genetic_positions.at(i + 1) <= genetic_positions.at(i)) {
+      std::string prompt = "Genetic coordinates must be strictly increasing, found ";
+      prompt += std::to_string(genetic_positions[i + 1]) + " after " +
+                std::to_string(genetic_positions[i]);
+      throw std::runtime_error(prompt);
+    }
+  }
+#endif // IMPUTATION_MATCHER_CHECK_IN_ORDER
 
   int query_site_idx = 1;
   double gen_pos_offset = genetic_positions[0];
-  for (int i = 0; i < genetic_positions.size(); i++) {
+  for (int i = 0; i < static_cast<int>(genetic_positions.size()); i++) {
     double cm = genetic_positions.at(i);
     if (cm > gen_pos_offset + query_interval_size * query_site_idx) {
       query_sites.push_back(i);
@@ -82,11 +102,11 @@ void ImputationMatcher::process_site(const std::vector<int>& genotype) {
     }
   }
 
-  if (genotype.size() != num_samples) {
+  if (static_cast<int>(genotype.size()) != num_samples) {
     throw std::runtime_error("invalid genotype vector size");
   }
 
-  if (next_query_site_idx < query_sites.size() &&
+  if (next_query_site_idx < static_cast<int>(query_sites.size()) &&
       sites_processed == query_sites.at(next_query_site_idx)) {
     next_query_site_idx++;
     int ref_allele_count = 0;
@@ -175,10 +195,10 @@ void ImputationMatcher::process_site(const std::vector<int>& genotype) {
   sites_processed++;
 }
 
-std::unordered_map<int, std::unordered_set<int>> ImputationMatcher::get_matches() {
+const std::unordered_map<int, std::unordered_set<int>>& ImputationMatcher::get_matches() const {
   return match_sets;
 }
 
-std::vector<int> ImputationMatcher::get_sorting() {
+const std::vector<int>& ImputationMatcher::get_sorting() const {
   return sorting;
 }
diff --git a/src/ImputationMatcher.hpp b/src/ImputationMatcher.hpp
index d2202f7..8479db8 100644
--- a/src/ImputationMatcher.hpp
+++ b/src/ImputationMatcher.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_IMPUTATION_MATCHER_HPP
 #define THREADS_ARG_IMPUTATION_MATCHER_HPP
 
@@ -5,23 +21,19 @@
 #include <unordered_set>
 #include <vector>
 
-// NB this recycles a lot of code from Matcher.hpp
+/// This is a version of the Threads haplotype matching algorithm
+/// adapted to be used for imputation.
+/// NB this recycles a lot of code from Matcher.hpp
 class ImputationMatcher {
+public:
+  ImputationMatcher(int _n_ref, int _n_target, const std::vector<double>& _genetic_positions,
+                    double _query_interval_size, int _neighborhood_size);
 
-private:
-  int sites_processed = 0;
-  int next_query_site_idx = 0;
-
-  // pbwt quantities
-  std::vector<int> sorting;
-  std::vector<int> next_sorting;
-
-  // querying quantities
-  std::vector<int> ref_sorting;
+  void process_site(const std::vector<int>& genotype);
+  const std::unordered_map<int, std::unordered_set<int>>& get_matches() const;
+  const std::vector<int>& get_sorting() const;
 
 public:
-  // This is a version of the Threads haplotype matching algorithm
-  // adapted to be used for imputation.
   // TODO: include a second pass through data here to get divergence values and not do that using
   // Threads-fastLS
   int neighborhood_size = 0;
@@ -33,14 +45,17 @@ class ImputationMatcher {
   double query_interval_size = 0.0;
   std::unordered_map<int, std::unordered_set<int>> match_sets;
   std::vector<double> genetic_positions;
-  ImputationMatcher(int _n_ref, int _n_target, const std::vector<double>& _genetic_positions,
-                    double _query_interval_size, int _neighborhood_size);
 
-  // Do all the work
-  void process_site(const std::vector<int>& genotype);
-  std::unordered_map<int, std::unordered_set<int>> get_matches();
+private:
+  int sites_processed = 0;
+  int next_query_site_idx = 0;
 
-  std::vector<int> get_sorting();
+  // pbwt quantities
+  std::vector<int> sorting;
+  std::vector<int> next_sorting;
+
+  // querying quantities
+  std::vector<int> ref_sorting;
 };
 
 #endif // THREADS_ARG_IMPUTATION_MATCHER_HPP
diff --git a/src/Matcher.cpp b/src/Matcher.cpp
index 733118e..a95ac05 100644
--- a/src/Matcher.cpp
+++ b/src/Matcher.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "Matcher.hpp"
 
 #include <algorithm>
@@ -8,7 +24,6 @@
 #include <unordered_set>
 #include <vector>
 
-
 // For a given interval, this contains all the matches for all the samples
 MatchGroup::MatchGroup(int _num_samples, double _cm_position)
     : num_samples(_num_samples), cm_position(_cm_position) {
@@ -24,7 +39,7 @@ MatchGroup::MatchGroup(const std::vector<int>& target_ids,
   if (target_ids.size() != matches.size()) {
     throw std::runtime_error("Inconsistent target/matches sizes");
   }
-  for (int i = 0; i < target_ids.size(); i++) {
+  for (int i = 0; i < static_cast<int>(target_ids.size()); i++) {
     match_candidates[target_ids[i]] = matches[i];
   }
 }
@@ -78,7 +93,7 @@ void MatchGroup::filter_matches(int min_matches) {
   // Then determine top 4 candidates for neighboring groups
   top_four_maps.reserve(num_samples);
   for (int i = 0; i < num_samples; i++) {
-    top_four_maps.emplace_back(std::min(4, (int) match_candidates.at(i).size()));
+    top_four_maps.emplace_back(std::min(4, static_cast<int>(match_candidates.at(i).size())));
     std::partial_sort_copy(match_candidates_counts.at(i).begin(),
                            match_candidates_counts.at(i).end(), top_four_maps.at(i).begin(),
                            top_four_maps.at(i).end(),
@@ -107,12 +122,13 @@ Matcher::Matcher(int _n, const std::vector<double>& _genetic_positions, double _
                  double _match_group_interval_size, int _neighborhood_size, int _min_matches)
     : num_samples(_n), genetic_positions(_genetic_positions),
       query_interval_size(_query_interval_size),
-      match_group_interval_size(_match_group_interval_size), neighborhood_size(_neighborhood_size), min_matches(_min_matches) {
+      match_group_interval_size(_match_group_interval_size), neighborhood_size(_neighborhood_size),
+      min_matches(_min_matches) {
   if (genetic_positions.size() <= 2) {
     throw std::runtime_error("Need at least 3 sites, found " +
                              std::to_string(genetic_positions.size()));
   }
-  num_sites = genetic_positions.size();
+  num_sites = static_cast<int>(genetic_positions.size());
   sites_processed = 0;
 
   // Check maps are strictly increasing
@@ -130,7 +146,7 @@ Matcher::Matcher(int _n, const std::vector<double>& _genetic_positions, double _
   int match_group_site_idx = 1;
   double gen_pos_offset = genetic_positions[0];
   match_group_sites = {0};
-  for (int i = 0; i < genetic_positions.size(); i++) {
+  for (int i = 0; i < static_cast<int>(genetic_positions.size()); i++) {
     double cm = genetic_positions.at(i);
     if (cm > gen_pos_offset + query_interval_size * query_site_idx) {
       query_sites.push_back(i);
@@ -162,7 +178,7 @@ Matcher::Matcher(int _n, const std::vector<double>& _genetic_positions, double _
   }
   match_group_idx = 0;
   std::cout << "Will use " << query_sites.size() << " query sites and " << match_group_sites.size()
-       << " match_group_sites" << std::endl;
+            << " match_group_sites" << std::endl;
 
   match_groups.reserve(match_group_sites.size());
   for (int match_group_site : match_group_sites) {
@@ -195,7 +211,7 @@ void Matcher::process_site(const std::vector<int>& genotype) {
   }
   int counter0 = 0;
   int counter1 = 0;
-  if (genotype.size() != num_samples) {
+  if (static_cast<int>(genotype.size()) != num_samples) {
     throw std::runtime_error("invalid genotype vector size");
   }
 
@@ -217,14 +233,14 @@ void Matcher::process_site(const std::vector<int>& genotype) {
   sorting = next_sorting;
 
   // Threading-neighbor queries
-  if (match_group_idx < match_group_sites.size() - 1 &&
-      sites_processed >= match_group_sites.at(match_group_idx + 1)) {
+  if (match_group_idx < (static_cast<int>(match_group_sites.size()) - 1) &&
+      (sites_processed >= match_group_sites.at(match_group_idx + 1))) {
     match_group_idx++;
     match_groups.at(match_group_idx - 1).filter_matches(min_matches);
   }
 
   // If we've reached a query site, query
-  if (next_query_site_idx < query_sites.size() &&
+  if (next_query_site_idx < static_cast<int>(query_sites.size()) &&
       sites_processed == query_sites.at(next_query_site_idx)) {
     // Get the arg-sort of the sorting
     for (int i = 0; i < num_samples; i++) {
@@ -238,18 +254,18 @@ void Matcher::process_site(const std::vector<int>& genotype) {
     // Insert sequences and query in order
     for (int i = 1; i < num_samples; i++) {
       std::vector<int> matches;
-      int allele = genotype.at(i);
       matches.reserve(neighborhood_size);
       auto iter = threaded.insert(permutation.at(i));
       auto iter_up = iter.first;
       auto iter_down = iter.first;
       // Check if genotypes are identical, just to be sure
-      while (matches.size() < neighborhood_size && (iter_down != threaded.begin() || iter_up != threaded.end())) {
+      while ((static_cast<int>(matches.size()) < neighborhood_size) &&
+             (iter_down != threaded.begin() || iter_up != threaded.end())) {
         if (iter_down != threaded.begin()) {
           iter_down--;
           matches.push_back(sorting.at(*iter_down));
         }
-        if (matches.size() < neighborhood_size && iter_up != threaded.end()) {
+        if (static_cast<int>(matches.size()) < neighborhood_size && iter_up != threaded.end()) {
           iter_up++;
           if (iter_up != threaded.end()) {
             matches.push_back(sorting.at(*iter_up));
@@ -273,7 +289,7 @@ void Matcher::process_site(const std::vector<int>& genotype) {
     }
 
     // Special case for last query
-    if (next_query_site_idx == query_sites.size()) {
+    if (next_query_site_idx == static_cast<int>(query_sites.size())) {
       match_groups.at(match_group_sites.size() - 1).filter_matches(min_matches);
     }
   }
@@ -282,7 +298,7 @@ void Matcher::process_site(const std::vector<int>& genotype) {
 
 // Propagate top 4 matches from left and right match groups
 void Matcher::propagate_adjacent_matches() {
-  for (int i = 1; i < match_groups.size(); i++) {
+  for (int i = 1; i < static_cast<int>(match_groups.size()); i++) {
     MatchGroup& group = match_groups.at(i);
     MatchGroup& prev = match_groups.at(i - 1);
     group.insert_tops_from(prev);
diff --git a/src/Matcher.hpp b/src/Matcher.hpp
index ff468dd..fbe5a41 100644
--- a/src/Matcher.hpp
+++ b/src/Matcher.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_MATCHER_HPP
 #define THREADS_ARG_MATCHER_HPP
 
@@ -5,45 +21,26 @@
 #include <unordered_set>
 #include <vector>
 
-// for a certain interval, store the matches for all samples
+/// for a certain interval, store the matches for all samples
 class MatchGroup {
 public:
-  int num_samples;
-  std::unordered_map<int, std::unordered_set<int>> match_candidates;
-  std::vector<std::unordered_map<int, int>> match_candidates_counts;
-  std::vector<std::vector<std::pair<int, int>>> top_four_maps;
-  double cm_position;
   MatchGroup(int _num_samples, double cm_position);
   MatchGroup(const std::vector<int>& target_ids,
              const std::vector<std::unordered_set<int>>& matches, const double _cm_position);
   void filter_matches(int min_matches);
   void insert_tops_from(MatchGroup& other);
   void clear();
+
+public:
+  int num_samples = 0;
+  std::unordered_map<int, std::unordered_set<int>> match_candidates;
+  std::vector<std::unordered_map<int, int>> match_candidates_counts;
+  std::vector<std::vector<std::pair<int, int>>> top_four_maps;
+  double cm_position = 0.0;
 };
 
 class Matcher {
-
-private:
-  int min_matches = 0;
-  int sites_processed = 0;
-  int next_query_site_idx = 0;
-  int match_group_idx = 0;
-  int min_match_length = 1;
-  std::vector<MatchGroup> match_groups;
-  std::vector<int> sorting;
-  std::vector<int> next_sorting;
-  std::vector<int> permutation;
-
 public:
-  int neighborhood_size = 0;
-  std::vector<int> query_sites;
-  std::vector<int> match_group_sites;
-  int num_samples = 0;
-  int num_sites = 0;
-  double query_interval_size = 0.0;
-  // matches in these groups are considered together in the hmm
-  double match_group_interval_size = 0.0;
-  std::vector<double> genetic_positions;
   Matcher(int _n, const std::vector<double>& _genetic_positions, double _query_interval_size,
           double _match_group_interval_size, int _neighborhood_size, int _min_matches);
 
@@ -51,6 +48,7 @@ class Matcher {
   void process_site(const std::vector<int>& genotype);
   void propagate_adjacent_matches();
   void clear();
+
   std::vector<MatchGroup> get_matches();
   std::vector<std::vector<std::unordered_set<int>>>
   serializable_matches(std::vector<int>& target_ids);
@@ -58,6 +56,27 @@ class Matcher {
 
   std::vector<int> get_sorting();
   std::vector<int> get_permutation();
+
+public:
+  int num_samples = 0;
+  std::vector<double> genetic_positions;
+  double query_interval_size = 0.0;
+  double match_group_interval_size = 0.0;
+  int neighborhood_size = 0;
+  std::vector<int> query_sites;
+  std::vector<int> match_group_sites;
+  int num_sites = 0;
+  // matches in these groups are considered together in the hmm
+
+private:
+  int min_matches = 0;
+  int sites_processed = 0;
+  int next_query_site_idx = 0;
+  int match_group_idx = 0;
+  std::vector<MatchGroup> match_groups;
+  std::vector<int> sorting;
+  std::vector<int> next_sorting;
+  std::vector<int> permutation;
 };
 
 #endif // THREADS_ARG_MATCHER_HPP
diff --git a/src/Node.cpp b/src/Node.cpp
index 4eb6faa..12bf8a9 100644
--- a/src/Node.cpp
+++ b/src/Node.cpp
@@ -1,8 +1,23 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "Node.hpp"
 
 #include <iostream>
 
-
 Node::Node(int _sample_ID, int _divergence, bool _genotype)
     : sample_ID(_sample_ID), divergence(_divergence), genotype(_genotype) {
 }
diff --git a/src/Node.hpp b/src/Node.hpp
index 1b228ef..1c07075 100644
--- a/src/Node.hpp
+++ b/src/Node.hpp
@@ -1,11 +1,36 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_NODE_HPP
 #define THREADS_ARG_NODE_HPP
 
 #include <array>
 #include <iostream>
 
-
 class Node {
+public:
+  // Constructors
+  Node(int sample_ID, int divergence, bool genotype);
+
+  // Node movers-arounders
+  void insert_above(Node* node);
+
+  // Output
+  friend std::ostream& operator<<(std::ostream& os, const Node& node);
+
 public:
   // Node data
   int sample_ID = 0;
@@ -18,15 +43,6 @@ class Node {
 
   // "Next below to the right" for 0 and 1
   std::array<Node*, 2> w = {nullptr, nullptr};
-
-  // Constructors
-  Node(int sample_ID, int divergence, bool genotype);
-
-  // Node movers-arounders
-  void insert_above(Node* node);
-
-  // Output
-  friend std::ostream& operator<<(std::ostream& os, const Node& node);
 };
 
 #endif // THREADS_ARG_NODE_HPP
\ No newline at end of file
diff --git a/src/State.cpp b/src/State.cpp
index 6c8125d..b726adb 100644
--- a/src/State.cpp
+++ b/src/State.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "State.hpp"
 
 #include <algorithm>
@@ -5,7 +21,6 @@
 #include <limits>
 #include <vector>
 
-
 TracebackState::TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev)
     : site(_site), best_prev_node(_best_prev_node), prev(_prev) {
 }
@@ -14,7 +29,7 @@ State::State(Node* _below, double _score, TracebackState* _traceback)
     : below(_below), score(_score), traceback(_traceback) {
 }
 
-bool State::genotype() {
+bool State::genotype() const {
   return this->below->above->genotype;
 }
 
@@ -55,7 +70,7 @@ void StateBranch::prune() {
   });
 
   std::vector<State> new_states;
-  double running_min_score = std::numeric_limits<double>::infinity();
+  double running_min_score = std::numeric_limits<double>::max();
   for (const State& s : states) {
     if (s.score < running_min_score) {
       new_states.push_back(s);
@@ -81,7 +96,7 @@ void StateTree::prune() {
   }
 }
 
-std::vector<State> StateTree::dump() {
+std::vector<State> StateTree::dump() const {
   std::vector<State> states;
   for (auto pair : branches) {
     for (auto s : pair.second.states) {
diff --git a/src/State.hpp b/src/State.hpp
index 9ad796b..fcc589f 100644
--- a/src/State.hpp
+++ b/src/State.hpp
@@ -1,69 +1,85 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_STATE_HPP
 #define THREADS_ARG_STATE_HPP
 
 #include "Node.hpp"
 
-#include <vector>
 #include <unordered_map>
+#include <vector>
 
 class TracebackState {
+public:
+  TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev);
+
 public:
   int site = 0;
-  // ID to trace back through to last traceback state
-  Node* best_prev_node = nullptr;
+  Node* best_prev_node = nullptr; ///< ID to trace back through to last traceback state
   TracebackState* prev = nullptr;
-
-  TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev);
 };
 
 class State {
 public:
-  // Nothing here can be const because we want to use std::sort later on
-  // Panel entry directly below
-  Node* below = nullptr;
-  // Score of the current state
-  double score = 0.0;
-  // Pointer to last recombinant state
-  TracebackState* traceback = nullptr;
-  // Shorthand for this.below->site
-  bool genotype();
-
   State(Node* _below, double _score, TracebackState* _traceback);
 
+  /// Shorthand for this.below->site
+  bool genotype() const;
+
   friend std::ostream& operator<<(std::ostream& os, State& state);
+
+public:
+  // Nothing here can be const because we want to use std::sort later on
+  Node* below = nullptr; ///< Panel entry directly below
+  double score = 0.0;    ///< Score of the current state
+  TracebackState* traceback = nullptr;
 };
 
 class StatePair {
 public:
-  // Panel entry directly below
-  Node* below_a = nullptr;
-  Node* below_b = nullptr;
-  // Score of the current state
-  double score = 0.0;
-  // Pointer to last recombinant state
-  TracebackState* traceback_a = nullptr;
-  TracebackState* traceback_b = nullptr;
-
   StatePair(Node* _below_a, Node* _below_b, double _score, TracebackState* _traceback_a,
             TracebackState* _traceback_b);
 
   friend std::ostream& operator<<(std::ostream& os, StatePair& state_pair);
+
+public:
+  Node* below_a = nullptr; ///< Panel entry directly below
+  Node* below_b = nullptr;
+  double score = 0.0;                    ///< Score of the current state
+  TracebackState* traceback_a = nullptr; ///< Pointer to last recombinant state
+  TracebackState* traceback_b = nullptr;
 };
 
 class StateBranch {
 public:
-  std::vector<State> states;
   void insert(const State& state);
   void prune();
+
+public:
+  std::vector<State> states;
 };
 
 class StateTree {
 public:
-  std::unordered_map<int, StateBranch> branches;
-
   StateTree(std::vector<State>& states);
   void prune();
-  std::vector<State> dump();
+  std::vector<State> dump() const;
+
+public:
+  std::unordered_map<int, StateBranch> branches;
 };
 
 #endif // THREADS_ARG_STATE_HPP
diff --git a/src/TGEN.cpp b/src/TGEN.cpp
index f5f6d08..fdee447 100644
--- a/src/TGEN.cpp
+++ b/src/TGEN.cpp
@@ -1,8 +1,23 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "TGEN.hpp"
 
 #include "TgenSegment.hpp"
 
-#include <Eigen/Core>
 #include <boost/icl/separate_interval_set.hpp>
 
 #include <limits>
@@ -10,7 +25,6 @@
 #include <utility>
 #include <vector>
 
-
 namespace boost::icl {
 // See
 // https://www.boost.org/doc/libs/1_81_0/libs/icl/doc/html/boost_icl/examples/custom_interval.html
@@ -52,15 +66,12 @@ class TGENImpl {
   std::vector<SegmentSet> interval_sets;
 
   std::unordered_map<int, int> pos_idx_map;
-  Eigen::VectorXi reference_genome;
   std::vector<bool> reference_genome_vec;
   std::vector<std::vector<int>> bp_starts;
   std::vector<std::vector<int>> target_IDs;
   std::vector<std::vector<int>> het_sites;
   std::vector<int> positions;
   std::vector<std::vector<bool>> genotypes;
-
-  Eigen::MatrixXi genotype_cache;
   std::unordered_map<int, int> cached_genotypes_map;
 
   // Constructor
@@ -70,27 +81,23 @@ class TGENImpl {
         het_sites(std::move(_het_sites)), positions(std::move(_positions)) {
     positions.push_back(std::numeric_limits<int>::max());
     // position-to-site map
-    for (int i = 0; i < positions.size(); i++) {
+    for (int i = 0; i < static_cast<int>(positions.size()); i++) {
       pos_idx_map[positions[i]] = i;
     }
 
-    // set reference genome here
-    reference_genome = Eigen::VectorXi::Zero(positions.size());
     reference_genome_vec = std::vector<bool>(positions.size(), false);
     for (int h : het_sites[0]) {
-      reference_genome[pos_idx_map.at(h)] = 1;
       reference_genome_vec[pos_idx_map.at(h)] = true;
     }
 
     interval_sets.reserve(bp_starts.size());
     // Initialize interval maps for each sample. This can get too slow
-    for (int i = 0; i < bp_starts.size(); i++) {
+    for (std::size_t i = 0; i < bp_starts.size(); i++) {
       SegmentSet iset;
-      int n_segs = bp_starts[i].size();
+      int n_segs = static_cast<int>(bp_starts[i].size());
       std::vector<int>& sample_hets = het_sites[i];
-      int n_hets = sample_hets.size();
+      int n_hets = static_cast<int>(sample_hets.size());
       int het_site_idx = 0;
-      int pos_idx = 0;
       std::vector<int> seg_hets;
       for (int j = 0; j < n_segs; j++) {
         int seg_start = bp_starts[i][j];
@@ -116,84 +123,7 @@ class TGENImpl {
     cached_genotypes_map[0] = -1;
   }
 
-  // Eigen-based query
-//  Eigen::MatrixXi& query(const int bp_from, const int bp_to, const std::vector<int>& samples) {
-//    clear_cache();
-
-// Deprecated eigen-based query
-// Eigen::MatrixXi& TGEN::query(const int bp_from, const int bp_to, const std::vector<int>& samples) {
-//   clear_cache();
-
-//   // Find number of expected sites
-//   int start_pos = *std::lower_bound(positions.begin(), positions.end(), bp_from);
-//   int end_pos = *std::upper_bound(positions.begin(), positions.end(), bp_to);
-//   int idx_offset = pos_idx_map[start_pos];
-//   genotype_cache.resize(samples.size(), pos_idx_map[end_pos] - idx_offset);
-
-//   TgenSegment range(start_pos, end_pos);
-
-//   for (int i = 0; i < samples.size(); i++) {
-//     cached_genotypes_map[samples[i]] = i;
-//     if (samples[i] == 0) {
-//       auto insert_range =
-//           Eigen::seq(0, pos_idx_map[end_pos] - idx_offset - 1); // eigen seq is inclusive
-//       auto copy_range = Eigen::seq(idx_offset, pos_idx_map[end_pos] - 1);
-//       genotype_cache(i, insert_range) = reference_genome(
-//           copy_range); //.eval(); //WARNING need .eval() here (or do we? I don't think we do)
-//     }
-//     else {
-//       SegmentSet& segments(interval_sets[samples[i]]);
-
-//       // Initialize the queue
-//       std::queue<TgenSegment> seg_queue;
-//       auto eqr = segments.equal_range(range);
-//       for (SegmentSet::const_iterator iter = eqr.first; iter != eqr.second; iter++) {
-//         seg_queue.push(*iter & range);
-//       }
-
-//       // Process everything in the queue
-//       while (!seg_queue.empty()) {
-//         TgenSegment& segment = seg_queue.front();
-//         if (cached_genotypes_map.find(segment.target) != cached_genotypes_map.end()) {
-//           // We've reached somewhere along the tree where we can copy from
-//           int seg_start_idx = pos_idx_map[segment.lower()];
-//           int seg_end_idx = pos_idx_map[segment.upper()];
-//           auto insert_range = Eigen::seq(
-//               seg_start_idx - idx_offset, seg_end_idx - idx_offset - 1); // eigen seq is inclusive
-//           if (segment.target == 0) {
-//             auto copy_range = Eigen::seq(seg_start_idx, seg_end_idx - 1);
-//             // We've reached the root of the tree and copy from the "reference" genome
-//             genotype_cache(i, insert_range) = reference_genome(copy_range);
-//           }
-//           else {
-//             // We've found a cached genotype to copy from
-
-//             genotype_cache(i, insert_range) =
-//                 genotype_cache(cached_genotypes_map[segment.target], insert_range);
-//           }
-
-//           // We then flip all the het sites
-//           for (int h : segment.het_sites) {
-//             genotype_cache(i, pos_idx_map[h] - idx_offset) =
-//                 1 - genotype_cache(i, pos_idx_map[h] - idx_offset);
-//           }
-//         }
-//         else {
-//           // We've not yet reached somewhere to copy from, so we keep traversing
-//           auto new_eqr = interval_sets[segment.target].equal_range(segment);
-//           for (SegmentSet::const_iterator iter = new_eqr.first; iter != new_eqr.second; iter++) {
-//             seg_queue.push(*iter & segment);
-//           }
-//         }
-//         seg_queue.pop();
-//       }
-//     }
-//   }
-//   return genotype_cache;
-// }
-
-// std::vector-based query
-// Warning: This makes a copy when returned through the python interface
+  // Warning: This makes a copy when returned through the python interface
   std::vector<std::vector<bool>>& query(const int bp_from, const int bp_to,
                                         const std::vector<int>& samples) {
     genotypes.clear();
@@ -203,15 +133,14 @@ class TGENImpl {
     int end_pos = *std::upper_bound(positions.begin(), positions.end(), bp_to);
     int idx_offset = pos_idx_map[start_pos];
 
-    int n_samples = samples.size();
     int n_sites = pos_idx_map[end_pos] - idx_offset;
     genotypes.reserve(samples.size());
-    for (int i = 0; i < samples.size(); i++) {
+    for (std::size_t i = 0; i < samples.size(); i++) {
       genotypes.push_back(std::vector<bool>(n_sites));
     }
     TgenSegment range(start_pos, end_pos);
 
-    for (int i = 0; i < samples.size(); i++) {
+    for (int i = 0; i < static_cast<int>(samples.size()); i++) {
       std::vector<bool>& current_gt = genotypes.at(i);
       cached_genotypes_map[samples[i]] = i;
       if (samples[i] == 0) {
@@ -225,7 +154,7 @@ class TGENImpl {
         std::queue<TgenSegment> seg_queue;
         auto eqr = segments.equal_range(range);
         for (SegmentSet::const_iterator iter = eqr.first; iter != eqr.second; iter++) {
-          seg_queue.push(*iter & range);
+          seg_queue.push(iter->calc_intersection_with(range));
         }
 
         // Process everything in the queue
@@ -259,7 +188,7 @@ class TGENImpl {
             // We've not yet reached somewhere to copy from, so we keep traversing
             auto new_eqr = interval_sets[segment.target].equal_range(segment);
             for (SegmentSet::const_iterator iter = new_eqr.first; iter != new_eqr.second; iter++) {
-              seg_queue.push(*iter & segment);
+              seg_queue.push(iter->calc_intersection_with(segment));
             }
           }
           seg_queue.pop();
@@ -271,7 +200,6 @@ class TGENImpl {
 
   void clear_cache() {
     cached_genotypes_map.clear();
-    genotype_cache.resize(0, 0);
     cached_genotypes_map[0] = -1;
   }
 };
@@ -279,11 +207,15 @@ class TGENImpl {
 TGEN::TGEN(std::vector<int> _positions, std::vector<std::vector<int>> _bp_starts,
            std::vector<std::vector<int>> _target_IDs, std::vector<std::vector<int>> _het_sites)
     : pimpl(std::make_unique<TGENImpl>(std::move(_positions), std::move(_bp_starts),
-                                       std::move(_target_IDs), std::move(_het_sites))) {}
+                                       std::move(_target_IDs), std::move(_het_sites))) {
+}
 
-TGEN::~TGEN() = default;
+TGEN::~TGEN() {
+  // Empty destructor is declared in source rather than header so pimpl destructor is accessible
+}
 
-std::vector<std::vector<bool>>& TGEN::query(int start_pos, int end_pos, const std::vector<int>& samples) {
+std::vector<std::vector<bool>>& TGEN::query(int start_pos, int end_pos,
+                                            const std::vector<int>& samples) {
   // Forward the call to pimpl
   return pimpl->query(start_pos, end_pos, samples);
 }
diff --git a/src/TGEN.hpp b/src/TGEN.hpp
index 1a886d1..79b9c2a 100644
--- a/src/TGEN.hpp
+++ b/src/TGEN.hpp
@@ -1,32 +1,42 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_TGEN_HPP
 #define THREADS_ARG_TGEN_HPP
 
-#include <Eigen/Dense>
 #include <limits>
 #include <memory>
 #include <vector>
 
-
-class TGENImpl; // Forward declaration of the implementation class
+// Forward declaration of the implementation class
 // Using PIMPL idiom to prevent libraries that #include "TGEN.hpp" needing to see boost/icl headers
+class TGENImpl;
 
 class TGEN {
-
-private:
-  std::unique_ptr<TGENImpl> pimpl; // Pointer to implementation
-
 public:
-  // Constructors
   TGEN(std::vector<int> _positions, std::vector<std::vector<int>> _bp_starts,
        std::vector<std::vector<int>> _target_IDs, std::vector<std::vector<int>> _het_sites);
+  ~TGEN();
 
-  ~TGEN(); // Define the destructor for proper deletion of pimpl
-
-  std::vector<std::vector<bool>>& query(int start_pos, int end_pos, const std::vector<int>& samples);
-
-  // Deprecated Eigen version:
-  // Eigen::MatrixXi& query(const int start_pos, const int end_pos, const std::vector<int>& samples);
+  std::vector<std::vector<bool>>& query(int start_pos, int end_pos,
+                                        const std::vector<int>& samples);
   void clear_cache();
+
+private:
+  std::unique_ptr<TGENImpl> pimpl; ///< Pointer to implementation
 };
 
 #endif // THREADS_ARG_TGEN_HPP
diff --git a/src/TgenSegment.hpp b/src/TgenSegment.hpp
index 8c18d20..1a89c84 100644
--- a/src/TgenSegment.hpp
+++ b/src/TgenSegment.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_TGEN_SEGMENT_HPP
 #define THREADS_ARG_TGEN_SEGMENT_HPP
 
@@ -9,25 +25,22 @@
 // Here is a typical class that may model intervals in your application.
 class TgenSegment {
 public:
-  TgenSegment() : _first(), _past(), target(std::numeric_limits<int>::max()) {
-  }
-  TgenSegment(const TgenSegment& other)
-      : _first(other._first), _past(other._past), het_sites(other.het_sites), target(other.target) {
+  TgenSegment() : target(std::numeric_limits<int>::max()), first(), past() {
   }
-  TgenSegment(int lo, int up) : _first(lo), _past(up), target(std::numeric_limits<int>::max()) {
+
+  TgenSegment(int lo, int up) : target(std::numeric_limits<int>::max()), first(lo), past(up) {
   }
+
   TgenSegment(int lo, int up, std::vector<int> _hets, int _target)
-      : _first(lo), _past(up), het_sites(_hets), target(_target) {
+      : het_sites(_hets), target(_target), first(lo), past(up) {
   }
 
-  std::vector<int> het_sites;
-  int target = std::numeric_limits<int>::max();
-
   [[nodiscard]] int lower() const {
-    return _first;
+    return first;
   }
+
   [[nodiscard]] int upper() const {
-    return _past;
+    return past;
   }
 
   friend std::ostream& operator<<(std::ostream& os, const TgenSegment& seg) {
@@ -40,7 +53,7 @@ class TgenSegment {
     return os;
   }
 
-  TgenSegment operator&(const TgenSegment& other) const {
+  TgenSegment calc_intersection_with(const TgenSegment& other) const {
     int new_first = std::max(lower(), other.lower());
     int new_past = std::min(upper(), other.upper());
 
@@ -53,9 +66,13 @@ class TgenSegment {
     return {new_first, new_past, merged_sites, std::min(target, other.target)};
   }
 
+public:
+  std::vector<int> het_sites;
+  int target = std::numeric_limits<int>::max();
+
 private:
-  int _first = 0;
-  int _past = 0;
+  int first = 0;
+  int past = 0;
 };
 
 #endif // THREADS_ARG_TGEN_SEGMENT_HPP
diff --git a/src/ThreadsFastLS.cpp b/src/ThreadsFastLS.cpp
index c9a1016..1f81f5e 100644
--- a/src/ThreadsFastLS.cpp
+++ b/src/ThreadsFastLS.cpp
@@ -1,4 +1,21 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "ThreadsFastLS.hpp"
+
 #include <algorithm>
 #include <boost/math/special_functions/gamma.hpp>
 #include <cassert>
@@ -13,18 +30,31 @@
 #include <unordered_set>
 #include <vector>
 
+// Uncomment this #define to enable a runtime check that genetic position are in order. This
+// diagnostic check is left in whilst we may have issues during development.
+// #define THREADS_FAST_LS_CHECK_IN_ORDER
+
+namespace {
+
+const int END_ALLELE = 0;
+const int HMM_SPLIT_THRESHOLD = 1000;
 
-int END_ALLELE = 0;
+inline std::size_t pair_key(int i, int j) {
+  return (static_cast<std::size_t>(i) << 32) | static_cast<std::size_t>(j);
+}
 
-ThreadsFastLS::ThreadsFastLS(std::vector<double> _physical_positions, std::vector<double> _genetic_positions,
-                 double _mutation_rate, std::vector<double> ne, std::vector<double> ne_times,
-                 bool _sparse_sites,
-                 int _n_prune, // Threshold for pruning states in the tdpbwt algorithm
-                 bool _use_hmm, int _burn_in_left, int _burn_in_right)
-    : physical_positions(_physical_positions), genetic_positions(_genetic_positions),
-      mutation_rate(_mutation_rate), demography(Demography(ne, ne_times)),
-      sparse_sites(_sparse_sites), n_prune(_n_prune), use_hmm(_use_hmm),
-      burn_in_left(_burn_in_left), burn_in_right(_burn_in_right) {
+} // namespace
+
+ThreadsFastLS::ThreadsFastLS(std::vector<double> _physical_positions,
+                             std::vector<double> _genetic_positions, double _mutation_rate,
+                             std::vector<double> ne, std::vector<double> ne_times,
+                             bool _sparse_sites,
+                             int _n_prune, // Threshold for pruning states in the tdpbwt algorithm
+                             bool _use_hmm, int _burn_in_left, int _burn_in_right)
+    : n_prune(_n_prune), mutation_rate(_mutation_rate), burn_in_left(_burn_in_left),
+      burn_in_right(_burn_in_right), sparse_sites(_sparse_sites), use_hmm(_use_hmm),
+      physical_positions(_physical_positions), genetic_positions(_genetic_positions),
+      demography(Demography(ne, ne_times)) {
   if (physical_positions.size() != genetic_positions.size()) {
     std::cerr << "Map lengths don't match.\n";
     exit(1);
@@ -37,26 +67,26 @@ ThreadsFastLS::ThreadsFastLS(std::vector<double> _physical_positions, std::vecto
     std::cerr << "Need a strictly positive mutation rate.\n";
     exit(1);
   }
-  num_sites = physical_positions.size();
+  num_sites = static_cast<int>(physical_positions.size());
   num_samples = 0;
 
-  // Check maps are strictly increasing
-  // for (int i = 0; i < num_sites - 1; i++) {
-  //   if (physical_positions[i + 1] <= physical_positions[i]) {
-  //     cerr << "Physical positions must be strictly increasing, found ";
-  //     cerr << physical_positions[i + 1] << " after " << physical_positions[i] << endl;
-  //     exit(1);
-  //   }
-  //   if (genetic_positions[i + 1] <= genetic_positions[i]) {
-  //     cerr << "Genetic coordinates must be strictly increasing, found ";
-  //     cerr << genetic_positions[i + 1] << " after " << genetic_positions[i] << endl;
-  //     exit(1);
-  //   }
-  // }
+#ifdef THREADS_FAST_LS_CHECK_IN_ORDER
+  for (int i = 0; i < num_sites - 1; i++) {
+    if (physical_positions[i + 1] <= physical_positions[i]) {
+      cerr << "Physical positions must be strictly increasing, found ";
+      cerr << physical_positions[i + 1] << " after " << physical_positions[i] << endl;
+      exit(1);
+    }
+    if (genetic_positions[i + 1] <= genetic_positions[i]) {
+      cerr << "Genetic coordinates must be strictly increasing, found ";
+      cerr << genetic_positions[i + 1] << " after " << genetic_positions[i] << endl;
+      exit(1);
+    }
+  }
+#endif // THREADS_FAST_LS_CHECK_IN_ORDER
 
   // Initialize map burn-in
   threading_start = physical_positions.front() + burn_in_left;
-  // threading_end = physical_positions.back() - burn_in_right + 1;
   threading_end = physical_positions.back() - burn_in_right;
   trim_pos_start_idx = 0;
   for (int i = 0; i < num_sites; i++) {
@@ -78,7 +108,8 @@ ThreadsFastLS::ThreadsFastLS(std::vector<double> _physical_positions, std::vecto
     }
   }
   if (trim_pos_start_idx >= trim_pos_end_idx - 3) {
-    std::cerr << "Too few positions left after applying burn-in, need at least 3. Aborting." << std::endl;
+    std::cerr << "Too few positions left after applying burn-in, need at least 3. Aborting."
+              << std::endl;
     exit(1);
   }
 
@@ -114,15 +145,15 @@ ThreadsFastLS::ThreadsFastLS(std::vector<double> _physical_positions, std::vecto
 std::tuple<std::vector<double>, std::vector<double>>
 ThreadsFastLS::site_sizes(std::vector<double> positions) {
   // Find mid-points between sites
-  int M = positions.size();
+  std::size_t M = positions.size();
   std::vector<double> pos_means(M - 1);
-  for (int i = 0; i < M - 1; i++) {
+  for (std::size_t i = 0; i < M - 1; i++) {
     pos_means[i] = (positions[i] + positions[i + 1]) / 2.;
   }
   // Find the mean size of mid-point differences
   std::vector<double> site_sizes(M);
   // Mid-point deltas tell us about the area around each site
-  for (int i = 1; i < M - 1; i++) {
+  for (std::size_t i = 1; i < M - 1; i++) {
     site_sizes[i] = (pos_means[i] - pos_means[i - 1]);
   }
   double mean_size = (pos_means[M - 2] - pos_means[0]) / double(M - 2);
@@ -137,16 +168,13 @@ ThreadsFastLS::site_sizes(std::vector<double> positions) {
   std::vector<double> boundaries(M + 1);
   boundaries[0] = positions[0];
   boundaries[M] = positions[M - 1];
-  for (int i = 1; i < M; i++) {
+  for (std::size_t i = 1; i < M; i++) {
     boundaries[i] = pos_means[i - 1];
   }
   return std::tuple(boundaries, site_sizes);
 }
 
-/**
- *
- */
-std::vector<double> ThreadsFastLS::trimmed_positions() {
+std::vector<double> ThreadsFastLS::trimmed_positions() const {
   std::vector<double> trim_pos = {physical_positions.cbegin() + trim_pos_start_idx,
                                   physical_positions.cbegin() + trim_pos_end_idx};
   return trim_pos;
@@ -159,14 +187,6 @@ void ThreadsFastLS::delete_hmm() {
   }
 }
 
-/**
- * @brief Find the next insert position
- *
- * @param t Pointer to node below the sequence being inserted at site i
- * @param g Allele at site i+1
- * @param i The site
- * @return Node* Pointer to node below the sequence being inserted at site i+1
- */
 Node* ThreadsFastLS::extend_node(Node* t, bool g, int i) {
   Node* t_next;
   if (!g && t->w[g]->sample_ID == -1) {
@@ -183,18 +203,13 @@ Node* ThreadsFastLS::extend_node(Node* t, bool g, int i) {
 void ThreadsFastLS::insert(const std::vector<bool>& genotype) {
   insert(num_samples, genotype);
 }
-/**
- * @brief Insert a new sequence into the dynamic panel
- *
- * @param genotype
- */
 void ThreadsFastLS::insert(const int ID, const std::vector<bool>& genotype) {
 
   if (ID_map.find(ID) != ID_map.end()) {
     std::cerr << "ID " << ID << " is already in the panel.\n";
     exit(1);
   }
-  if (genotype.size() != num_sites) {
+  if (static_cast<int>(genotype.size()) != num_sites) {
     std::cerr << "Number of input markers does not match map.\n";
     exit(1);
   }
@@ -207,7 +222,7 @@ void ThreadsFastLS::insert(const int ID, const std::vector<bool>& genotype) {
   panel.emplace_back(std::vector<std::unique_ptr<Node>>(num_sites + 1));
 
   Node* t0 = bottoms[0].get();
-  panel[ID_map.at(ID)][0] = std::move(std::make_unique<Node>(ID, 0, genotype[0]));
+  panel[ID_map.at(ID)][0] = std::make_unique<Node>(ID, 0, genotype[0]);
   Node* z0 = panel[ID_map.at(ID)][0].get();
 
   // Inserts z0 above t0
@@ -223,7 +238,7 @@ void ThreadsFastLS::insert(const int ID, const std::vector<bool>& genotype) {
     bool g_k = genotype[k];
     bool next_genotype = (k == num_sites - 1) ? END_ALLELE : genotype[k + 1];
     // Add current thingy to panel
-    panel[ID_map.at(ID)][k + 1] = std::move(std::make_unique<Node>(ID, k + 1, next_genotype));
+    panel[ID_map.at(ID)][k + 1] = std::make_unique<Node>(ID, k + 1, next_genotype);
     z_next = panel[ID_map.at(ID)][k + 1].get();
     tmp = z_k->above;
     while (tmp->sample_ID != -1 && tmp->genotype != g_k) {
@@ -272,12 +287,6 @@ void ThreadsFastLS::insert(const int ID, const std::vector<bool>& genotype) {
   }
 }
 
-/**
- * @brief Deletes sequence ID from the dynamic panel. This moves the last sequence in the panel
- * to the position ID held. See alg 5 from d-PBWT paper.
- *
- * @param ID
- */
 void ThreadsFastLS::remove(int ID) {
   Node* s = panel[ID_map.at(ID)][0].get();
   // The last sequence in the panel
@@ -322,11 +331,7 @@ void ThreadsFastLS::remove(int ID) {
   num_samples--;
 }
 
-/**
- * @brief For debugging: print the sample-IDs of the arrayified panel.
- *
- */
-void ThreadsFastLS::print_sorting() {
+void ThreadsFastLS::print_sorting() const {
   for (int j = 0; j < num_sites + 1; ++j) {
     Node* node = tops[j].get();
     while (node != nullptr) {
@@ -339,13 +344,8 @@ void ThreadsFastLS::print_sorting() {
   }
 }
 
-/**
- * Run Li-Stephens on input haplotype *without* inserting into the dynamic panel.
- * See also Algorithm 4 of Lunter (2018), Bioinformatics.
- * For imputation we use the IMPUTE/Beagle mutation penalties
- */
 std::pair<TracebackState*, Node*> ThreadsFastLS::fastLS(const std::vector<bool>& genotype,
-                                                  bool imputation) {
+                                                        bool imputation) {
   // Get mutation/recombination penalties;
   std::vector<double> mu;
   std::vector<double> mu_c;
@@ -373,12 +373,11 @@ std::pair<TracebackState*, Node*> ThreadsFastLS::fastLS(const std::vector<bool>&
   // z holds the best current score
   double z;
   int max_states = 0;
-  bool allele;
   State best_extension = current_states.back();
 
   for (int i = 0; i < num_sites; i++) {
     bool allele = genotype[i];
-    int n_states = current_states.size();
+    int n_states = static_cast<int>(current_states.size());
     max_states = std::max(n_states, max_states);
     if (n_states == 0) {
       std::cerr << "No states left on stack, something is messed up in the algorithm.\n";
@@ -449,8 +448,7 @@ std::pair<TracebackState*, Node*> ThreadsFastLS::fastLS(const std::vector<bool>&
     }
 
     // Pruning is turned off by default
-    if (n_prune >= 0 && i % 100 == 0 && new_states.size() >= n_prune) {
-      int old_size = new_states.size();
+    if ((n_prune >= 0) && (i % 100 == 0) && (static_cast<int>(new_states.size()) >= n_prune)) {
       StateTree tree = StateTree(new_states);
       tree.prune();
       current_states = tree.dump();
@@ -467,18 +465,14 @@ std::pair<TracebackState*, Node*> ThreadsFastLS::fastLS(const std::vector<bool>&
                          [](const auto& s1, const auto& s2) { return s1.score < s2.score; }));
 
   if ((num_samples + 1) % 100 == 0) {
-    std::cout << "Found best path with score " << min_state.score << " for sequence " << num_samples + 1;
+    std::cout << "Found best path with score " << min_state.score << " for sequence "
+              << num_samples + 1;
     std::cout << ", using a maximum of " << max_states << " states.\n";
   }
 
   return std::pair<TracebackState*, Node*>(min_state.traceback, min_state.below->above);
 }
 
-/**
- * Run Li-Stephens on input diplotype *without* inserting into the dynamic panel.
- * See also Algorithm 4 of Lunter (2018), Bioinformatics.
- * Warning: The code here is more verbose than it has to be
- **/
 std::array<std::pair<TracebackState*, Node*>, 2>
 ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
   // Get mutation/recombination penalties;
@@ -506,7 +500,6 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
   // z holds the best current score for pairs and individual sequences
   double z;
   int max_state_pairs = 0;
-  bool allele;
   StatePair best_pair = current_pairs.back();
   bool extensible_a0;
   bool extensible_a1;
@@ -516,7 +509,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
   // Just like haploid, we iterate through sites
   for (int i = 0; i < num_sites; i++) {
     int allele = genotype[i];
-    int n_state_pairs = current_pairs.size();
+    int n_state_pairs = static_cast<int>(current_pairs.size());
     std::vector<StatePair> new_pairs;
     max_state_pairs = std::max(n_state_pairs, max_state_pairs);
     if (n_state_pairs == 0) {
@@ -571,14 +564,14 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
     }
 
     // Set local minima, this maps (anchor, traceback) to a score
-    std::unordered_map<size_t, double> local_min;
-    std::unordered_map<size_t, bool> extmap_0;
-    std::unordered_map<size_t, bool> extmap_1;
+    std::unordered_map<std::size_t, double> local_min;
+    std::unordered_map<std::size_t, bool> extmap_0;
+    std::unordered_map<std::size_t, bool> extmap_1;
 
     for (StatePair& p : current_pairs) {
-      size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
-      size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
-      double z_pair;
+      std::size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
+      std::size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
+      double z_pair = std::numeric_limits<double>::max();
 
       // Set/get extensibility
       if (extmap_0.count(key_a)) {
@@ -649,6 +642,10 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
           z_pair = p.score + 2 * mutation_cost;
         }
       }
+      else {
+        throw std::runtime_error("Illegal genotype value " + std::to_string(allele));
+      }
+
       if (!local_min.count(key_a)) {
         // Set local minima
         local_min[key_a] = z_pair;
@@ -665,16 +662,14 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
     }
 
     // START OF MAIN EXTENSION LOOP
-    std::unordered_map<size_t, double> new_local_min;
+    std::unordered_map<std::size_t, double> new_local_min;
     for (StatePair& p : current_pairs) {
       bool extended = false;
-      size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
-      size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
+      std::size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
+      std::size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
       double z_a = local_min.at(key_a);
       double z_b = local_min.at(key_b);
 
-      Node* a_next;
-      Node* b_next;
       extensible_a0 = extmap_0.at(key_a);
       extensible_a1 = extmap_1.at(key_a);
       extensible_b0 = extmap_0.at(key_b);
@@ -687,8 +682,8 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
       if (case1_cost <= std::min({z_a + rho_delta, z_b + rho_delta, z + 2 * rho_delta})) {
         if (allele == 0) {
           if (extensible_a0 && extensible_b0) {
-            a_next = extend_node(p.below_a, 0, i);
-            b_next = extend_node(p.below_b, 0, i);
+            Node* a_next = extend_node(p.below_a, 0, i);
+            Node* b_next = extend_node(p.below_b, 0, i);
             new_pairs.emplace_back(a_next, b_next, case1_cost, p.traceback_a, p.traceback_b);
             added_1a.push_back(a_next);
             added_1b.push_back(b_next);
@@ -697,16 +692,16 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
         }
         else if (allele == 1) {
           if (extensible_a0 && extensible_b1) {
-            a_next = extend_node(p.below_a, 0, i);
-            b_next = extend_node(p.below_b, 1, i);
+            Node* a_next = extend_node(p.below_a, 0, i);
+            Node* b_next = extend_node(p.below_b, 1, i);
             new_pairs.emplace_back(a_next, b_next, case1_cost, p.traceback_a, p.traceback_b);
             added_1a.push_back(a_next);
             added_1b.push_back(b_next);
             extended = true;
           }
           if (extensible_a1 && extensible_b0) {
-            a_next = extend_node(p.below_a, 1, i);
-            b_next = extend_node(p.below_b, 0, i);
+            Node* a_next = extend_node(p.below_a, 1, i);
+            Node* b_next = extend_node(p.below_b, 0, i);
             new_pairs.emplace_back(a_next, b_next, case1_cost, p.traceback_a, p.traceback_b);
             added_1a.push_back(a_next);
             added_1b.push_back(b_next);
@@ -715,8 +710,8 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
         }
         else {
           if (extensible_a1 && extensible_b1) {
-            a_next = extend_node(p.below_a, 1, i);
-            b_next = extend_node(p.below_b, 1, i);
+            Node* a_next = extend_node(p.below_a, 1, i);
+            Node* b_next = extend_node(p.below_b, 1, i);
             new_pairs.emplace_back(a_next, b_next, case1_cost, p.traceback_a, p.traceback_b);
             added_1a.push_back(a_next);
             added_1b.push_back(b_next);
@@ -729,7 +724,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
           local_min[key_a] = std::min(z_a, case1_cost);
           local_min[key_b] = std::min(z_b, case1_cost);
           for (auto a1 : added_1a) {
-            size_t new_key_a = pair_key(a1->above->sample_ID, p.traceback_a->site);
+            std::size_t new_key_a = pair_key(a1->above->sample_ID, p.traceback_a->site);
             if (new_local_min.count(new_key_a)) {
               new_local_min[new_key_a] = std::min(new_local_min.at(new_key_a), case1_cost);
             }
@@ -738,7 +733,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
             }
           }
           for (auto b1 : added_1b) {
-            size_t new_key_b = pair_key(b1->above->sample_ID, p.traceback_b->site);
+            std::size_t new_key_b = pair_key(b1->above->sample_ID, p.traceback_b->site);
             if (new_local_min.count(new_key_b)) {
               new_local_min[new_key_b] = std::min(new_local_min.at(new_key_b), case1_cost);
             }
@@ -797,7 +792,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
           local_min[key_a] = std::min(local_min.at(key_a), case2_cost);
           local_min[key_b] = std::min(local_min.at(key_b), case2_cost);
           for (auto a2 : added_2a) {
-            size_t new_key_a = pair_key(a2->above->sample_ID, p.traceback_a->site);
+            std::size_t new_key_a = pair_key(a2->above->sample_ID, p.traceback_a->site);
             if (new_local_min.count(new_key_a)) {
               new_local_min[new_key_a] = std::min(new_local_min.at(new_key_a), case2_cost);
             }
@@ -806,7 +801,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
             }
           }
           for (auto b2 : added_2b) {
-            size_t new_key_b = pair_key(b2->above->sample_ID, p.traceback_b->site);
+            std::size_t new_key_b = pair_key(b2->above->sample_ID, p.traceback_b->site);
             if (new_local_min.count(new_key_b)) {
               new_local_min[new_key_b] = std::min(new_local_min.at(new_key_b), case2_cost);
             }
@@ -867,7 +862,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
           local_min[key_a] = std::min(local_min.at(key_a), case3_cost);
           local_min[key_b] = std::min(local_min.at(key_b), case3_cost);
           for (auto a3 : added_3a) {
-            size_t new_key_a = pair_key(a3->above->sample_ID, p.traceback_a->site);
+            std::size_t new_key_a = pair_key(a3->above->sample_ID, p.traceback_a->site);
             if (new_local_min.count(new_key_a)) {
               new_local_min[new_key_a] = std::min(new_local_min.at(new_key_a), case3_cost);
             }
@@ -876,7 +871,7 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
             }
           }
           for (auto b3 : added_3b) {
-            size_t new_key_b = pair_key(b3->above->sample_ID, p.traceback_b->site);
+            std::size_t new_key_b = pair_key(b3->above->sample_ID, p.traceback_b->site);
             if (new_local_min.count(new_key_b)) {
               new_local_min[new_key_b] = std::min(new_local_min.at(new_key_b), case3_cost);
             }
@@ -896,12 +891,12 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
     }
 
     // SINGLE RECOMBINATION EVENTS
-    std::unordered_set<size_t> already_recombined;
+    std::unordered_set<std::size_t> already_recombined;
     std::vector<StatePair> rec_pairs;
 
     for (StatePair& p : new_pairs) {
-      size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
-      size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
+      std::size_t key_a = pair_key(p.below_a->above->sample_ID, p.traceback_a->site);
+      std::size_t key_b = pair_key(p.below_b->above->sample_ID, p.traceback_b->site);
       double recombinant_score = p.score - rho_c[i] + rho[i];
       if (!already_recombined.count(key_a) &&
           std::abs(new_local_min.at(key_a) - p.score) < 0.0001) {
@@ -967,15 +962,10 @@ ThreadsFastLS::fastLS_diploid(const std::vector<int>& genotype) {
           std::pair<TracebackState*, Node*>(min_pair.traceback_b, min_pair.below_b->above)};
 }
 
-/**
- * This is a basic traceback that samples a random haplotype per from all matches per segment and
- * also stores the lowest-numbered match (for compression)
- */
-std::vector<std::tuple<int, std::vector<int>>> ThreadsFastLS::traceback(TracebackState* tb, Node* match,
-                                                                  bool return_all) {
+std::vector<std::tuple<int, std::vector<int>>>
+ThreadsFastLS::traceback(TracebackState* tb, Node* match, bool return_all) {
   std::vector<std::tuple<int, std::vector<int>>> best_path;
   while (tb != nullptr) {
-    int n_matches = 1;
     int segment_start = tb->site;
     int match_id = match->sample_ID;
     std::vector<int> div_states = {match_id};
@@ -990,7 +980,6 @@ std::vector<std::tuple<int, std::vector<int>>> ThreadsFastLS::traceback(Tracebac
         break;
       }
       div_states.push_back(div_node->sample_ID);
-      n_matches += 1;
       if (div_node->sample_ID < min_id) {
         min_id = div_node->sample_ID;
       }
@@ -1000,7 +989,7 @@ std::vector<std::tuple<int, std::vector<int>>> ThreadsFastLS::traceback(Tracebac
       sampled_states = std::vector<int>(div_states.begin(), div_states.end());
     }
     else {
-      std::uniform_int_distribution<> distrib(0, div_states.size() - 1);
+      std::uniform_int_distribution<> distrib(0, static_cast<int>(div_states.size()) - 1);
       sampled_states.reserve(2);
       std::sample(div_states.begin(), div_states.end(), std::back_inserter(sampled_states), 1, rng);
       // Add the min-state as well
@@ -1016,17 +1005,12 @@ std::vector<std::tuple<int, std::vector<int>>> ThreadsFastLS::traceback(Tracebac
   return best_path;
 }
 
-/**
- * Similar to normal traceback, but picks the up-to neighborhood_size best matches and stores their overlap with the
- * input sequence returns a list of a tuple of lists :-P I.e., a list of segments, and each segment
- * is a tuple (sample_IDs, starts, ends) all of equal length <= neighborhood_size
- */
 std::vector<std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>>
-ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb, Node* match, int neighborhood_size) {
+ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb, Node* match,
+                                int neighborhood_size) {
   std::vector<std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>> imputation_path;
   int prev_end = num_sites;
   while (tb != nullptr) {
-    int n_matches = 1;
     int segment_start = tb->site;
     int segment_end = prev_end;
     prev_end = segment_start;
@@ -1045,7 +1029,6 @@ ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb
         break;
       }
       div_states.push_back(div_node->sample_ID);
-      n_matches += 1;
       if (div_node->sample_ID < min_id) {
         min_id = div_node->sample_ID;
       }
@@ -1054,7 +1037,6 @@ ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb
     div_node = match->below;
     while (div_node != nullptr && div_node->divergence <= segment_start) {
       div_states.push_back(div_node->sample_ID);
-      n_matches += 1;
       if (div_node->sample_ID < min_id) {
         min_id = div_node->sample_ID;
       }
@@ -1070,21 +1052,22 @@ ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb
     }
 
     // initialize original index locations
-    std::vector<size_t> idx(overlaps.size());
+    std::vector<std::size_t> idx(overlaps.size());
     std::iota(idx.begin(), idx.end(), 0);
 
     // sort indexes based on comparing values in v
     // using std::stable_sort instead of std::sort
     // to avoid unnecessary index re-orderings
     // when v contains elements of equal values
-    std::stable_sort(idx.begin(), idx.end(), [&overlaps](size_t i1, size_t i2) {
+    std::stable_sort(idx.begin(), idx.end(), [&overlaps](std::size_t i1, std::size_t i2) {
       return overlaps[i1].second - overlaps[i1].first < overlaps[i2].second - overlaps[i2].first;
     });
 
     std::vector<int> segment_starts;
     std::vector<int> sample_ids;
     std::vector<int> segment_ends;
-    for (int j = idx.size() - 1; j >= std::max(0, (int) (idx.size() - neighborhood_size)); j--) {
+    for (int j = static_cast<int>(idx.size()) - 1;
+         j >= std::max(0, static_cast<int>(idx.size()) - neighborhood_size); j--) {
       segment_starts.push_back(segment_start);
       segment_ends.push_back(segment_end);
       sample_ids.push_back(div_states[idx[j]]);
@@ -1098,16 +1081,6 @@ ThreadsFastLS::traceback_impute(std::vector<bool>& genotypes, TracebackState* tb
   return imputation_path;
 }
 
-/**
- * @brief Determine whether state can be extended through panel by appending g.
- *        May alter
- * @param s State at site i
- * @param t_next Node at site i+1
- * @param g Candidate genotype for s at i+1
- * @param i The site index
- * @return true
- * @return false
- */
 bool ThreadsFastLS::extensible_by(State& s, const Node* t_next, const bool g, const int i) {
   int next_above_candidate = t_next->above->sample_ID;
 
@@ -1141,7 +1114,7 @@ std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::mutation_pen
   std::vector<double> mu_c(num_sites);
 
   double mean_bp_size =
-      (double) (physical_positions.back() - physical_positions[0]) / (double) num_sites;
+      (physical_positions.back() - physical_positions[0]) / static_cast<double>(num_sites);
 
   // The expected branch length
   const double t = demography.expected_branch_length(num_samples + 1);
@@ -1155,11 +1128,6 @@ std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::mutation_pen
   return std::tuple(mu, mu_c);
 }
 
-/**
- * @brief This gives the IMPUTE5 (and Beagle) recombination penalties
- *
- * @return tuple of penalty vectors
- */
 std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::mutation_penalties_impute5() {
   std::vector<double> mu(num_sites);
   std::vector<double> mu_c(num_sites);
@@ -1173,11 +1141,6 @@ std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::mutation_pen
   return std::tuple(mu, mu_c);
 }
 
-/**
- * @brief This gives the *sparse* recombination penalties
- *
- * @return tuple of penalty vectors
- */
 std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::recombination_penalties() {
   // Recall: 1cM means the expected average number of intervening
   // chromosomal crossovers in a single generation is 0.01
@@ -1199,12 +1162,8 @@ std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::recombinatio
   return std::tuple(rho, rho_c);
 }
 
-/**
- * @brief This gives the correct, dense, recombination penalties
- *
- * @return tuple of penalty vectors
- */
-std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::recombination_penalties_correct() {
+std::tuple<std::vector<double>, std::vector<double>>
+ThreadsFastLS::recombination_penalties_correct() {
   // Recall: 1cM means the expected average number of intervening
   // chromosomal crossovers in a single generation is 0.01
   std::vector<double> rho(num_sites);
@@ -1225,51 +1184,37 @@ std::tuple<std::vector<double>, std::vector<double>> ThreadsFastLS::recombinatio
   return std::tuple(rho, rho_c);
 }
 
-/**
- * @brief Date the segment based on length and n_mismatches using maximum likelihood. (No
- * demography)
- *
- * @param id1
- * @param id2
- * @param start inclusive
- * @param end exclusive
- * @return double
- */
 double ThreadsFastLS::date_segment(const int num_het_sites, const int start, const int end) {
   if (start > end) {
     std::cerr << "Can't date a segment with length <= 0\n";
     exit(1);
   }
-  double m = (double) num_het_sites;
   double bp_size = 0;
   double cm_size = 0;
   for (int i = start; i < end; i++) {
     bp_size += bp_sizes[i];
     cm_size += cm_sizes[i];
   }
-  double mu = 2. * mutation_rate * bp_size;
-  double rho = 2. * 0.01 * cm_size;
   if (sparse_sites) {
-    return ThreadsFastLS::date_segment_sparse(num_het_sites, cm_size, demography);
+    return ThreadsFastLS::date_segment_sparse(cm_size, demography);
   }
   else {
-    return ThreadsFastLS::date_segment(m, cm_size, bp_size, mutation_rate, demography);
+    return ThreadsFastLS::date_segment(num_het_sites, cm_size, bp_size, mutation_rate, demography);
   }
 }
 
 double ThreadsFastLS::date_segment(int num_het_sites, double cm_size, double bp_size,
-                             double mutation_rate, Demography& demography) {
+                                   double mutation_rate, Demography& demography) {
   int m = num_het_sites;
   double mu = 2. * mutation_rate * bp_size;
   double rho = 2. * 0.01 * cm_size;
   if (m > 15) {
-    // cout << "Warning: very many heterozygous sites, defaulting to const-demography method.\n";
     double gamma = 1. / demography.expected_time;
     return (m + 2) / (gamma + rho + mu);
   }
   double numerator = 0;
   double denominator = 0;
-  int K = demography.times.size();
+  int K = static_cast<int>(demography.times.size());
   for (int k = 0; k < K; k++) {
     double T1 = demography.times[k];
     double gamma_k = 1. / demography.sizes[k];
@@ -1296,12 +1241,12 @@ double ThreadsFastLS::date_segment(int num_het_sites, double cm_size, double bp_
   return numerator / denominator;
 }
 
-double ThreadsFastLS::date_segment_sparse(int num_het_sites, double cm_size, Demography& demography) {
-  int m = num_het_sites;
+double ThreadsFastLS::date_segment_sparse(double cm_size, Demography& demography) {
+
   double rho = 2. * 0.01 * cm_size;
   double numerator = 0;
   double denominator = 0;
-  int K = demography.times.size();
+  int K = static_cast<int>(demography.times.size());
   for (int k = 0; k < K; k++) {
     double T1 = demography.times[k];
     double gamma_k = 1. / demography.sizes[k];
@@ -1364,11 +1309,11 @@ ThreadsFastLS::thread(const int new_sample_ID, const std::vector<bool>& genotype
   std::vector<int> bp_starts;
   std::vector<std::vector<int>> target_IDs;
   std::vector<double> segment_ages;
-  int total_num_het_sites = 0;
   // Date segments
-  for (int i = 0; i < best_path.size(); i++) {
+  for (int i = 0; i < static_cast<int>(best_path.size()); i++) {
     int segment_start = std::get<0>(best_path[i]);
-    int segment_end = (i == best_path.size() - 1) ? num_sites : std::get<0>(best_path[i + 1]);
+    int segment_end =
+        (i == (static_cast<int>(best_path.size()) - 1)) ? num_sites : std::get<0>(best_path[i + 1]);
     std::vector<int> target_ID_L = std::get<1>(best_path[i]);
 
     std::vector<bool> het_hom_sites =
@@ -1380,19 +1325,20 @@ ThreadsFastLS::thread(const int new_sample_ID, const std::vector<bool>& genotype
         num_het_sites++;
       }
     }
-    total_num_het_sites += num_het_sites;
-    // is it ok to have 100 here?
-    if (use_hmm && num_samples < 1000) {
+
+    if (use_hmm && num_samples < HMM_SPLIT_THRESHOLD) {
       // is it ok to have 10 here?
       if (num_het_sites > 5) {
         std::vector<int> breakpoints = hmm->breakpoints(het_hom_sites, segment_start);
 
-        for (int i = 0; i < breakpoints.size(); i++) {
-          int breakpoint_start = breakpoints[i];
-          int breakpoint_end = (i == breakpoints.size() - 1) ? segment_end : breakpoints[i + 1];
+        for (int j = 0; j < static_cast<int>(breakpoints.size()); j++) {
+          int breakpoint_start = breakpoints[j];
+          int breakpoint_end =
+              (j == (static_cast<int>(breakpoints.size()) - 1)) ? segment_end : breakpoints[j + 1];
           target_IDs.push_back(target_ID_L);
           bp_starts.push_back(static_cast<int>(ceil(bp_boundaries[breakpoint_start])));
-          // This is wrong!!! need to actually re-do num het_sites!!!!
+          // TODO Pass right number of heterozygous sites to date segments when HMM is used (ticket
+          // #24)
           segment_ages.push_back(date_segment(num_het_sites, breakpoint_start, breakpoint_end));
         }
       }
@@ -1414,7 +1360,8 @@ ThreadsFastLS::thread(const int new_sample_ID, const std::vector<bool>& genotype
   return remove_burn_in(bp_starts, target_IDs, segment_ages, het_sites);
 }
 
-std::vector<ImputationSegment> ThreadsFastLS::impute(std::vector<bool>& genotype, int neighborhood_size) {
+std::vector<ImputationSegment> ThreadsFastLS::impute(std::vector<bool>& genotype,
+                                                     int neighborhood_size) {
   // vector of sample_ids, seg_starts, seg_ends (buffered)
   std::vector<std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>> best_path;
   Node* match;
@@ -1434,7 +1381,7 @@ std::vector<ImputationSegment> ThreadsFastLS::impute(std::vector<bool>& genotype
     seg_ages.push_back(date_segment(0, seg_start, seg_end));
   }
 
-  int num_segs = best_path.size();
+  int num_segs = static_cast<int>(best_path.size());
   std::vector<ImputationSegment> imputation_segments;
 
   // Special case for the first segment
@@ -1443,15 +1390,12 @@ std::vector<ImputationSegment> ThreadsFastLS::impute(std::vector<bool>& genotype
     const std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>& segment = best_path[i];
     const std::vector<int>& samples = std::get<0>(segment);
     ImputationSegment imp_seg;
-    imp_seg.seg_start = physical_positions[std::get<1>(segment)[0]];
+    imp_seg.seg_start = static_cast<int>(physical_positions[std::get<1>(segment)[0]]);
     imp_seg.ids = samples;
-    std::vector<double> wts;
+    std::vector<double> weights(samples.size(), 1. / static_cast<double>(samples.size()));
     std::vector<double> ages(samples.size(), seg_ages[i]);
     imp_seg.ages = ages;
-    for (auto s : samples) {
-      wts.push_back(1. / samples.size());
-    }
-    imp_seg.weights = wts;
+    imp_seg.weights = weights;
     imputation_segments.push_back(imp_seg);
   }
   return imputation_segments;
@@ -1470,9 +1414,10 @@ ThreadsFastLS::diploid_ls(std::vector<int> unphased_genotypes) {
 }
 
 std::tuple<std::vector<int>, std::vector<std::vector<int>>, std::vector<double>, std::vector<int>>
-ThreadsFastLS::remove_burn_in(std::vector<int>& bp_starts, std::vector<std::vector<int>>& target_IDs,
-                        std::vector<double>& segment_ages, std::vector<int>& het_sites) {
-  int num_segments = bp_starts.size();
+ThreadsFastLS::remove_burn_in(std::vector<int>& bp_starts,
+                              std::vector<std::vector<int>>& target_IDs,
+                              std::vector<double>& segment_ages, std::vector<int>& het_sites) {
+  int num_segments = static_cast<int>(bp_starts.size());
 
   std::vector<int> trim_starts;
   std::vector<std::vector<int>> trim_IDs;
@@ -1482,7 +1427,7 @@ ThreadsFastLS::remove_burn_in(std::vector<int>& bp_starts, std::vector<std::vect
     // Keep segments that start on or before threading_start
     int seg_start_i = 0;
     for (int i = 0; i < num_segments; i++) {
-      int seg_end = i == num_segments - 1 ? threading_end : bp_starts[i + 1];
+      int seg_end = i == num_segments - 1 ? static_cast<int>(threading_end) : bp_starts[i + 1];
       if (threading_start < seg_end) {
         break;
       }
@@ -1500,8 +1445,9 @@ ThreadsFastLS::remove_burn_in(std::vector<int>& bp_starts, std::vector<std::vect
         seg_end_i--;
       }
     }
+
     trim_starts = {bp_starts.begin() + seg_start_i, bp_starts.begin() + seg_end_i};
-    trim_starts[0] = threading_start;
+    trim_starts[0] = static_cast<int>(threading_start);
     trim_IDs = {target_IDs.begin() + seg_start_i, target_IDs.begin() + seg_end_i};
     trim_ages = {segment_ages.begin() + seg_start_i, segment_ages.begin() + seg_end_i};
   }
@@ -1515,16 +1461,8 @@ ThreadsFastLS::remove_burn_in(std::vector<int>& bp_starts, std::vector<std::vect
   return std::tie(trim_starts, trim_IDs, trim_ages, trim_hets);
 }
 
-/**
- * @brief
- * @param id1
- * @param id2
- * @param start inclusive!
- * @param end exclusive!
- * @return bool whether sequences match on the interval
- */
 bool ThreadsFastLS::genotype_interval_match(const int id1, const int id2, const int start,
-                                      const int end) {
+                                            const int end) {
   if (id1 == id2) {
     return true;
   }
@@ -1539,13 +1477,9 @@ bool ThreadsFastLS::genotype_interval_match(const int id1, const int id2, const
   return true;
 }
 
-/**
- * Assuming input genotypes match sample_id on [segment_start, segment_end), how much can we extend
- * the region in either direction with out hitting a mismatch
- */
 std::pair<int, int> ThreadsFastLS::overflow_region(const std::vector<bool>& genotypes,
-                                             const int sample_id, const int segment_start,
-                                             const int segment_end) {
+                                                   const int sample_id, const int segment_start,
+                                                   const int segment_end) {
   int overlap_start = segment_start;
   int overlap_end = segment_end;
 
@@ -1569,11 +1503,8 @@ std::pair<int, int> ThreadsFastLS::overflow_region(const std::vector<bool>& geno
   return std::pair<int, int>(overlap_start, overlap_end);
 }
 
-/**
- * Fetch het-hom status for id1 and id2 in the region specified by site indices
- */
 std::vector<bool> ThreadsFastLS::fetch_het_hom_sites(const int id1, const int id2, const int start,
-                                               const int end) {
+                                                     const int end) {
   if (ID_map.find(id1) == ID_map.end()) {
     std::cerr << "fetch_het_hom_sites bad id1 " << id1 << std::endl;
     exit(1);
@@ -1593,22 +1524,22 @@ std::vector<bool> ThreadsFastLS::fetch_het_hom_sites(const int id1, const int id
 }
 
 // Given threading instructions, find all heterozygous sites
-std::vector<int> ThreadsFastLS::het_sites_from_thread(const int focal_ID,
-                                                const std::vector<int> bp_starts,
-                                                const std::vector<std::vector<int>> target_IDs) {
+std::vector<int>
+ThreadsFastLS::het_sites_from_thread(const int focal_ID, const std::vector<int> bp_starts,
+                                     const std::vector<std::vector<int>> target_IDs) {
   std::vector<int> het_sites;
-  int num_segments = bp_starts.size();
+  int num_segments = static_cast<int>(bp_starts.size());
   int site_i = 0;
   for (int seg_i = 0; seg_i < num_segments; seg_i++) {
     int segment_start = bp_starts[seg_i];
-    int segment_end =
-        seg_i == num_segments - 1 ? physical_positions.back() + 1 : bp_starts[seg_i + 1];
+    int segment_end = seg_i == num_segments - 1 ? (static_cast<int>(physical_positions.back()) + 1)
+                                                : bp_starts[seg_i + 1];
     int target_ID = target_IDs[seg_i][0];
     while (segment_start <= physical_positions[site_i] &&
            physical_positions[site_i] < segment_end && site_i < num_sites) {
       if (panel[ID_map.at(focal_ID)][site_i]->genotype !=
           panel[ID_map.at(target_ID)][site_i]->genotype) {
-        het_sites.push_back(physical_positions[site_i]);
+        het_sites.push_back(static_cast<int>(physical_positions[site_i]));
       }
       site_i++;
     }
diff --git a/src/ThreadsFastLS.hpp b/src/ThreadsFastLS.hpp
index 0193c30..81a7ce5 100644
--- a/src/ThreadsFastLS.hpp
+++ b/src/ThreadsFastLS.hpp
@@ -1,8 +1,24 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_THREADS_HPP
 #define THREADS_ARG_THREADS_HPP
 
-#include "State.hpp"
 #include "HMM.hpp"
+#include "State.hpp"
 
 #include <memory>
 #include <random>
@@ -12,90 +28,77 @@
 struct ImputationSegment {
   int seg_start = 0;
   std::vector<int> ids;
+
   // these are at the *start* of the segment
   std::vector<double> weights;
   std::vector<double> ages;
 };
 
-// This class contains everything for Threads-fastLS
+/// This class contains everything for Threads-fastLS
 class ThreadsFastLS {
-
-private:
-  // To access the linked lists in each column
-  std::vector<std::unique_ptr<Node>> tops;
-  std::vector<std::unique_ptr<Node>> bottoms;  // Ha, ha
-
-  std::vector<std::unique_ptr<TracebackState>> traceback_states;
-
-  // Burn-in quantities
-  int trim_pos_start_idx = 0;
-  int trim_pos_end_idx = 0;
-
-  // TODO set random seed at runtime (ticket #20)
-  std::mt19937 rng{1234};
-
-  inline size_t pair_key(int i, int j) {
-    return (size_t) i << 32 | (unsigned int) j;
-  }
+public:
+  // Constructors and utils
+  ThreadsFastLS(std::vector<double> _physical_positions, std::vector<double> _genetic_positions,
+                double _mutation_rate, double ne, bool _sparse_sites, int _n_prune, bool _use_hmm,
+                int _burn_in_left, int _burn_in_right)
+      : ThreadsFastLS(_physical_positions, _genetic_positions, _mutation_rate,
+                      std::vector<double>{ne}, std::vector<double>{0.0}, _sparse_sites, _n_prune,
+                      _use_hmm, _burn_in_left, _burn_in_right){};
+  ThreadsFastLS(std::vector<double> _physical_positions, std::vector<double> _genetic_positions,
+                double _mutation_rate, std::vector<double> ne, std::vector<double> ne_times,
+                bool _sparse_sites, int _n_prune, bool _use_hmm, int _burn_in_left,
+                int _burn_in_right);
+
+  /// Find the next insert position
+  /// @param t Pointer to node below the sequence being inserted at site i
+  /// @param g Allele at site i+1
+  /// @param i The site
+  /// @return Node* Pointer to node below the sequence being inserted at site i+1
   Node* extend_node(Node* node, bool genotype, const int i);
+
+  /// Determine whether state can be extended through panel by appending genotype 'g'
+  /// @param s State at site i
+  /// @param t_next Node at site i+1
+  /// @param g Candidate genotype for s at i+1
+  /// @param i The site index
+  /// @return bool true if extensible
   bool extensible_by(State& s, const Node* t_next, const bool g, const int i);
+
   std::pair<bool, bool> pair_extensible_by(StatePair& p, const bool g, const int i);
+
+  /// @param id1
+  /// @param id2
+  /// @param start inclusive!
+  /// @param end exclusive!
+  /// @return bool whether sequences match on the interval
   bool genotype_interval_match(const int id1, const int id2, const int start, const int end);
+
+  /// Assuming input genotypes match sample_id on [segment_start, segment_end), how much can we
+  /// extend the region in either direction with out hitting a mismatch
   std::pair<int, int> overflow_region(const std::vector<bool>& genotypes, const int sample_id,
                                       const int segment_start, const int segment_end);
-  std::vector<bool> fetch_het_hom_sites(const int id1, const int id2, const int start, const int end);
-  std::vector<int> het_sites_from_thread(const int focal_ID, std::vector<int> bp_starts,
-                                         std::vector<std::vector<int>> target_IDs);
 
-public:
-  int n_prune;
-  int num_sites;
-  int num_samples;
-  double mutation_rate;
-  int burn_in_left;
-  int burn_in_right;
-  double threading_start;
-  double threading_end;
-  // This determines the segment dating formula
-  bool sparse_sites;
-  // Whether to internally break up big segments
-  bool use_hmm;
-  std::unordered_map<int, int> ID_map;
-  std::vector<double> physical_positions;
-  std::vector<double> genetic_positions;
-  std::vector<double> bp_sizes;
-  std::vector<double> cm_sizes;
-  std::vector<double> bp_boundaries;
-  std::vector<double> cm_boundaries;
-  Demography demography;
-  HMM* hmm;
+  /// Fetch het-hom status for id1 and id2 in the region specified by site indices
+  std::vector<bool> fetch_het_hom_sites(const int id1, const int id2, const int start,
+                                        const int end);
 
-  // The dynamic reference panel
-  std::vector<std::vector<std::unique_ptr<Node>>> panel;
-
-  // Constructors and utils
-  ThreadsFastLS(std::vector<double> _physical_positions, std::vector<double> _genetic_positions,
-          double _mutation_rate, double ne, bool _sparse_sites, int _n_prune, bool _use_hmm,
-          int _burn_in_left, int _burn_in_right)
-      : ThreadsFastLS(_physical_positions, _genetic_positions, _mutation_rate, std::vector<double>{ne},
-                std::vector<double>{0.0}, _sparse_sites, _n_prune, _use_hmm, _burn_in_left,
-                _burn_in_right){};
-  ThreadsFastLS(std::vector<double> _physical_positions, std::vector<double> _genetic_positions,
-          double _mutation_rate, std::vector<double> ne, std::vector<double> ne_times,
-          bool _sparse_sites, int _n_prune, bool _use_hmm, int _burn_in_left, int _burn_in_right);
+  std::vector<int> het_sites_from_thread(const int focal_ID, std::vector<int> bp_starts,
+                                         std::vector<std::vector<int>> target_IDs);
 
   static std::tuple<std::vector<double>, std::vector<double>>
   site_sizes(std::vector<double> positions);
 
   // More attributes
-  std::vector<double> trimmed_positions();
+  std::vector<double> trimmed_positions() const;
 
-  // Insertion/deletion
-  // Insert and assign generic ID
+  /// Insert and assign generic ID
   void insert(const std::vector<bool>& genotype);
-  // Insert and assign specific ID
+
+  /// Insert a new sequence into the dynamic panel and assign specific ID
   void insert(const int ID, const std::vector<bool>& genotype);
-  // Remove sample from panel
+
+  /// Deletes sequence ID from the dynamic panel. This moves the last sequence in the panel
+  /// to the position ID held. See alg 5 from d-PBWT paper.
   void remove(int ID);
 
   // HMM
@@ -112,27 +115,102 @@ class ThreadsFastLS {
   std::vector<std::tuple<int, std::vector<int>>> threads_ls(const std::vector<bool>& genotype);
 
   std::vector<ImputationSegment> impute(std::vector<bool>& genotype, int neighborhood_size);
+
+  /// Run Li-Stephens on input haplotype *without* inserting into the dynamic panel.
+  /// See also Algorithm 4 of Lunter (2018), Bioinformatics.
+  /// For imputation we use the IMPUTE/Beagle mutation penalties
   std::pair<TracebackState*, Node*> fastLS(const std::vector<bool>& genotype,
                                            bool imputation = false);
+
+  /// This is a basic traceback that samples a random haplotype per from all matches per segment and
+  /// also stores the lowest-numbered match (for compression)
   std::vector<std::tuple<int, std::vector<int>>> traceback(TracebackState* tb, Node* match,
                                                            bool return_all = false);
+
+  /// Similar to normal traceback, but picks the up-to neighborhood_size best matches and stores
+  /// their overlap with the input sequence returns a list of a tuple of lists :-P I.e., a list of
+  /// segments, and each segment is a tuple (sample_IDs, starts, ends) all of equal length <=
+  /// neighborhood_size
   std::vector<std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>>
-  traceback_impute(std::vector<bool>& genotypes, TracebackState* tb, Node* match, int neighborhood_size);
+  traceback_impute(std::vector<bool>& genotypes, TracebackState* tb, Node* match,
+                   int neighborhood_size);
+
+  /// Run Li-Stephens on input diplotype *without* inserting into the dynamic panel.
+  /// See also Algorithm 4 of Lunter (2018), Bioinformatics.
+  /// Warning: The code here is more verbose than it has to be
   std::array<std::pair<TracebackState*, Node*>, 2> fastLS_diploid(const std::vector<int>& genotype);
+
   std::array<std::vector<std::tuple<int, std::vector<int>>>, 2>
   diploid_ls(std::vector<int> unphased_genotypes);
+
+  /// This gives the IMPUTE5 (and Beagle) recombination penalties
+  /// @return tuple of penalty vectors
   std::tuple<std::vector<double>, std::vector<double>> mutation_penalties_impute5();
+
+  /// This gives the *sparse* recombination penalties
+  /// @return tuple of penalty vectors
   std::tuple<std::vector<double>, std::vector<double>> recombination_penalties();
   std::tuple<std::vector<double>, std::vector<double>> mutation_penalties();
+
+  /// This gives the correct, dense, recombination penalties
+  /// @return tuple of penalty vectors
   std::tuple<std::vector<double>, std::vector<double>> recombination_penalties_correct();
 
   // TMRCA estimation
+  /// Date the segment based on length and n_mismatches using maximum likelihood. (No
+  /// demography)
+  /// @param id1
+  /// @param id2
+  /// @param start inclusive
+  /// @param end exclusive
+  /// @return double
   double date_segment(const int num_het_sites, const int start, const int end);
+
   static double date_segment(int num_het_sites, double cm_length, double bp_length,
                              double mutation_rate, Demography& demography);
-  static double date_segment_sparse(int num_het_sites, double cm_length, Demography& demography);
-  // Debugging
-  void print_sorting();
+  static double date_segment_sparse(double cm_length, Demography& demography);
+
+  /// For debugging: print the sample-IDs of the arrayified panel
+  void print_sorting() const;
+
+public:
+  int n_prune = 0;
+  int num_sites = 0;
+  int num_samples = 0;
+  double mutation_rate = 0.0;
+  int burn_in_left = 0;
+  int burn_in_right = 0;
+  double threading_start = 0.0;
+  double threading_end = 0.0;
+
+  bool sparse_sites = false; ///< This determines the segment dating formula
+  bool use_hmm = false;      ///< Whether to internally break up big segments
+
+  std::unordered_map<int, int> ID_map;
+  std::vector<double> physical_positions;
+  std::vector<double> genetic_positions;
+  std::vector<double> bp_sizes;
+  std::vector<double> cm_sizes;
+  std::vector<double> bp_boundaries;
+  std::vector<double> cm_boundaries;
+  Demography demography;
+  HMM* hmm = nullptr;
+
+  // The dynamic reference panel
+  std::vector<std::vector<std::unique_ptr<Node>>> panel;
+
+private:
+  // To access the linked lists in each column
+  std::vector<std::unique_ptr<Node>> tops;
+  std::vector<std::unique_ptr<Node>> bottoms;
+  std::vector<std::unique_ptr<TracebackState>> traceback_states;
+
+  // Burn-in quantities
+  int trim_pos_start_idx = 0;
+  int trim_pos_end_idx = 0;
+
+  // TODO set random seed at runtime (ticket #20)
+  std::mt19937 rng{1234};
 };
 
 #endif // THREADS_ARG_THREADS_HPP
diff --git a/src/ThreadsLowMem.cpp b/src/ThreadsLowMem.cpp
index f40bbf8..c7f5c67 100644
--- a/src/ThreadsLowMem.cpp
+++ b/src/ThreadsLowMem.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "ThreadsLowMem.hpp"
 
 #include <algorithm>
@@ -8,15 +24,14 @@
 #include <unordered_set>
 #include <vector>
 
-
 ThreadsLowMem::ThreadsLowMem(const std::vector<int> _target_ids,
                              const std::vector<double>& _physical_positions,
                              const std::vector<double>& _genetic_positions, std::vector<double> ne,
                              std::vector<double> ne_times, double _mutation_rate, bool _sparse)
-    : target_ids(_target_ids), physical_positions(_physical_positions),
-      genetic_positions(_genetic_positions), demography(Demography(ne, ne_times)),
-      mutation_rate(_mutation_rate), sparse(_sparse) {
-  num_samples = target_ids.size();
+    : target_ids(_target_ids), mutation_rate(_mutation_rate),
+      physical_positions(_physical_positions), genetic_positions(_genetic_positions),
+      sparse(_sparse), demography(Demography(ne, ne_times)) {
+  num_samples = static_cast<int>(target_ids.size());
   if (physical_positions.size() != genetic_positions.size()) {
     throw std::runtime_error("Map lengths don't match.");
   }
@@ -24,7 +39,7 @@ ThreadsLowMem::ThreadsLowMem(const std::vector<int> _target_ids,
     throw std::runtime_error("Need at least 3 sites, found " +
                              std::to_string(physical_positions.size()));
   }
-  num_sites = physical_positions.size();
+  num_sites = static_cast<int>(physical_positions.size());
 
   // Check maps are strictly increasing
   for (int i = 0; i < num_sites - 1; i++) {
@@ -49,7 +64,8 @@ ThreadsLowMem::ThreadsLowMem(const std::vector<int> _target_ids,
   }
 
   // Mean interval size in base-pairs
-  mean_bp_size = (double) (physical_positions.back() - physical_positions[0]) / (num_sites - 1);
+  mean_bp_size =
+      (physical_positions.back() - physical_positions[0]) / static_cast<double>(num_sites - 1);
   for (int target_id : target_ids) {
     segment_indices[target_id] = 0;
     expected_branch_lengths[target_id] = demography.expected_branch_length(target_id + 1);
@@ -60,7 +76,7 @@ ThreadsLowMem::ThreadsLowMem(const std::vector<int> _target_ids,
   het_sites_processed = 0;
   std::tie(bp_boundaries, bp_sizes) = ThreadsFastLS::site_sizes(physical_positions);
 
-  for (int i = 0; i < genetic_positions.size(); i++) {
+  for (std::size_t i = 0; i < genetic_positions.size(); i++) {
     if (i == genetic_positions.size() - 1) {
       // TO allow for closely spaced markers
       cm_sizes.push_back(0.0000001);
@@ -87,7 +103,7 @@ void ThreadsLowMem::initialize_viterbi(std::vector<std::vector<std::unordered_se
     throw std::runtime_error("Match-data is missing or does not have same shape as genetic map");
   }
   match_groups.reserve(match_ids.size());
-  for (int i = 0; i < match_ids.size(); i++) {
+  for (std::size_t i = 0; i < match_ids.size(); i++) {
     match_groups.emplace_back(target_ids, match_ids.at(i), cm_positions.at(i));
   }
 
@@ -107,9 +123,9 @@ void ThreadsLowMem::initialize_viterbi(std::vector<std::vector<std::unordered_se
 void ThreadsLowMem::process_site_viterbi(const std::vector<int>& genotype) {
   bool group_change = false;
 
-  if (match_group_idx < match_groups.size() - 1 &&
-      genetic_positions.at(hmm_sites_processed) >=
-          match_groups.at(match_group_idx + 1).cm_position) {
+  if (match_group_idx < (static_cast<int>(match_groups.size()) - 1) &&
+      (genetic_positions.at(hmm_sites_processed) >=
+       match_groups.at(match_group_idx + 1).cm_position)) {
     match_group_idx++;
     group_change = true;
   }
@@ -150,7 +166,6 @@ void ThreadsLowMem::traceback() {
 }
 
 void ThreadsLowMem::process_site_hets(const std::vector<int>& genotype) {
-
   for (int target_id : target_ids) {
     if (target_id == 0) {
       if (genotype.at(0) == 1) {
@@ -160,8 +175,8 @@ void ThreadsLowMem::process_site_hets(const std::vector<int>& genotype) {
     else {
       ViterbiPath& path = paths.at(target_id);
       int current_seg_idx = segment_indices.at(target_id);
-      while (current_seg_idx < path.segment_starts.size() - 1 &&
-             het_sites_processed >= path.segment_starts.at(current_seg_idx + 1)) {
+      while (current_seg_idx < (static_cast<int>(path.segment_starts.size()) - 1) &&
+             (het_sites_processed >= path.segment_starts.at(current_seg_idx + 1))) {
         current_seg_idx++;
       }
       segment_indices.at(target_id) = current_seg_idx;
@@ -184,7 +199,7 @@ void ThreadsLowMem::date_segments() {
     throw std::runtime_error(
         "Can't date segments, not all sites have been parsed for heterozygosity.");
   }
-  // for (int i = 1; i < num_samples; i++) {
+
   for (int target_id : target_ids) {
     if (target_id == 0) {
       continue;
@@ -198,54 +213,41 @@ void ThreadsLowMem::date_segments() {
     }
   }
 
-  // I get segfault on this?
-
-  // for (int i = 1; i < num_samples; i++) {
   for (int target_id : target_ids) {
     if (target_id == 0) {
       continue;
     }
-    // cout << "dating sample " << i << endl;
-    // compute length in bp/cM and num_hets for each segment
     ViterbiPath& path = paths.at(target_id);
     ViterbiPath new_path(target_id);
-    int n_segs = path.segment_starts.size();
-    for (int k = 0; k < n_segs; k++) {
+    std::size_t n_segs = path.segment_starts.size();
+    for (std::size_t k = 0; k < n_segs; k++) {
       int sample_id = path.sample_ids.at(k);
       int segment_start = path.segment_starts.at(k);
       int segment_end = k < n_segs - 1 ? path.segment_starts.at(k + 1) : num_sites - 1;
-      // cout << "dating segment [" << segment_start << ", " << segment_end << ") no. " << k << "
-      // out of " << n_segs - 1 << endl;
       if (segment_end == segment_start) {
         continue;
       }
-      double bp_size = physical_positions.at(segment_end) - physical_positions.at(segment_start);
-      double cm_size = genetic_positions.at(segment_end) - genetic_positions.at(segment_start);
 
       // This is inefficient but probably not that bad
       std::vector<int> segment_hets;
       for (int h : path.het_sites) {
-        if (segment_start <= h && h < segment_end ||
-            h == num_sites - 1 && segment_end == num_sites - 1) {
+        if (((segment_start <= h) && (h < segment_end)) ||
+            ((h == num_sites - 1) && (segment_end == num_sites - 1))) {
           segment_hets.push_back(h);
         }
         else if (h >= segment_end) {
           break;
         }
       }
-      // cout << "found bpsize: " << bp_size << " cmsize " << cm_size << " nhets " <<
-      // segment_hets.size() << " sample " << sample_id << endl; disabling hmm for now
-      if (target_id < n_hmm_samples && segment_hets.size() > hmm_min_sites) {
-        // cout << "running hmm on segment [" << segment_start << ", " << segment_end << ")... ";
+
+      if ((target_id < n_hmm_samples) && (static_cast<int>(segment_hets.size()) > hmm_min_sites)) {
         std::vector<bool> het_hom_sites(segment_end - segment_start, false);
         for (int h : segment_hets) {
-          // cout << h << endl;
           het_hom_sites[h - segment_start] = true;
         }
         // Here we use the hmm to break the big segment up into smaller segments
         std::vector<int> breakpoints = psmc.breakpoints(het_hom_sites, segment_start);
-        // cout << "found " << breakpoints.size() << " breakpoints" << endl;
-        for (int j = 0; j < breakpoints.size(); j++) {
+        for (std::size_t j = 0; j < breakpoints.size(); j++) {
           int breakpoint_start = breakpoints[j];
           int breakpoint_end = (j == breakpoints.size() - 1) ? segment_end : breakpoints[j + 1];
           // there may be off-by-one errors here on the last segment (but who cares?)
@@ -257,16 +259,16 @@ void ThreadsLowMem::date_segments() {
           // Same as above
           std::vector<int> breakpoint_hets;
           for (int h : segment_hets) {
-            if (breakpoint_start <= h && h < breakpoint_end ||
-                h == num_sites - 1 && breakpoint_end == num_sites - 1) {
+            if (((breakpoint_start <= h) && (h < breakpoint_end)) ||
+                ((h == num_sites - 1) && (breakpoint_end == num_sites - 1))) {
               breakpoint_hets.push_back(h);
             }
             else if (h >= breakpoint_end) {
               break;
             }
           }
-          double height = ThreadsFastLS::date_segment(
-              breakpoint_hets.size(), cm_size, bp_size, mutation_rate, demography);
+          double height = ThreadsFastLS::date_segment(static_cast<int>(breakpoint_hets.size()),
+                                                      cm_size, bp_size, mutation_rate, demography);
           new_path.append(breakpoint_start, sample_id, height, breakpoint_hets);
         }
       }
@@ -274,8 +276,8 @@ void ThreadsLowMem::date_segments() {
         // there are off-by-one errors here on the last segment (but who cares?)
         double bp_size = physical_positions.at(segment_end) - physical_positions.at(segment_start);
         double cm_size = genetic_positions.at(segment_end) - genetic_positions.at(segment_start);
-        double height =
-            ThreadsFastLS::date_segment(segment_hets.size(), cm_size, bp_size, mutation_rate, demography);
+        double height = ThreadsFastLS::date_segment(
+            static_cast<int>(segment_hets.size()), cm_size, bp_size, mutation_rate, demography);
         new_path.append(segment_start, sample_id, height, segment_hets);
       }
     }
@@ -284,9 +286,8 @@ void ThreadsLowMem::date_segments() {
   return;
 }
 
-int ThreadsLowMem::count_branches() {
+int ThreadsLowMem::count_branches() const {
   int n_branches = 0;
-  // for (int i = 1; i < num_samples; i++) {
   for (int target_id : target_ids) {
     if (target_id == 0) {
       continue;
@@ -297,7 +298,6 @@ int ThreadsLowMem::count_branches() {
 }
 
 void ThreadsLowMem::prune() {
-  // for (int i = 1; i < num_samples; i++) {
   for (int target_id : target_ids) {
     if (target_id == 0) {
       continue;
diff --git a/src/ThreadsLowMem.hpp b/src/ThreadsLowMem.hpp
index 222fa7c..182d694 100644
--- a/src/ThreadsLowMem.hpp
+++ b/src/ThreadsLowMem.hpp
@@ -1,52 +1,33 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_THREADS_LOW_MEM_HPP
 #define THREADS_ARG_THREADS_LOW_MEM_HPP
 
+#include "Demography.hpp"
 #include "Matcher.hpp"
 #include "ThreadsFastLS.hpp"
 #include "ViterbiLowMem.hpp"
-#include "Demography.hpp"
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
 class ThreadsLowMem {
-
-private:
-  Demography demography;
-
-  // 2. HMM quantites
-  int hmm_sites_processed;
-  std::unordered_map<int, ViterbiState> hmms;
-  int match_group_idx;
-  std::vector<MatchGroup> match_groups;
-
-  // 3. Path segment and path dating quantites
-  HMM psmc;
-  // std::vector<double> cm_sizes;
-  // std::vector<double> bp_sizes;
-  int het_sites_processed;
-  int n_hmm_samples = 100;
-  int hmm_min_sites = 10;
-
 public:
-  // This object will only run the HMM for these ids
-  std::vector<int> target_ids;
-  std::unordered_map<int, double> expected_branch_lengths;
-  double mean_bp_size;
-  std::unordered_map<int, int> segment_indices;
-  std::unordered_map<int, ViterbiPath> paths;
-  int num_samples;
-  int num_sites;
-  double mutation_rate;
-  std::vector<double> physical_positions;
-  std::vector<double> genetic_positions;
-  std::vector<double> bp_sizes;
-  std::vector<double> cm_sizes;
-  std::vector<double> bp_boundaries;
-  std::vector<double> cm_boundaries;
-  bool sparse;
-
   ThreadsLowMem(const std::vector<int> _target_ids, const std::vector<double>& _physical_positions,
                 const std::vector<double>& _genetic_positions, std::vector<double> ne,
                 std::vector<double> ne_times, double _mutation_rate, bool _sparse);
@@ -72,12 +53,45 @@ class ThreadsLowMem {
 
   // 4. save output (done on Python side)
 
-  int count_branches();
+  int count_branches() const;
 
   // Make pickle-able path output
   std::tuple<std::vector<std::vector<int>>, std::vector<std::vector<int>>,
              std::vector<std::vector<double>>, std::vector<std::vector<int>>>
   serialize_paths();
+
+public:
+  // This object will only run the HMM for these ids
+  std::vector<int> target_ids;
+  std::unordered_map<int, double> expected_branch_lengths;
+  double mean_bp_size = 0.0;
+  std::unordered_map<int, int> segment_indices;
+  std::unordered_map<int, ViterbiPath> paths;
+  int num_samples = 0;
+  int num_sites = 0;
+  double mutation_rate = 0.0;
+  std::vector<double> physical_positions;
+  std::vector<double> genetic_positions;
+  std::vector<double> bp_sizes;
+  std::vector<double> cm_sizes;
+  std::vector<double> bp_boundaries;
+  std::vector<double> cm_boundaries;
+  bool sparse = false;
+
+private:
+  Demography demography;
+
+  // 2. HMM quantites
+  int hmm_sites_processed = 0;
+  std::unordered_map<int, ViterbiState> hmms;
+  int match_group_idx = 0;
+  std::vector<MatchGroup> match_groups;
+
+  // 3. Path segment and path dating quantites
+  HMM psmc;
+  int het_sites_processed = 0;
+  int n_hmm_samples = 100;
+  int hmm_min_sites = 10;
 };
 
 #endif // THREADS_ARG_THREADS_LOW_MEM_HPP
diff --git a/src/ViterbiLowMem.cpp b/src/ViterbiLowMem.cpp
index 4c8106a..27273f5 100644
--- a/src/ViterbiLowMem.cpp
+++ b/src/ViterbiLowMem.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "ViterbiLowMem.hpp"
 #include <algorithm>
 #include <iostream>
@@ -7,12 +23,21 @@
 #include <unordered_set>
 #include <vector>
 
+namespace {
+
+const int ALLELE_UNPHASED_HET = -7;
+
+inline std::size_t coord_id_key(int i, int j) {
+  return (static_cast<std::size_t>(i) << 32) | static_cast<std::size_t>(j);
+}
+
+} // namespace
 
 TracebackNode::TracebackNode(int _sample_id, int _site, TracebackNode* _previous, double _score)
     : sample_id(_sample_id), site(_site), previous(_previous), score(_score) {
 }
 
-size_t TracebackNode::key() {
+std::size_t TracebackNode::key() const {
   return coord_id_key(site, sample_id);
 }
 
@@ -31,8 +56,8 @@ void ViterbiPath::reverse() {
   std::reverse(sample_ids.begin(), sample_ids.end());
 }
 
-int ViterbiPath::size() {
-  return segment_starts.size();
+int ViterbiPath::size() const {
+  return static_cast<int>(segment_starts.size());
 }
 
 void ViterbiPath::append(int segment_start, int sample_id) {
@@ -68,7 +93,7 @@ void ViterbiPath::map_positions(std::vector<int>& positions) {
 std::tuple<std::vector<int>, std::vector<int>, std::vector<double>>
 ViterbiPath::dump_data_in_range(int start, int end) {
   int n_segs = size();
-  if (start == -1 && end == -1 || n_segs == 0) {
+  if (((start == -1) && (end == -1)) || (n_segs == 0)) {
     return std::tuple<std::vector<int>, std::vector<int>, std::vector<double>>(
         bp_starts, sample_ids, heights);
   }
@@ -82,8 +107,8 @@ ViterbiPath::dump_data_in_range(int start, int end) {
   for (int i = 0; i < n_segs; i++) {
     int seg_start = bp_starts.at(i);
     int seg_end = i < n_segs - 1 ? bp_starts.at(i + 1) : tmp_end;
-    if (seg_start <= tmp_start && tmp_start < seg_end ||
-        tmp_start <= seg_start && seg_start < tmp_end) {
+    if (((seg_start <= tmp_start) && (tmp_start < seg_end)) ||
+        ((tmp_start <= seg_start) && (seg_start < tmp_end))) {
       out_starts.push_back(std::max(tmp_start, seg_start));
       out_ids.push_back(sample_ids.at(i));
       out_heights.push_back(heights.at(i));
@@ -104,7 +129,7 @@ ViterbiState::ViterbiState(int _target_id, std::vector<int> _sample_ids)
                              std::to_string(target_id));
   }
   for (int sample_id : sample_ids) {
-    size_t key = coord_id_key(0, sample_id);
+    std::size_t key = coord_id_key(0, sample_id);
     traceback_states.emplace(key, TracebackNode(sample_id, 0, nullptr, 0.));
     current_tracebacks[sample_id] = &traceback_states.at(key);
   }
@@ -112,7 +137,6 @@ ViterbiState::ViterbiState(int _target_id, std::vector<int> _sample_ids)
   best_match = sample_ids.at(0);
 }
 
-// -9 is "missing" (not implemented yet) and -7 is "unphased"
 void ViterbiState::process_site(const std::vector<int>& genotype, double rho, double rho_c,
                                 double mu, double mu_c) {
   int current_site = sites_processed;
@@ -124,8 +148,7 @@ void ViterbiState::process_site(const std::vector<int>& genotype, double rho, do
   for (int sample_id : sample_ids) {
     int allele = genotype.at(sample_id);
     double copy_penalty;
-    // "-7" encodes unphased heterozygotes, because why not
-    if (allele == -7 || observed_allele == -7) {
+    if ((allele == ALLELE_UNPHASED_HET) || (observed_allele == ALLELE_UNPHASED_HET)) {
       copy_penalty = (mu_c + mu) / 2.;
     }
     else {
@@ -135,7 +158,7 @@ void ViterbiState::process_site(const std::vector<int>& genotype, double rho, do
       // If we've just added new sites (this will happen vary rarely),
       // recombine from previous best state
       new_score = best_score + copy_penalty + rho;
-      size_t key = coord_id_key(current_site, sample_id);
+      std::size_t key = coord_id_key(current_site, sample_id);
       traceback_states.emplace(key, TracebackNode(sample_id, current_site, prev_best, new_score));
       current_tracebacks[sample_id] = &traceback_states.at(key);
     }
@@ -150,7 +173,7 @@ void ViterbiState::process_site(const std::vector<int>& genotype, double rho, do
       else {
         // If we recombine, add a new branch
         new_score = best_score + copy_penalty + rho;
-        size_t key = coord_id_key(current_site, sample_id);
+        std::size_t key = coord_id_key(current_site, sample_id);
         traceback_states.emplace(key, TracebackNode(sample_id, current_site, prev_best, new_score));
         current_tracebacks.at(sample_id) = &traceback_states.at(key);
       }
@@ -180,7 +203,7 @@ void ViterbiState::set_samples(std::unordered_set<int> new_sample_ids) {
 }
 
 void ViterbiState::prune() {
-  std::unordered_map<size_t, TracebackNode> tmp_traceback_states;
+  std::unordered_map<std::size_t, TracebackNode> tmp_traceback_states;
 
   for (int sample_id : sample_ids) {
     TracebackNode* state = current_tracebacks.at(sample_id);
@@ -197,12 +220,13 @@ void ViterbiState::prune() {
 }
 
 // add everything above and return a key to the new address
-TracebackNode* ViterbiState::recursive_insert(std::unordered_map<size_t, TracebackNode>& state_map,
-                                              TracebackNode* state) {
+TracebackNode*
+ViterbiState::recursive_insert(std::unordered_map<std::size_t, TracebackNode>& state_map,
+                               TracebackNode* state) {
   if (state == nullptr) {
     return nullptr;
   }
-  size_t key = state->key();
+  std::size_t key = state->key();
   if (!state_map.count(key)) {
     TracebackNode* parent = state->previous;
     TracebackNode* new_parent = recursive_insert(state_map, parent);
@@ -211,8 +235,8 @@ TracebackNode* ViterbiState::recursive_insert(std::unordered_map<size_t, Traceba
   return &state_map.at(key);
 }
 
-int ViterbiState::count_branches() {
-  return traceback_states.size();
+int ViterbiState::count_branches() const {
+  return static_cast<int>(traceback_states.size());
 }
 
 ViterbiPath ViterbiState::traceback() {
diff --git a/src/ViterbiLowMem.hpp b/src/ViterbiLowMem.hpp
index a3352be..9a288ea 100644
--- a/src/ViterbiLowMem.hpp
+++ b/src/ViterbiLowMem.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef THREADS_ARG_VITERBI_LOW_MEM_HPP
 #define THREADS_ARG_VITERBI_LOW_MEM_HPP
 
@@ -7,30 +23,19 @@
 #include <vector>
 
 class TracebackNode {
-private:
-  inline size_t coord_id_key(int i, int j) {
-    return (size_t) i << 32 | (unsigned int) j;
-  }
+public:
+  TracebackNode(int _sample_id, int _site, TracebackNode* _previous, double _score);
+  std::size_t key() const;
 
 public:
   int sample_id = 0;
   int site = 0;
-  double score = 0.0;
   TracebackNode* previous = nullptr;
-  TracebackNode(int _sample_id, int _site, TracebackNode* _previous, double _score);
-  size_t key();
+  double score = 0.0;
 };
 
 class ViterbiPath {
-private:
 public:
-  double score = 0.0;
-  int target_id = 0;
-  std::vector<int> bp_starts;
-  std::vector<int> segment_starts;
-  std::vector<int> sample_ids;
-  std::vector<double> heights;
-  std::vector<int> het_sites;
   ViterbiPath(int _target_id);
   ViterbiPath(int _target_id, std::vector<int> _segment_starts, std::vector<int> _sample_ids,
               std::vector<double> _heights, std::vector<int> _het_sites);
@@ -39,18 +44,33 @@ class ViterbiPath {
   std::tuple<std::vector<int>, std::vector<int>, std::vector<double>>
   dump_data_in_range(int start_idx, int end_idx);
   void reverse();
-  int size();
+  int size() const;
   void map_positions(std::vector<int>& positions);
+
+public:
+  double score = 0.0;
+  int target_id = 0;
+  std::vector<int> bp_starts;
+  std::vector<int> segment_starts;
+  std::vector<int> sample_ids;
+  std::vector<double> heights;
+  std::vector<int> het_sites;
 };
 
 class ViterbiState {
+public:
+  ViterbiState(int _target_id, std::vector<int> _sample_ids);
+
+  void process_site(const std::vector<int>& genotype, double rho, double rho_c, double _mu,
+                    double _mu_c);
+  void set_samples(std::unordered_set<int> new_sample_ids);
+  int count_branches() const;
+  void prune();
+  ViterbiPath traceback();
 
 private:
-  inline size_t coord_id_key(int i, int j) {
-    return (size_t) i << 32 | (unsigned int) j;
-  }
-  std::unordered_map<size_t, TracebackNode> traceback_states;
-  TracebackNode* recursive_insert(std::unordered_map<size_t, TracebackNode>& state_map,
+  std::unordered_map<std::size_t, TracebackNode> traceback_states;
+  TracebackNode* recursive_insert(std::unordered_map<std::size_t, TracebackNode>& state_map,
                                   TracebackNode* state);
 
 public:
@@ -62,15 +82,6 @@ class ViterbiState {
   std::vector<int> sample_ids;
   std::vector<double> sample_scores;
   std::unordered_map<int, TracebackNode*> current_tracebacks;
-
-  ViterbiState(int _target_id, std::vector<int> _sample_ids);
-
-  void process_site(const std::vector<int>& genotype, double rho, double rho_c, double _mu,
-                    double _mu_c);
-  void set_samples(std::unordered_set<int> new_sample_ids);
-  int count_branches();
-  void prune();
-  ViterbiPath traceback();
 };
 
 #endif // THREADS_ARG_VITERBI_LOW_MEM_HPP
diff --git a/src/threads_arg/__init__.py b/src/threads_arg/__init__.py
index 580401a..6ef93f0 100644
--- a/src/threads_arg/__init__.py
+++ b/src/threads_arg/__init__.py
@@ -1 +1,17 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 from threads_arg_python_bindings import *
diff --git a/src/threads_arg/__main__.py b/src/threads_arg/__main__.py
index 380a079..62a579c 100644
--- a/src/threads_arg/__main__.py
+++ b/src/threads_arg/__main__.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 import os
 import gc
 import sys
@@ -263,9 +279,9 @@ def partial_viterbi(pgen, mode, num_samples_hap, physical_positions, genetic_pos
             phased_out = np.empty((g_size, num_samples_hap // 2), dtype=np.uint8)
             if (phased_out == 0).any():
                 unphased_sites, unphased_samples = (1 - phased_out).nonzero()
-                # We encode "unphased het" by "-7"
-                alleles_out[unphased_sites, 2 * unphased_samples] = -7
-                alleles_out[unphased_sites, 2 * unphased_samples + 1] = -7
+                ALLELE_UNPHASED_HET = -7
+                alleles_out[unphased_sites, 2 * unphased_samples] = ALLELE_UNPHASED_HET
+                alleles_out[unphased_sites, 2 * unphased_samples + 1] = ALLELE_UNPHASED_HET
             reader.read_alleles_and_phasepresent_range(b_start, b_end, alleles_out, phased_out)
 
             # For each variant in chunk, pass the genotypes through Threads-LS
diff --git a/src/threads_arg/mapping_utils.py b/src/threads_arg/mapping_utils.py
index 3dc00e0..9ae4d5a 100644
--- a/src/threads_arg/mapping_utils.py
+++ b/src/threads_arg/mapping_utils.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 import arg_needle_lib
 import numpy as np
 import logging
diff --git a/src/threads_arg/utils.py b/src/threads_arg/utils.py
index 311f5c5..f355c50 100644
--- a/src/threads_arg/utils.py
+++ b/src/threads_arg/utils.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 import os
 import numpy as np
 import h5py
@@ -54,9 +70,9 @@ def interpolate_map(map_gz, pgen):
     Reading in map file (format has columns [chrom, SNP, cM-pos, bp])
     """
     if (map_gz[:-3] == ".gz") :
-        maps = pd.read_table(map_gz, header=None, compression='gzip', delim_whitespace=True)
+        maps = pd.read_table(map_gz, header=None, compression='gzip', sep="\\s+")
     else:
-        maps = pd.read_table(map_gz, header=None, delim_whitespace=True)
+        maps = pd.read_table(map_gz, header=None, sep="\\s+")
     cm_pos_map = maps[2].values.astype(np.float64)
     phys_pos_map = maps[3].values.astype(np.float64)
     pvar = pgen.replace("pgen", "pvar")
@@ -64,9 +80,9 @@ def interpolate_map(map_gz, pgen):
 
     physical_positions = None
     if os.path.isfile(bim):
-        physical_positions = np.array(pd.read_table(bim, delim_whitespace=True, header=None, comment='#')[3]).astype(np.float64)
+        physical_positions = np.array(pd.read_table(bim, sep="\\s+", header=None, comment='#')[3]).astype(np.float64)
     elif os.path.isfile(pvar):
-        physical_positions = np.array(pd.read_table(pvar, delim_whitespace=True, header=None, comment='#')[1]).astype(np.float64)
+        physical_positions = np.array(pd.read_table(pvar, sep="\\s+", header=None, comment='#')[1]).astype(np.float64)
     else:
         raise RuntimeError(f"Can't find {bim} or {pvar}")
 
@@ -87,10 +103,10 @@ def get_map_from_bim(pgen, rho):
     cm_out = None
     physical_positions = None
     if os.path.isfile(bim):
-        physical_positions = np.array(pd.read_table(bim, delim_whitespace=True, header=None, comment='#')[3]).astype(int)
+        physical_positions = np.array(pd.read_table(bim, sep="\\s+", header=None, comment='#')[3]).astype(int)
         cm_out = rho * 100 * physical_positions
     elif os.path.isfile(pvar):
-        physical_positions = np.array(pd.read_table(pvar, delim_whitespace=True, header=None, comment='#')[1]).astype(int)
+        physical_positions = np.array(pd.read_table(pvar, sep="\\s+", header=None, comment='#')[1]).astype(int)
         cm_out = rho * 100 * physical_positions
     else:
         raise RuntimeError(f"Can't find {bim} or {pvar}")
@@ -101,5 +117,5 @@ def get_map_from_bim(pgen, rho):
     return cm_out, physical_positions
 
 def parse_demography(demography):
-    d = pd.read_table(demography, delim_whitespace=True, header=None)
+    d = pd.read_table(demography, sep="\\s+", header=None)
     return list(d[0]), list(d[1])
diff --git a/src/threads_arg_pybind.cpp b/src/threads_arg_pybind.cpp
index 9ac0c77..4168f65 100644
--- a/src/threads_arg_pybind.cpp
+++ b/src/threads_arg_pybind.cpp
@@ -1,8 +1,23 @@
-#include "TGEN.hpp"
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "ImputationMatcher.hpp"
+#include "TGEN.hpp"
 #include "ThreadsLowMem.hpp"
 
-#include <pybind11/eigen.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <vector>
@@ -63,7 +78,8 @@ PYBIND11_MODULE(threads_arg_python_bindings, m) {
   py::class_<Matcher>(m, "Matcher")
       .def(py::init<int, std::vector<double>, double, double, int, int>(), "Initialize",
            py::arg("num_samples"), py::arg("genetic_positions"), py::arg("query_interval_size"),
-           py::arg("match_group_interval_size"), py::arg("neighborhood_size") = 4, py::arg("min_matches") = 4)
+           py::arg("match_group_interval_size"), py::arg("neighborhood_size") = 4,
+           py::arg("min_matches") = 4)
       .def_readonly("query_sites", &Matcher::query_sites)
       .def_readonly("genetic_positions", &Matcher::genetic_positions)
       .def_readonly("query_interval_size", &Matcher::query_interval_size)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0dddbfc..7614629 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,18 @@
-# This file is part of https://github.com/PalamaraLab/threads which is released under the GPL-3.0 license.
-# See accompanying LICENSE and COPYING for copyright notice and full details.
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Get Catch2
 Include(FetchContent)
diff --git a/test/test_data_snapshot.py b/test/test_data_snapshot.py
index 9c9cb4f..318b76f 100644
--- a/test/test_data_snapshot.py
+++ b/test/test_data_snapshot.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 import numpy as np
 import h5py
 import tempfile
diff --git a/test/test_demography.cpp b/test/test_demography.cpp
index fa9b815..c2edca1 100644
--- a/test/test_demography.cpp
+++ b/test/test_demography.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "Demography.hpp"
 
 #include <catch2/catch_test_macros.hpp>
@@ -30,6 +46,6 @@ TEST_CASE("Demography constructor exceptions") {
 TEST_CASE("Demography std_to_gen exceptions") {
   SECTION("std_to_gen value must be non-negative") {
     Demography d{{1}, {0}};
-    CHECK_THROWS_WITH(d.std_to_gen(-1), Catch::Matchers::ContainsSubstring("Demography can only convert non-negative times to std"));
+    CHECK_THROWS_WITH(d.std_to_gen(-1), Catch::Matchers::ContainsSubstring("Demography can only convert times greater than the first entry"));
   }
 }
diff --git a/test/test_viterbi_state.cpp b/test/test_viterbi_state.cpp
index e1bdabe..3356a6d 100644
--- a/test/test_viterbi_state.cpp
+++ b/test/test_viterbi_state.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
 #include "HMM.hpp"
 
 #include <catch2/catch_test_macros.hpp>