diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index f814bb0..18f386b 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -9,7 +9,7 @@ RUN apt-get install -y \
# Packages for local testing
RUN pip install --upgrade pip setuptools wheel
-RUN pip install pytest h5py numpy
+RUN pip install pytest numpy
# Packages for local build
RUN pip install cmake ninja
@@ -25,5 +25,4 @@ RUN apt-get install -y \
RUN apt-get install -y \
python3-dev \
libboost-dev \
- libeigen3-dev \
pybind11-dev
diff --git a/.github/workflows/build-cpp-libs.yml b/.github/workflows/build-cpp-libs.yml
index c3b3dfa..05e8cd5 100644
--- a/.github/workflows/build-cpp-libs.yml
+++ b/.github/workflows/build-cpp-libs.yml
@@ -19,16 +19,16 @@ jobs:
include:
- os: ubuntu-22.04
build-type: Debug
- system-deps: "sudo apt -y install libboost-dev libboost-math-dev libeigen3-dev"
+ system-deps: "sudo apt -y install libboost-dev libboost-math-dev"
- os: ubuntu-22.04
build-type: Release
- system-deps: "sudo apt -y install libboost-dev libboost-math-dev libeigen3-dev"
+ system-deps: "sudo apt -y install libboost-dev libboost-math-dev"
- os: macos-14
build-type: Debug
- system-deps: "brew install boost eigen"
+ system-deps: "brew install boost"
- os: macos-14
build-type: Release
- system-deps: "brew install boost eigen"
+ system-deps: "brew install boost"
steps:
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
index f80bc56..02fc999 100644
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -27,12 +27,12 @@ jobs:
- os: macos-13
arch: x86_64
py-vers: cp38-* cp39-* cp310-* cp311-* cp312-*
- before-all: brew install cmake ninja boost eigen
+ before-all: brew install cmake ninja boost
extra-env: CC="$(brew --prefix llvm@15)/bin/clang" CXX="$(brew --prefix llvm@15)/bin/clang++" HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
- os: macos-14
arch: arm64
py-vers: cp39-* cp310-* cp311-* cp312-*
- before-all: brew install cmake ninja boost eigen
+ before-all: brew install cmake ninja boost
extra-env: CC="$(brew --prefix llvm@15)/bin/clang" CXX="$(brew --prefix llvm@15)/bin/clang++" HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
env:
diff --git a/.github/workflows/ctest-macos.yml b/.github/workflows/ctest-macos.yml
index baf3af5..f0a8882 100644
--- a/.github/workflows/ctest-macos.yml
+++ b/.github/workflows/ctest-macos.yml
@@ -16,7 +16,7 @@ jobs:
uses: actions/checkout@v2
- name: install system packages
- run: brew install boost eigen
+ run: brew install boost
- name: make build directory
run: mkdir build_dir
diff --git a/.github/workflows/ctest-ubuntu.yml b/.github/workflows/ctest-ubuntu.yml
index e7cc5c0..dcd7e2f 100644
--- a/.github/workflows/ctest-ubuntu.yml
+++ b/.github/workflows/ctest-ubuntu.yml
@@ -21,7 +21,7 @@ jobs:
- name: install system packages
run: |
sudo apt -y update
- sudo apt -y install libboost-dev libeigen3-dev
+ sudo apt -y install libboost-dev
- name: make build directory
run: mkdir build_dir
diff --git a/.github/workflows/pytest-ubuntu.yml b/.github/workflows/pytest-ubuntu.yml
index c809702..7395f96 100644
--- a/.github/workflows/pytest-ubuntu.yml
+++ b/.github/workflows/pytest-ubuntu.yml
@@ -38,7 +38,7 @@ jobs:
- name: install system packages
run: |
sudo apt -y update
- sudo apt -y install libboost-dev libeigen3-dev
+ sudo apt -y install libboost-dev
- name: install python dependencies
run: |
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6e42dfb..a67d847 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,7 +7,85 @@
"cmake.generator": "Ninja",
"cmake.configureOnOpen": false,
"cmake.configureSettings": {
- //"ENABLE_TESTING": "ON",
- //"WARNINGS_AS_ERRORS": "TRUE",
+ "ENABLE_TESTING": "ON",
+ "WARNINGS_AS_ERRORS": "TRUE"
+ },
+ "files.associations": {
+ "iosfwd": "cpp",
+ "cctype": "cpp",
+ "clocale": "cpp",
+ "cmath": "cpp",
+ "csignal": "cpp",
+ "cstdarg": "cpp",
+ "cstddef": "cpp",
+ "cstdio": "cpp",
+ "cstdlib": "cpp",
+ "cstring": "cpp",
+ "ctime": "cpp",
+ "cwchar": "cpp",
+ "cwctype": "cpp",
+ "array": "cpp",
+ "atomic": "cpp",
+ "strstream": "cpp",
+ "bit": "cpp",
+ "*.tcc": "cpp",
+ "bitset": "cpp",
+ "chrono": "cpp",
+ "codecvt": "cpp",
+ "compare": "cpp",
+ "complex": "cpp",
+ "concepts": "cpp",
+ "condition_variable": "cpp",
+ "cstdint": "cpp",
+ "deque": "cpp",
+ "forward_list": "cpp",
+ "list": "cpp",
+ "map": "cpp",
+ "set": "cpp",
+ "string": "cpp",
+ "unordered_map": "cpp",
+ "unordered_set": "cpp",
+ "vector": "cpp",
+ "exception": "cpp",
+ "algorithm": "cpp",
+ "functional": "cpp",
+ "iterator": "cpp",
+ "memory": "cpp",
+ "memory_resource": "cpp",
+ "numeric": "cpp",
+ "optional": "cpp",
+ "random": "cpp",
+ "ratio": "cpp",
+ "regex": "cpp",
+ "string_view": "cpp",
+ "system_error": "cpp",
+ "tuple": "cpp",
+ "type_traits": "cpp",
+ "utility": "cpp",
+ "fstream": "cpp",
+ "future": "cpp",
+ "initializer_list": "cpp",
+ "iomanip": "cpp",
+ "iostream": "cpp",
+ "istream": "cpp",
+ "limits": "cpp",
+ "mutex": "cpp",
+ "new": "cpp",
+ "numbers": "cpp",
+ "ostream": "cpp",
+ "semaphore": "cpp",
+ "sstream": "cpp",
+ "stdexcept": "cpp",
+ "stop_token": "cpp",
+ "streambuf": "cpp",
+ "thread": "cpp",
+ "cfenv": "cpp",
+ "cinttypes": "cpp",
+ "typeindex": "cpp",
+ "typeinfo": "cpp",
+ "valarray": "cpp",
+ "variant": "cpp",
+ "core": "cpp",
+ "geometry": "cpp"
}
}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60d9b8b..197cef7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,18 @@
-# This file is part of https://github.com/PalamaraLab/threads which is released under the GPL-3.0 license.
-# See accompanying LICENSE and COPYING for copyright notice and full details.
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
cmake_minimum_required(VERSION 3.16)
message(STATUS "Using CMake version ${CMAKE_VERSION}")
@@ -16,9 +29,7 @@ include(cmake/ProjectSettings.cmake)
# Link this 'library' to use the warnings specified in CompilerWarnings.cmake
add_library(project_warnings INTERFACE)
include(cmake/CompilerWarnings.cmake)
-
-# FIXME Uncomment to enable warnings
-#set_project_warnings(project_warnings)
+set_project_warnings(project_warnings)
# Sanitiser options if supported by compiler
include(cmake/Sanitisers.cmake)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index 7ecd6aa..1abd197 100644
--- a/README.md
+++ b/README.md
@@ -8,12 +8,9 @@ module load git/2.36.0-GCCcore-11.3.0-nodocs
module load Python/3.10.4-GCCcore-11.3.0
module load GSL/2.7-GCC-11.3.0
module load Boost/1.79.0-GCC-11.3.0
-module load Eigen/3.4.0-GCCcore-11.3.0
module load pybind11/2.9.2-GCCcore-11.3.0
```
-NB: Eigen is only used for functionality in TGEN.cpp. We can consider releasing without TGEN to have fewer dependencies.
-
Fire up and activate a new venv:
```
python -m venv venv
diff --git a/cmake/CompilerWarnings.cmake b/cmake/CompilerWarnings.cmake
index c2965c8..2adab9c 100644
--- a/cmake/CompilerWarnings.cmake
+++ b/cmake/CompilerWarnings.cmake
@@ -44,7 +44,9 @@ function(set_project_warnings project_name)
-Woverloaded-virtual # warn if you overload (not override) a virtual function
-Wpedantic # warn if non-standard C++ is used
-Wconversion # warn on type conversions that may lose data
- -Wsign-conversion # warn on sign conversions
+ # TODO replace -Wno-sign-conversion with -Wsign-conversion (ticket #26)
+ -Wno-sign-conversion # disabled until int/size_t resolved
+ #-Wsign-conversion # warn on sign conversions
-Wnull-dereference # warn if a null dereference is detected
-Wdouble-promotion # warn if float is implicit promoted to double
-Wformat=2 # warn on security issues around functions that format output (ie printf)
diff --git a/setup.py b/setup.py
index 1823220..70a9b2e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,19 @@
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
# Based on https://github.com/pybind/cmake_example
import os
@@ -46,7 +62,7 @@ def build_extension(self, ext):
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
f"-DPYTHON_EXECUTABLE={sys.executable}",
f"-DCMAKE_BUILD_TYPE={cfg}",
- "-DWARNINGS_AS_ERRORS=OFF",
+ "-DWARNINGS_AS_ERRORS=ON",
"-DENABLE_TESTING=OFF",
"-DBoost_NO_BOOST_CMAKE=ON", # from arni: o/w boost 1.74 gets confused re. mtx
"-DMAKE_DOCS=OFF"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5dd6146..451e5c2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,18 @@
-# This file is part of https://github.com/PalamaraLab/threads which is released under the GPL-3.0 license.
-# See accompanying LICENSE and COPYING for copyright notice and full details.
+# This file is part of the Threads software suite.
+# Copyright (C) 2024 Threads Developers.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Find Boost
find_package(Boost REQUIRED)
@@ -14,23 +27,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
endif()
-# Try to find Eigen3 version 3.4 locally installed
-find_package(Eigen3 3.4 QUIET)
-
-if(NOT Eigen3_FOUND)
- include(FetchContent)
- message(STATUS "Suitable local version of Eigen3 (>= 3.4) not found, fetching from repository...")
- FetchContent_Declare(
- Eigen3
- GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
- GIT_TAG 9df21dc8b4b576a7aa5c0094daa8d7e8b8be60f0 # 3.4 branch at 2024-03-15
- GIT_SHALLOW TRUE
- )
- FetchContent_MakeAvailable(Eigen3)
- set(Eigen3_VERSION "3.4")
-endif()
-message(STATUS "Found Eigen3 ${Eigen3_VERSION}")
-
# Threads static library
set(threads_arg_src
Demography.cpp
@@ -77,8 +73,6 @@ target_link_libraries(threads_arg
PRIVATE
Boost::headers
project_warnings
- PUBLIC
- Eigen3::Eigen
)
# Conditionally create python bindings
diff --git a/src/Demography.cpp b/src/Demography.cpp
index c6ca316..6c1c9c4 100644
--- a/src/Demography.cpp
+++ b/src/Demography.cpp
@@ -1,20 +1,36 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "Demography.hpp"
-#include // for std::sort
+
+#include
#include
#include
#include
-#include // to throw errors
+#include
#include
-
Demography::Demography(std::vector _sizes, std::vector _times)
: times(_times), sizes(_sizes), std_times(std::vector()) {
if (times.size() != sizes.size()) {
throw std::runtime_error("Demography times and sizes must have equal length");
}
- for (int i = 0; i < times.size(); i++) {
- if (times[i] < 0 || sizes[i] <= 0) {
+ for (std::size_t i = 0; i < times.size(); i++) {
+ if ((times[i] < 0.0) || (sizes[i] <= 0.0)) {
throw std::runtime_error("Demography expects non-negative times and strictly positive sizes");
}
@@ -33,9 +49,9 @@ Demography::Demography(std::vector _sizes, std::vector _times)
}
// Compute times in standard coalescent space
- int K = times.size();
+ std::size_t K = times.size();
std_times.push_back(0.0);
- for (int i = 1; i < K; i++) {
+ for (std::size_t i = 1; i < K; i++) {
double d = (times[i] - times[i - 1]) / sizes[i - 1];
std_times.push_back(std_times[i - 1] + d);
}
@@ -45,27 +61,28 @@ Demography::Demography(std::vector _sizes, std::vector _times)
}
double Demography::std_to_gen(const double t) {
- if (t < 0) {
- throw std::runtime_error("Demography can only convert non-negative times to std");
+ if (t < std_times.front()) {
+ throw std::runtime_error("Demography can only convert times greater than the first entry");
}
+
// Find the highest i s.t. std_times[i] <= t.
- int i =
- std::distance(std_times.begin(), std::upper_bound(std_times.begin(), std_times.end(), t)) - 1;
+ const auto it = std::upper_bound(std_times.begin(), std_times.end(), t);
+
+ // Defensive check as the t < std_times.front check above should mean `it` is never first
+ if (it == std_times.begin()) {
+ throw std::runtime_error("Unexpected std_to_gen upper bound finding first element");
+ }
+
+ std::size_t i = static_cast(it - std_times.begin() - 1);
return times[i] + (t - std_times[i]) * sizes[i];
}
-/**
- * @brief Compute the expected length of the N-th branch
- *
- * @param N
- * @return double
- */
double Demography::expected_branch_length(const int N) {
return std_to_gen(2. / N);
}
std::ostream& operator<<(std::ostream& os, const Demography& d) {
- for (int i = 0; i < d.sizes.size(); i++) {
+ for (std::size_t i = 0; i < d.sizes.size(); i++) {
std::cout << d.times[i] << " " << d.sizes[i] << " " << d.std_times[i] << std::endl;
}
return os;
diff --git a/src/Demography.hpp b/src/Demography.hpp
index 06efa23..f6249ee 100644
--- a/src/Demography.hpp
+++ b/src/Demography.hpp
@@ -1,30 +1,45 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_DEMOGRAPHY_HPP
#define THREADS_ARG_DEMOGRAPHY_HPP
-#include
#include
+#include
-// This class is a wrapper for simple coalescence time queries under a piecewise-constant demography
+/// This class is a wrapper for simple coalescence time queries under a piecewise-constant
+/// demography
class Demography {
public:
- // These are in generations
- std::vector times;
- // These are *haploid*
- std::vector sizes;
- // Normalised coalescence times
- std::vector std_times;
- // Expected pairwise coalescent time
- double expected_time = 0.0;
-
Demography(std::vector _times, std::vector _sizes);
- // Map a time in the standard coalescent to generations under this demography
+ /// Map a time in the standard coalescent to generations under this demography
double std_to_gen(const double t);
- // The expected branch length of a new branch in a tree with N leaves
+
+ /// The expected branch length of a new branch in a tree with N leaves
double expected_branch_length(const int N);
- // Output
+ /// Stream output
friend std::ostream& operator<<(std::ostream& os, const Demography& demography);
+
+public:
+ std::vector times; ///< These are in generations
+ std::vector sizes; ///< These are *haploid*
+ std::vector std_times; ///< Normalised coalescence times
+ double expected_time = 0.0; ///< Expected pairwise coalescent time
};
#endif // THREADS_ARG_DEMOGRAPHY_HPP
diff --git a/src/HMM.cpp b/src/HMM.cpp
index bbea44a..dd48636 100644
--- a/src/HMM.cpp
+++ b/src/HMM.cpp
@@ -1,9 +1,25 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "HMM.hpp"
+
#include
#include
#include
-
HMM::HMM(Demography demography, std::vector bp_sizes, std::vector cm_sizes,
double mutation_rate, int K)
: num_states(K) {
@@ -12,7 +28,8 @@ HMM::HMM(Demography demography, std::vector bp_sizes, std::vector trellis_row(num_states, 0.0);
std::vector pointer_row(num_states, 0);
trellis.push_back(trellis_row);
@@ -21,19 +38,19 @@ HMM::HMM(Demography demography, std::vector bp_sizes, std::vector HMM::compute_expected_times(Demography demography, const int K) {
- std::vector expected_times;
+ std::vector result;
double k = static_cast(num_states);
boost::math::exponential e;
for (int i = 1; i <= K; i++) {
double t = demography.std_to_gen(quantile(e, (i - 0.5) / k));
- expected_times.push_back(t);
+ result.push_back(t);
}
- return expected_times;
+ return result;
}
void HMM::compute_recombination_scores(std::vector cm_sizes) {
- for (int i = 0; i < cm_sizes.size(); i++) {
+ for (std::size_t i = 0; i < cm_sizes.size(); i++) {
non_transition_score.push_back(std::vector());
transition_score.push_back(std::vector());
for (int k = 0; k < num_states; k++) {
@@ -51,14 +68,14 @@ void HMM::compute_recombination_scores(std::vector cm_sizes) {
}
void HMM::compute_mutation_scores(std::vector bp_sizes, double mutation_rate) {
- for (int i = 0; i < bp_sizes.size(); i++) {
+ for (std::size_t i = 0; i < bp_sizes.size(); i++) {
hom_score.push_back(std::vector());
het_score.push_back(std::vector());
for (int k = 0; k < num_states; k++) {
double t = expected_times[k];
+
// TODO: use mean-bp sizes here as in the main algorithm
const double l = 2. * mutation_rate * bp_sizes[i] * t;
- const double trans = std::log1p(-std::exp(-l));
// log-prob of mutating
het_score[i].push_back(std::log1p(-std::exp(-l)));
@@ -75,17 +92,20 @@ void HMM::compute_mutation_scores(std::vector bp_sizes, double mutation_
std::vector HMM::breakpoints(std::vector observations, int start) {
// Viterbi
// Initialize
- int neighborhood_size = observations.size();
+ int neighborhood_size = static_cast(observations.size());
std::vector z(neighborhood_size);
- int end = start + neighborhood_size;
for (int i = 0; i < num_states; i++) {
double score = observations[0] ? het_score[start][i] : hom_score[start][i];
trellis[start][i] = score;
}
+ if (num_states > std::numeric_limits::max()) {
+ throw std::runtime_error("Unable to store breakpoints for more than 2^16 states");
+ }
+
// Main routine
- double score;
- unsigned short running_argmax;
+ double score = 0.0;
+ unsigned short running_argmax = 0;
for (int j = 1; j < neighborhood_size; j++) {
for (int i = 0; i < num_states; i++) {
double running_max = 0;
@@ -98,7 +118,7 @@ std::vector HMM::breakpoints(std::vector observations, int start) {
if (score > running_max || k == 0) {
running_max = score;
- running_argmax = k;
+ running_argmax = static_cast(k);
}
}
trellis[j + start][i] = running_max;
@@ -113,7 +133,7 @@ std::vector HMM::breakpoints(std::vector observations, int start) {
double s = trellis[start + neighborhood_size - 1][k];
if (s > running_max) {
running_max = s;
- argmax = k;
+ argmax = static_cast(k);
}
}
diff --git a/src/HMM.hpp b/src/HMM.hpp
index 379ecba..0ce0201 100644
--- a/src/HMM.hpp
+++ b/src/HMM.hpp
@@ -1,32 +1,47 @@
-#ifndef DEMOGRAPHY
-#define DEMOGRAPHY
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "Demography.hpp"
-#endif // DEMOGRAPHY
-#include
#include
+#include
-
-// This class contains the PSMC-like algorithm used to break up segments for small-N inference
+/// This class contains the PSMC-like algorithm used to break up segments for small-N inference
class HMM {
public:
- // HMM data
- int num_states = 0;
- std::vector expected_times;
+ HMM(Demography demography, std::vector bp_sizes, std::vector cm_sizes,
+ double mutation_rate, int K);
+ HMM() = default;
- std::vector> non_transition_score;
- std::vector> transition_score;
- std::vector> hom_score;
- std::vector> het_score;
void compute_recombination_scores(std::vector cm_sizes);
void compute_mutation_scores(std::vector bp_sizes, double mutation_rate);
std::vector compute_expected_times(Demography demography, int K);
+ std::vector breakpoints(std::vector observations, int start);
+
+public:
+ // HMM data
+ int num_states = 0;
+ std::vector expected_times;
// HMM working quantities
std::vector> trellis;
std::vector> pointers;
- HMM(Demography demography, std::vector bp_sizes, std::vector cm_sizes, double mutation_rate, int K);
- HMM() {};
- std::vector breakpoints(std::vector observations, int start);
+ std::vector> non_transition_score;
+ std::vector> transition_score;
+ std::vector> hom_score;
+ std::vector> het_score;
};
diff --git a/src/ImputationMatcher.cpp b/src/ImputationMatcher.cpp
index 90cefd4..19e0390 100644
--- a/src/ImputationMatcher.cpp
+++ b/src/ImputationMatcher.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "ImputationMatcher.hpp"
#include
@@ -8,33 +24,37 @@
#include
#include
+// Uncomment this #define to enable a runtime check that genetic positions are in order. This
+// diagnostic check is left in whilst we may have issues during development.
+// #define IMPUTATION_MATCHER_CHECK_IN_ORDER
ImputationMatcher::ImputationMatcher(int _n_ref, int _n_target,
const std::vector& _genetic_positions,
double _query_interval_size, int _neighborhood_size)
- : num_reference(_n_ref), num_target(_n_target), genetic_positions(_genetic_positions),
- query_interval_size(_query_interval_size), neighborhood_size(_neighborhood_size) {
+ : neighborhood_size(_neighborhood_size), num_reference(_n_ref), num_target(_n_target),
+ query_interval_size(_query_interval_size), genetic_positions(_genetic_positions) {
if (genetic_positions.size() <= 2) {
throw std::runtime_error("Need at least 3 sites, found " +
std::to_string(genetic_positions.size()));
}
- num_sites = genetic_positions.size();
+ num_sites = static_cast(genetic_positions.size());
num_samples = num_reference + num_target;
sites_processed = 0;
- // Check maps are strictly increasing
- // for (int i = 0; i < num_sites - 1; i++) {
- // if (genetic_positions.at(i + 1) <= genetic_positions.at(i)) {
- // std::string prompt = "Genetic coordinates must be strictly increasing, found ";
- // prompt += std::to_string(genetic_positions[i + 1]) + " after " +
- // std::to_string(genetic_positions[i]);
- // throw std::runtime_error(prompt);
- // }
- // }
+#ifdef IMPUTATION_MATCHER_CHECK_IN_ORDER
+ for (int i = 0; i < num_sites - 1; i++) {
+ if (genetic_positions.at(i + 1) <= genetic_positions.at(i)) {
+ std::string prompt = "Genetic coordinates must be strictly increasing, found ";
+ prompt += std::to_string(genetic_positions[i + 1]) + " after " +
+ std::to_string(genetic_positions[i]);
+ throw std::runtime_error(prompt);
+ }
+ }
+#endif // IMPUTATION_MATCHER_CHECK_IN_ORDER
int query_site_idx = 1;
double gen_pos_offset = genetic_positions[0];
- for (int i = 0; i < genetic_positions.size(); i++) {
+ for (int i = 0; i < static_cast(genetic_positions.size()); i++) {
double cm = genetic_positions.at(i);
if (cm > gen_pos_offset + query_interval_size * query_site_idx) {
query_sites.push_back(i);
@@ -82,11 +102,11 @@ void ImputationMatcher::process_site(const std::vector& genotype) {
}
}
- if (genotype.size() != num_samples) {
+ if (static_cast(genotype.size()) != num_samples) {
throw std::runtime_error("invalid genotype vector size");
}
- if (next_query_site_idx < query_sites.size() &&
+ if (next_query_site_idx < static_cast(query_sites.size()) &&
sites_processed == query_sites.at(next_query_site_idx)) {
next_query_site_idx++;
int ref_allele_count = 0;
@@ -175,10 +195,10 @@ void ImputationMatcher::process_site(const std::vector& genotype) {
sites_processed++;
}
-std::unordered_map> ImputationMatcher::get_matches() {
+const std::unordered_map>& ImputationMatcher::get_matches() const {
return match_sets;
}
-std::vector ImputationMatcher::get_sorting() {
+const std::vector& ImputationMatcher::get_sorting() const {
return sorting;
}
diff --git a/src/ImputationMatcher.hpp b/src/ImputationMatcher.hpp
index d2202f7..8479db8 100644
--- a/src/ImputationMatcher.hpp
+++ b/src/ImputationMatcher.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_IMPUTATION_MATCHER_HPP
#define THREADS_ARG_IMPUTATION_MATCHER_HPP
@@ -5,23 +21,19 @@
#include
#include
-// NB this recycles a lot of code from Matcher.hpp
+/// This is a version of the Threads haplotype matching algorithm
+/// adapted to be used for imputation.
+/// NB this recycles a lot of code from Matcher.hpp
class ImputationMatcher {
+public:
+ ImputationMatcher(int _n_ref, int _n_target, const std::vector& _genetic_positions,
+ double _query_interval_size, int _neighborhood_size);
-private:
- int sites_processed = 0;
- int next_query_site_idx = 0;
-
- // pbwt quantities
- std::vector sorting;
- std::vector next_sorting;
-
- // querying quantities
- std::vector ref_sorting;
+ void process_site(const std::vector& genotype);
+ const std::unordered_map>& get_matches() const;
+ const std::vector& get_sorting() const;
public:
- // This is a version of the Threads haplotype matching algorithm
- // adapted to be used for imputation.
// TODO: include a second pass through data here to get divergence values and not do that using
// Threads-fastLS
int neighborhood_size = 0;
@@ -33,14 +45,17 @@ class ImputationMatcher {
double query_interval_size = 0.0;
std::unordered_map> match_sets;
std::vector genetic_positions;
- ImputationMatcher(int _n_ref, int _n_target, const std::vector& _genetic_positions,
- double _query_interval_size, int _neighborhood_size);
- // Do all the work
- void process_site(const std::vector& genotype);
- std::unordered_map> get_matches();
+private:
+ int sites_processed = 0;
+ int next_query_site_idx = 0;
- std::vector get_sorting();
+ // pbwt quantities
+ std::vector sorting;
+ std::vector next_sorting;
+
+ // querying quantities
+ std::vector ref_sorting;
};
#endif // THREADS_ARG_IMPUTATION_MATCHER_HPP
diff --git a/src/Matcher.cpp b/src/Matcher.cpp
index 733118e..a95ac05 100644
--- a/src/Matcher.cpp
+++ b/src/Matcher.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "Matcher.hpp"
#include
@@ -8,7 +24,6 @@
#include
#include
-
// For a given interval, this contains all the matches for all the samples
MatchGroup::MatchGroup(int _num_samples, double _cm_position)
: num_samples(_num_samples), cm_position(_cm_position) {
@@ -24,7 +39,7 @@ MatchGroup::MatchGroup(const std::vector& target_ids,
if (target_ids.size() != matches.size()) {
throw std::runtime_error("Inconsistent target/matches sizes");
}
- for (int i = 0; i < target_ids.size(); i++) {
+ for (int i = 0; i < static_cast(target_ids.size()); i++) {
match_candidates[target_ids[i]] = matches[i];
}
}
@@ -78,7 +93,7 @@ void MatchGroup::filter_matches(int min_matches) {
// Then determine top 4 candidates for neighboring groups
top_four_maps.reserve(num_samples);
for (int i = 0; i < num_samples; i++) {
- top_four_maps.emplace_back(std::min(4, (int) match_candidates.at(i).size()));
+ top_four_maps.emplace_back(std::min(4, static_cast(match_candidates.at(i).size())));
std::partial_sort_copy(match_candidates_counts.at(i).begin(),
match_candidates_counts.at(i).end(), top_four_maps.at(i).begin(),
top_four_maps.at(i).end(),
@@ -107,12 +122,13 @@ Matcher::Matcher(int _n, const std::vector& _genetic_positions, double _
double _match_group_interval_size, int _neighborhood_size, int _min_matches)
: num_samples(_n), genetic_positions(_genetic_positions),
query_interval_size(_query_interval_size),
- match_group_interval_size(_match_group_interval_size), neighborhood_size(_neighborhood_size), min_matches(_min_matches) {
+ match_group_interval_size(_match_group_interval_size), neighborhood_size(_neighborhood_size),
+ min_matches(_min_matches) {
if (genetic_positions.size() <= 2) {
throw std::runtime_error("Need at least 3 sites, found " +
std::to_string(genetic_positions.size()));
}
- num_sites = genetic_positions.size();
+ num_sites = static_cast(genetic_positions.size());
sites_processed = 0;
// Check maps are strictly increasing
@@ -130,7 +146,7 @@ Matcher::Matcher(int _n, const std::vector& _genetic_positions, double _
int match_group_site_idx = 1;
double gen_pos_offset = genetic_positions[0];
match_group_sites = {0};
- for (int i = 0; i < genetic_positions.size(); i++) {
+ for (int i = 0; i < static_cast(genetic_positions.size()); i++) {
double cm = genetic_positions.at(i);
if (cm > gen_pos_offset + query_interval_size * query_site_idx) {
query_sites.push_back(i);
@@ -162,7 +178,7 @@ Matcher::Matcher(int _n, const std::vector& _genetic_positions, double _
}
match_group_idx = 0;
std::cout << "Will use " << query_sites.size() << " query sites and " << match_group_sites.size()
- << " match_group_sites" << std::endl;
+ << " match_group_sites" << std::endl;
match_groups.reserve(match_group_sites.size());
for (int match_group_site : match_group_sites) {
@@ -195,7 +211,7 @@ void Matcher::process_site(const std::vector& genotype) {
}
int counter0 = 0;
int counter1 = 0;
- if (genotype.size() != num_samples) {
+ if (static_cast(genotype.size()) != num_samples) {
throw std::runtime_error("invalid genotype vector size");
}
@@ -217,14 +233,14 @@ void Matcher::process_site(const std::vector& genotype) {
sorting = next_sorting;
// Threading-neighbor queries
- if (match_group_idx < match_group_sites.size() - 1 &&
- sites_processed >= match_group_sites.at(match_group_idx + 1)) {
+ if (match_group_idx < (static_cast(match_group_sites.size()) - 1) &&
+ (sites_processed >= match_group_sites.at(match_group_idx + 1))) {
match_group_idx++;
match_groups.at(match_group_idx - 1).filter_matches(min_matches);
}
// If we've reached a query site, query
- if (next_query_site_idx < query_sites.size() &&
+ if (next_query_site_idx < static_cast(query_sites.size()) &&
sites_processed == query_sites.at(next_query_site_idx)) {
// Get the arg-sort of the sorting
for (int i = 0; i < num_samples; i++) {
@@ -238,18 +254,18 @@ void Matcher::process_site(const std::vector& genotype) {
// Insert sequences and query in order
for (int i = 1; i < num_samples; i++) {
std::vector matches;
- int allele = genotype.at(i);
matches.reserve(neighborhood_size);
auto iter = threaded.insert(permutation.at(i));
auto iter_up = iter.first;
auto iter_down = iter.first;
// Check if genotypes are identical, just to be sure
- while (matches.size() < neighborhood_size && (iter_down != threaded.begin() || iter_up != threaded.end())) {
+ while ((static_cast(matches.size()) < neighborhood_size) &&
+ (iter_down != threaded.begin() || iter_up != threaded.end())) {
if (iter_down != threaded.begin()) {
iter_down--;
matches.push_back(sorting.at(*iter_down));
}
- if (matches.size() < neighborhood_size && iter_up != threaded.end()) {
+ if (static_cast(matches.size()) < neighborhood_size && iter_up != threaded.end()) {
iter_up++;
if (iter_up != threaded.end()) {
matches.push_back(sorting.at(*iter_up));
@@ -273,7 +289,7 @@ void Matcher::process_site(const std::vector& genotype) {
}
// Special case for last query
- if (next_query_site_idx == query_sites.size()) {
+ if (next_query_site_idx == static_cast(query_sites.size())) {
match_groups.at(match_group_sites.size() - 1).filter_matches(min_matches);
}
}
@@ -282,7 +298,7 @@ void Matcher::process_site(const std::vector& genotype) {
// Propagate top 4 matches from left and right match groups
void Matcher::propagate_adjacent_matches() {
- for (int i = 1; i < match_groups.size(); i++) {
+ for (int i = 1; i < static_cast(match_groups.size()); i++) {
MatchGroup& group = match_groups.at(i);
MatchGroup& prev = match_groups.at(i - 1);
group.insert_tops_from(prev);
diff --git a/src/Matcher.hpp b/src/Matcher.hpp
index ff468dd..fbe5a41 100644
--- a/src/Matcher.hpp
+++ b/src/Matcher.hpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_MATCHER_HPP
#define THREADS_ARG_MATCHER_HPP
@@ -5,45 +21,26 @@
#include
#include
-// for a certain interval, store the matches for all samples
+/// For a given interval, stores the matches for all samples.
class MatchGroup {
public:
- int num_samples;
- std::unordered_map> match_candidates;
- std::vector> match_candidates_counts;
- std::vector>> top_four_maps;
- double cm_position;
MatchGroup(int _num_samples, double cm_position);
MatchGroup(const std::vector& target_ids,
const std::vector>& matches, const double _cm_position);
void filter_matches(int min_matches);
void insert_tops_from(MatchGroup& other);
void clear();
+
+public:
+ int num_samples = 0;
+ std::unordered_map> match_candidates;
+ std::vector> match_candidates_counts;
+ std::vector>> top_four_maps;
+ double cm_position = 0.0;
};
class Matcher {
-
-private:
- int min_matches = 0;
- int sites_processed = 0;
- int next_query_site_idx = 0;
- int match_group_idx = 0;
- int min_match_length = 1;
- std::vector match_groups;
- std::vector sorting;
- std::vector next_sorting;
- std::vector permutation;
-
public:
- int neighborhood_size = 0;
- std::vector query_sites;
- std::vector match_group_sites;
- int num_samples = 0;
- int num_sites = 0;
- double query_interval_size = 0.0;
- // matches in these groups are considered together in the hmm
- double match_group_interval_size = 0.0;
- std::vector genetic_positions;
Matcher(int _n, const std::vector& _genetic_positions, double _query_interval_size,
double _match_group_interval_size, int _neighborhood_size, int _min_matches);
@@ -51,6 +48,7 @@ class Matcher {
void process_site(const std::vector& genotype);
void propagate_adjacent_matches();
void clear();
+
std::vector get_matches();
std::vector>>
serializable_matches(std::vector& target_ids);
@@ -58,6 +56,27 @@ class Matcher {
std::vector get_sorting();
std::vector get_permutation();
+
+public:
+ int num_samples = 0;
+ std::vector genetic_positions;
+ double query_interval_size = 0.0;
+ double match_group_interval_size = 0.0;
+ int neighborhood_size = 0;
+ std::vector query_sites;
+ std::vector match_group_sites;
+ int num_sites = 0;
+ // NB (re match_group_interval_size): matches in these groups are considered together in the hmm
+
+private:
+ int min_matches = 0;
+ int sites_processed = 0;
+ int next_query_site_idx = 0;
+ int match_group_idx = 0;
+ std::vector match_groups;
+ std::vector sorting;
+ std::vector next_sorting;
+ std::vector permutation;
};
#endif // THREADS_ARG_MATCHER_HPP
diff --git a/src/Node.cpp b/src/Node.cpp
index 4eb6faa..12bf8a9 100644
--- a/src/Node.cpp
+++ b/src/Node.cpp
@@ -1,8 +1,23 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "Node.hpp"
#include
-
Node::Node(int _sample_ID, int _divergence, bool _genotype)
: sample_ID(_sample_ID), divergence(_divergence), genotype(_genotype) {
}
diff --git a/src/Node.hpp b/src/Node.hpp
index 1b228ef..1c07075 100644
--- a/src/Node.hpp
+++ b/src/Node.hpp
@@ -1,11 +1,36 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_NODE_HPP
#define THREADS_ARG_NODE_HPP
#include
#include
-
class Node {
+public:
+ // Constructors
+ Node(int sample_ID, int divergence, bool genotype);
+
+ // Node movers-arounders
+ void insert_above(Node* node);
+
+ // Output
+ friend std::ostream& operator<<(std::ostream& os, const Node& node);
+
public:
// Node data
int sample_ID = 0;
@@ -18,15 +43,6 @@ class Node {
// "Next below to the right" for 0 and 1
std::array w = {nullptr, nullptr};
-
- // Constructors
- Node(int sample_ID, int divergence, bool genotype);
-
- // Node movers-arounders
- void insert_above(Node* node);
-
- // Output
- friend std::ostream& operator<<(std::ostream& os, const Node& node);
};
#endif // THREADS_ARG_NODE_HPP
\ No newline at end of file
diff --git a/src/State.cpp b/src/State.cpp
index 6c8125d..b726adb 100644
--- a/src/State.cpp
+++ b/src/State.cpp
@@ -1,3 +1,19 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "State.hpp"
#include
@@ -5,7 +21,6 @@
#include
#include
-
TracebackState::TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev)
: site(_site), best_prev_node(_best_prev_node), prev(_prev) {
}
@@ -14,7 +29,7 @@ State::State(Node* _below, double _score, TracebackState* _traceback)
: below(_below), score(_score), traceback(_traceback) {
}
-bool State::genotype() {
+bool State::genotype() const {
return this->below->above->genotype;
}
@@ -55,7 +70,7 @@ void StateBranch::prune() {
});
std::vector new_states;
- double running_min_score = std::numeric_limits::infinity();
+ double running_min_score = std::numeric_limits::max();
for (const State& s : states) {
if (s.score < running_min_score) {
new_states.push_back(s);
@@ -81,7 +96,7 @@ void StateTree::prune() {
}
}
-std::vector StateTree::dump() {
+std::vector StateTree::dump() const {
std::vector states;
for (auto pair : branches) {
for (auto s : pair.second.states) {
diff --git a/src/State.hpp b/src/State.hpp
index 9ad796b..fcc589f 100644
--- a/src/State.hpp
+++ b/src/State.hpp
@@ -1,69 +1,85 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_STATE_HPP
#define THREADS_ARG_STATE_HPP
#include "Node.hpp"
-#include
#include
+#include
class TracebackState {
+public:
+ TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev);
+
public:
int site = 0;
- // ID to trace back through to last traceback state
- Node* best_prev_node = nullptr;
+ Node* best_prev_node = nullptr; ///< ID to trace back through to last traceback state
TracebackState* prev = nullptr;
-
- TracebackState(int _site, Node* _best_prev_node, TracebackState* _prev);
};
class State {
public:
- // Nothing here can be const because we want to use std::sort later on
- // Panel entry directly below
- Node* below = nullptr;
- // Score of the current state
- double score = 0.0;
- // Pointer to last recombinant state
- TracebackState* traceback = nullptr;
- // Shorthand for this.below->site
- bool genotype();
-
State(Node* _below, double _score, TracebackState* _traceback);
+ /// Shorthand for this->below->above->genotype
+ bool genotype() const;
+
friend std::ostream& operator<<(std::ostream& os, State& state);
+
+public:
+ // Nothing here can be const because we want to use std::sort later on
+ Node* below = nullptr; ///< Panel entry directly below
+ double score = 0.0; ///< Score of the current state
+ TracebackState* traceback = nullptr;
};
class StatePair {
public:
- // Panel entry directly below
- Node* below_a = nullptr;
- Node* below_b = nullptr;
- // Score of the current state
- double score = 0.0;
- // Pointer to last recombinant state
- TracebackState* traceback_a = nullptr;
- TracebackState* traceback_b = nullptr;
-
StatePair(Node* _below_a, Node* _below_b, double _score, TracebackState* _traceback_a,
TracebackState* _traceback_b);
friend std::ostream& operator<<(std::ostream& os, StatePair& state_pair);
+
+public:
+ Node* below_a = nullptr; ///< Panel entry directly below
+ Node* below_b = nullptr;
+ double score = 0.0; ///< Score of the current state
+ TracebackState* traceback_a = nullptr; ///< Pointer to last recombinant state
+ TracebackState* traceback_b = nullptr;
};
class StateBranch {
public:
- std::vector states;
void insert(const State& state);
void prune();
+
+public:
+ std::vector states;
};
class StateTree {
public:
- std::unordered_map branches;
-
StateTree(std::vector& states);
void prune();
- std::vector dump();
+ std::vector dump() const;
+
+public:
+ std::unordered_map branches;
};
#endif // THREADS_ARG_STATE_HPP
diff --git a/src/TGEN.cpp b/src/TGEN.cpp
index f5f6d08..fdee447 100644
--- a/src/TGEN.cpp
+++ b/src/TGEN.cpp
@@ -1,8 +1,23 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#include "TGEN.hpp"
#include "TgenSegment.hpp"
-#include
#include
#include
@@ -10,7 +25,6 @@
#include
#include
-
namespace boost::icl {
// See
// https://www.boost.org/doc/libs/1_81_0/libs/icl/doc/html/boost_icl/examples/custom_interval.html
@@ -52,15 +66,12 @@ class TGENImpl {
std::vector interval_sets;
std::unordered_map pos_idx_map;
- Eigen::VectorXi reference_genome;
std::vector reference_genome_vec;
std::vector> bp_starts;
std::vector> target_IDs;
std::vector> het_sites;
std::vector positions;
std::vector> genotypes;
-
- Eigen::MatrixXi genotype_cache;
std::unordered_map cached_genotypes_map;
// Constructor
@@ -70,27 +81,23 @@ class TGENImpl {
het_sites(std::move(_het_sites)), positions(std::move(_positions)) {
positions.push_back(std::numeric_limits::max());
// position-to-site map
- for (int i = 0; i < positions.size(); i++) {
+ for (int i = 0; i < static_cast(positions.size()); i++) {
pos_idx_map[positions[i]] = i;
}
- // set reference genome here
- reference_genome = Eigen::VectorXi::Zero(positions.size());
reference_genome_vec = std::vector(positions.size(), false);
for (int h : het_sites[0]) {
- reference_genome[pos_idx_map.at(h)] = 1;
reference_genome_vec[pos_idx_map.at(h)] = true;
}
interval_sets.reserve(bp_starts.size());
// Initialize interval maps for each sample. This can get too slow
- for (int i = 0; i < bp_starts.size(); i++) {
+ for (std::size_t i = 0; i < bp_starts.size(); i++) {
SegmentSet iset;
- int n_segs = bp_starts[i].size();
+ int n_segs = static_cast(bp_starts[i].size());
std::vector& sample_hets = het_sites[i];
- int n_hets = sample_hets.size();
+ int n_hets = static_cast(sample_hets.size());
int het_site_idx = 0;
- int pos_idx = 0;
std::vector seg_hets;
for (int j = 0; j < n_segs; j++) {
int seg_start = bp_starts[i][j];
@@ -116,84 +123,7 @@ class TGENImpl {
cached_genotypes_map[0] = -1;
}
- // Eigen-based query
-// Eigen::MatrixXi& query(const int bp_from, const int bp_to, const std::vector& samples) {
-// clear_cache();
-
-// Deprecated eigen-based query
-// Eigen::MatrixXi& TGEN::query(const int bp_from, const int bp_to, const std::vector& samples) {
-// clear_cache();
-
-// // Find number of expected sites
-// int start_pos = *std::lower_bound(positions.begin(), positions.end(), bp_from);
-// int end_pos = *std::upper_bound(positions.begin(), positions.end(), bp_to);
-// int idx_offset = pos_idx_map[start_pos];
-// genotype_cache.resize(samples.size(), pos_idx_map[end_pos] - idx_offset);
-
-// TgenSegment range(start_pos, end_pos);
-
-// for (int i = 0; i < samples.size(); i++) {
-// cached_genotypes_map[samples[i]] = i;
-// if (samples[i] == 0) {
-// auto insert_range =
-// Eigen::seq(0, pos_idx_map[end_pos] - idx_offset - 1); // eigen seq is inclusive
-// auto copy_range = Eigen::seq(idx_offset, pos_idx_map[end_pos] - 1);
-// genotype_cache(i, insert_range) = reference_genome(
-// copy_range); //.eval(); //WARNING need .eval() here (or do we? I don't think we do)
-// }
-// else {
-// SegmentSet& segments(interval_sets[samples[i]]);
-
-// // Initialize the queue
-// std::queue seg_queue;
-// auto eqr = segments.equal_range(range);
-// for (SegmentSet::const_iterator iter = eqr.first; iter != eqr.second; iter++) {
-// seg_queue.push(*iter & range);
-// }
-
-// // Process everything in the queue
-// while (!seg_queue.empty()) {
-// TgenSegment& segment = seg_queue.front();
-// if (cached_genotypes_map.find(segment.target) != cached_genotypes_map.end()) {
-// // We've reached somewhere along the tree where we can copy from
-// int seg_start_idx = pos_idx_map[segment.lower()];
-// int seg_end_idx = pos_idx_map[segment.upper()];
-// auto insert_range = Eigen::seq(
-// seg_start_idx - idx_offset, seg_end_idx - idx_offset - 1); // eigen seq is inclusive
-// if (segment.target == 0) {
-// auto copy_range = Eigen::seq(seg_start_idx, seg_end_idx - 1);
-// // We've reached the root of the tree and copy from the "reference" genome
-// genotype_cache(i, insert_range) = reference_genome(copy_range);
-// }
-// else {
-// // We've found a cached genotype to copy from
-
-// genotype_cache(i, insert_range) =
-// genotype_cache(cached_genotypes_map[segment.target], insert_range);
-// }
-
-// // We then flip all the het sites
-// for (int h : segment.het_sites) {
-// genotype_cache(i, pos_idx_map[h] - idx_offset) =
-// 1 - genotype_cache(i, pos_idx_map[h] - idx_offset);
-// }
-// }
-// else {
-// // We've not yet reached somewhere to copy from, so we keep traversing
-// auto new_eqr = interval_sets[segment.target].equal_range(segment);
-// for (SegmentSet::const_iterator iter = new_eqr.first; iter != new_eqr.second; iter++) {
-// seg_queue.push(*iter & segment);
-// }
-// }
-// seg_queue.pop();
-// }
-// }
-// }
-// return genotype_cache;
-// }
-
-// std::vector-based query
-// Warning: This makes a copy when returned through the python interface
+ // Warning: This makes a copy when returned through the python interface
std::vector>& query(const int bp_from, const int bp_to,
const std::vector& samples) {
genotypes.clear();
@@ -203,15 +133,14 @@ class TGENImpl {
int end_pos = *std::upper_bound(positions.begin(), positions.end(), bp_to);
int idx_offset = pos_idx_map[start_pos];
- int n_samples = samples.size();
int n_sites = pos_idx_map[end_pos] - idx_offset;
genotypes.reserve(samples.size());
- for (int i = 0; i < samples.size(); i++) {
+ for (std::size_t i = 0; i < samples.size(); i++) {
genotypes.push_back(std::vector(n_sites));
}
TgenSegment range(start_pos, end_pos);
- for (int i = 0; i < samples.size(); i++) {
+ for (int i = 0; i < static_cast(samples.size()); i++) {
std::vector& current_gt = genotypes.at(i);
cached_genotypes_map[samples[i]] = i;
if (samples[i] == 0) {
@@ -225,7 +154,7 @@ class TGENImpl {
std::queue seg_queue;
auto eqr = segments.equal_range(range);
for (SegmentSet::const_iterator iter = eqr.first; iter != eqr.second; iter++) {
- seg_queue.push(*iter & range);
+ seg_queue.push(iter->calc_intersection_with(range));
}
// Process everything in the queue
@@ -259,7 +188,7 @@ class TGENImpl {
// We've not yet reached somewhere to copy from, so we keep traversing
auto new_eqr = interval_sets[segment.target].equal_range(segment);
for (SegmentSet::const_iterator iter = new_eqr.first; iter != new_eqr.second; iter++) {
- seg_queue.push(*iter & segment);
+ seg_queue.push(iter->calc_intersection_with(segment));
}
}
seg_queue.pop();
@@ -271,7 +200,6 @@ class TGENImpl {
void clear_cache() {
cached_genotypes_map.clear();
- genotype_cache.resize(0, 0);
cached_genotypes_map[0] = -1;
}
};
@@ -279,11 +207,15 @@ class TGENImpl {
TGEN::TGEN(std::vector _positions, std::vector> _bp_starts,
std::vector> _target_IDs, std::vector> _het_sites)
: pimpl(std::make_unique(std::move(_positions), std::move(_bp_starts),
- std::move(_target_IDs), std::move(_het_sites))) {}
+ std::move(_target_IDs), std::move(_het_sites))) {
+}
-TGEN::~TGEN() = default;
+TGEN::~TGEN() {
+ // Empty destructor is defined in the source file rather than the header so that TGENImpl is a complete type when pimpl is destroyed
+}
-std::vector>& TGEN::query(int start_pos, int end_pos, const std::vector& samples) {
+std::vector>& TGEN::query(int start_pos, int end_pos,
+ const std::vector& samples) {
// Forward the call to pimpl
return pimpl->query(start_pos, end_pos, samples);
}
diff --git a/src/TGEN.hpp b/src/TGEN.hpp
index 1a886d1..79b9c2a 100644
--- a/src/TGEN.hpp
+++ b/src/TGEN.hpp
@@ -1,32 +1,42 @@
+// This file is part of the Threads software suite.
+// Copyright (C) 2024 Threads Developers.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
#ifndef THREADS_ARG_TGEN_HPP
#define THREADS_ARG_TGEN_HPP
-#include
#include
#include
#include
-
-class TGENImpl; // Forward declaration of the implementation class
+// Forward declaration of the implementation class
// Using PIMPL idiom to prevent libraries that #include "TGEN.hpp" needing to see boost/icl headers
+class TGENImpl;
class TGEN {
-
-private:
- std::unique_ptr pimpl; // Pointer to implementation
-
public:
- // Constructors
TGEN(std::vector _positions, std::vector> _bp_starts,
std::vector> _target_IDs, std::vector> _het_sites);
+ ~TGEN();
- ~TGEN(); // Define the destructor for proper deletion of pimpl
-
- std::vector>& query(int start_pos, int end_pos, const std::vector& samples);
-
- // Deprecated Eigen version:
- // Eigen::MatrixXi& query(const int start_pos, const int end_pos, const std::vector& samples);
+ std::vector>& query(int start_pos, int end_pos,
+ const std::vector