prepare for 1.5.0

COMBINE-lab · May 12, 2021 · 8485624 · 8485624
1 parent 787fdb3
commit 8485624
Show file tree

Hide file tree

Showing 8 changed files with 59 additions and 13 deletions.
diff --git a/current_version.txt b/current_version.txt
@@ -1,3 +1,3 @@
 VERSION_MAJOR 1
-VERSION_MINOR 4
+VERSION_MINOR 5
 VERSION_PATCH 0
diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -55,9 +55,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.4'
+version = '1.5'
 # The full version, including alpha/beta/rc tags.
-release = '1.4.0'
+release = '1.5.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/doc/source/file_formats.rst b/doc/source/file_formats.rst
@@ -176,7 +176,7 @@ learned (or expected) fragment-GC distribution.
 Equivalence class file
 """"""""""""""""""""""
 
-If Salmon was run with the ``--dumpEq`` option, then a file called ``eq_classes.txt``
+If salmon was run with the ``--dumpEq`` option, then a file called ``eq_classes.txt``
 will exist in the auxiliary directory.  The format of that file is as follows:
 
 
@@ -206,6 +206,49 @@ class (the number of different transcripts to which fragments in this
 class map --- call this k). The line then contains the k transcript
 IDs. Finally, the line contains the count of fragments in this
 equivalence class (how many fragments mapped to these
-transcripts). The values in each such line are tab separated.
+transcripts). The values in each such line are tab separated. 
+**Note**: The indices for transcripts referenced in this file start 
+at 0. 
 
+If salmon was run with the ``--dumpEqWeights`` or ``-d`` option, then the ``eq_classes.txt``
+file in the auxiliary directory will include a textual representation of the `range-factorized equivalence classes <https://academic.oup.com/bioinformatics/article/33/14/i142/3953977>`_.
+The format of that file is specified as follows:
 
+
+::
+   
+   N (num transcripts)
+   M (num equiv classes)
+   tn_1
+   tn_2
+   ...
+   tn_N
+   eq_1_size t_11 t_12 ... p_11 p_12 ... count
+   eq_2_size t_21 t_22 ... p_21 p_22 ... count
+
+   
+That is, the file begins with a line that contains the number of
+transcripts (say N) then a line that contains the number of
+equivalence classes (say M). It is then followed by N lines that list
+the transcript names --- the order here is important, because the
+labels of the equivalence classes are given in terms of the IDs of
+the transcripts. The rank of a transcript in this list is the ID with
+which it will be labeled when it appears in the label of an
+equivalence class. Finally, the file contains M lines, each of which
+describes a range-factorized equivalence class of fragments. The first entry in this
+line is the number of transcripts in the label of this equivalence
+class (the number of different transcripts to which fragments in this
+class map --- call this k). The line then contains the k transcript
+IDs that *partially* define the label of this range-factorized equivalence class
+followed by k floating point values which correspond to the conditional probabilities 
+of drawing a fragment from each of these k transcripts within this range-factorized 
+equivalence class. Finally, the line contains the count of fragments in this
+equivalence class (how many fragments mapped to these
+transcripts with approximately this conditional probability distribution). 
+The values in each such line are tab separated.  
+**Note**: The indices for transcripts referenced in this file start at 0.
+**Note**: Unlike the *simple* equivalence classes, the same transcript set can 
+appear more than once in the set of range-factorized equivalence classes.  This is 
+because different sets of fragments can induce quite different conditional probability 
+distributions among these transcripts.  For more details on this representation, please 
+check the `paper describing range-factorized equivalence classes <https://academic.oup.com/bioinformatics/article/33/14/i142/3953977>`_.
diff --git a/doc/source/salmon.rst b/doc/source/salmon.rst
@@ -704,7 +704,10 @@ map to the transcriptome.  When mapping paired-end reads, the entire
 fragment (both ends of the pair) are identified by the name of the first
 read (i.e. the read appearing in the ``_1`` file).  Each line of the unmapped
 reads file contains the name of the unmapped read followed by a simple flag
-that designates *how* the read failed to map completely.  For single-end
+that designates *how* the read failed to map completely.  If fragments are 
+aligned against a decoy-aware index, then fragments that are confidently 
+assigned as decoys are written in this file followed by the ``d`` (decoy)
+flag.  Apart from the decoy flag, for single-end
 reads, the only valid flag is ``u`` (unmapped).  However, for paired-end
 reads, there are a number of different possibilities, outlined below:
 

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -6,7 +6,7 @@ MAINTAINER salmon.maintainer@gmail.com
 
 ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
     ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget
-ENV SALMON_VERSION 1.4.0
+ENV SALMON_VERSION 1.5.0
 
 # salmon binary will be installed in /home/salmon/bin/salmon
 

diff --git a/docker/build_test.sh b/docker/build_test.sh
@@ -1,3 +1,3 @@
 #! /bin/bash
-SALMON_VERSION=1.4.0
+SALMON_VERSION=1.5.0
 docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .
diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp
@@ -1,6 +1,6 @@
 /**
 >HEADER
-    Copyright (c) 2014-2019 Rob Patro rob@cs.umd.edu
+    Copyright (c) 2014-2021 Rob Patro rob@cs.umd.edu
 
     This file is part of Salmon.
 
@@ -26,9 +26,9 @@
 
 namespace salmon {
 constexpr char majorVersion[] = "1";
-constexpr char minorVersion[] = "4";
+constexpr char minorVersion[] = "5";
 constexpr char patchVersion[] = "0";
-constexpr char version[] = "1.4.0";
+constexpr char version[] = "1.5.0";
 constexpr uint32_t indexVersion = 5;
 constexpr char requiredQuasiIndexVersion[] = "p7";
 } // namespace salmon

diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh
@@ -22,11 +22,11 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then
     rm -fr ${INSTALL_DIR}/src/pufferfish
 fi
 
-SVER=salmon-v1.4.0
+SVER=salmon-v1.5.0
 #SVER=develop
 #SVER=sketch-mode
 
-EXPECTED_SHA256=059207e8d3134060ed70595e53f4189954c9e5edfaa6361b46304f55d1b71bc7
+EXPECTED_SHA256=e72470c58a7f9b1f66dece73ebf27df07b24b1585d4b155466f165dc9dfcf586
 
 mkdir -p ${EXTERNAL_DIR}
 curl -k -L https://github.com/COMBINE-lab/pufferfish/archive/${SVER}.zip -o ${EXTERNAL_DIR}/pufferfish.zip