diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 47fec2af7..ae06734e0 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -39,7 +39,7 @@ jobs: id: cache-miniconda uses: actions/cache@v2 env: - CACHE_NUMBER: 0 + CACHE_NUMBER: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} path: ~/conda_pkgs_dir diff --git a/Mikado/_transcripts/scoring_configuration.py b/Mikado/_transcripts/scoring_configuration.py index fc866babe..dc75d6f24 100644 --- a/Mikado/_transcripts/scoring_configuration.py +++ b/Mikado/_transcripts/scoring_configuration.py @@ -23,6 +23,24 @@ def compiler(expression): raise InvalidConfiguration("Invalid expression:\n{}".format(expression)) +class Unique(validate.Validator): + message = "{input} is Not unique" + + def __init__(self, error=None): + self.error = error + + def _repr_args(self): + return "" + + def _format_error(self, value): + return self.message.format(input=value) + + def __call__(self, value): + if len(value) != len(set(value)): + raise validate.ValidationError(self._format_error(value)) + return value + + @dataclass class SizeFilter: operator: str = field(metadata={"required": True, "validate": validate.OneOf(["gt", "ge", "lt", "le"])}) @@ -30,6 +48,7 @@ class SizeFilter: metric: Optional[str] = field(metadata={"required": False}, default=None) name: Optional[str] = field(default=None) source: Optional[str] = field(default=None) + default: Optional[float] = field(default=0) @dataclass @@ -39,42 +58,28 @@ class NumBoolEqualityFilter: metric: Optional[str] = field(metadata={"required": False}, default=None) name: Optional[str] = field(default=None) source: Optional[str] = field(default=None) + default: Optional[Union[float, bool]] = field(default=0) @dataclass class InclusionFilter: - value: list = field(metadata={"required": True}) + value: list = field(metadata={"required": True, "validate": [Unique]}) operator: str = field(metadata={"required": True, 
"validate": validate.OneOf(["in", "not in"])}) metric: Optional[str] = field(metadata={"required": False}, default=None) name: Optional[str] = field(default=None) source: Optional[str] = field(default=None) + default: Optional[List[float]] = field(default=None) @dataclass class RangeFilter: - class Unique(validate.Validator): - message = "{input} is Not unique" - - def __init__(self, error=None): - self.error = error - - def _repr_args(self): - return "" - - def _format_error(self, value): - return self.message.format(input=value) - - def __call__(self, value): - if len(value) != len(set(value)): - raise validate.ValidationError(self._format_error(value)) - return value - value: List[float] = field(metadata={ "required": True, "validate": [validate.Length(min=2, max=2), Unique]}) operator: str = field(metadata={"required": True, "validate": validate.OneOf(["within", "not within"])}) metric: Optional[str] = field(metadata={"required": False}, default=None) name: Optional[str] = field(default=None) + default: Optional[float] = field(default=0) @dataclass diff --git a/Mikado/_transcripts/transcript_base.py b/Mikado/_transcripts/transcript_base.py index bed1cfcfe..2a5616218 100644 --- a/Mikado/_transcripts/transcript_base.py +++ b/Mikado/_transcripts/transcript_base.py @@ -213,7 +213,6 @@ def __init__(self, *args, self.__derived_children = set() self.__original_source = None self.__external_scores = Namespace(default=(0, False)) - self.__internal_orf_transcripts = [] self.__cds_not_maximal = None # Starting settings for everything else @@ -230,7 +229,6 @@ def __init__(self, *args, self.splices = set() self.selected_internal_orf_index = None self.__verified_introns = set() - self.segments = [] self.intron_range = intron_range self.internal_orfs = [] self.blast_hits = [] @@ -296,6 +294,30 @@ def __initialize_with_line(self, transcript_row): else: raise TypeError("Invalid data type: {0}".format(type(transcript_row))) + @staticmethod + def __parse_attributes(attributes): + 
new_attributes = dict() + booleans = {"True": True, "False": False, "None": None} + for key, val in attributes.items(): + if not isinstance(val, Hashable): + pass + elif val in booleans: + val = booleans[val] + elif isinstance(val, bool): + pass + else: + try: + val = int(val) + except (ValueError, OverflowError): + try: + val = float(val) + except ValueError: + pass + except TypeError: + pass + new_attributes[intern(key)] = val + return new_attributes + def __initialize_with_bed12(self, transcript_row: BED12): """ @@ -333,6 +355,8 @@ def __initialize_with_bed12(self, transcript_row: BED12): cds.append((int(max(exon[0], transcript_row.thick_start)), int(min(exon[1], transcript_row.thick_end)))) self.add_exons(cds, features="CDS") + + self.attributes = self.__parse_attributes(transcript_row.attributes) self.finalize() def __initialize_with_bam(self, transcript_row: pysam.AlignedSegment): @@ -423,26 +447,7 @@ def __initialize_with_gf(self, transcript_row: (GffLine, GtfLine)): self.score = transcript_row.score self.scores = dict() - booleans = {"True": True, "False": False, "None": None} - - for key, val in transcript_row.attributes.items(): - if not isinstance(val, Hashable): - pass - elif val in booleans: - val = booleans[val] - elif isinstance(val, bool): - pass - else: - try: - val = int(val) - except (ValueError, OverflowError): - try: - val = float(val) - except ValueError: - pass - except TypeError: - pass - self.attributes[intern(key)] = val + self.attributes = self.__parse_attributes(transcript_row.attributes) self.blast_hits = [] if transcript_row.is_transcript is False: @@ -794,8 +799,10 @@ def get_internal_orf_beds(self) -> List[BED12]: row.block_sizes = [0] row = BED12(row, seq, coding=False, transcriptomic=True, max_regression=0, start_adjustment=False, + lenient=True, table=self.codon_table) - assert row.invalid is False, ("\n".join([str(row), row.invalid_reason])) + if row.invalid is True: + raise AssertionError("\n".join([str(row), 
row.invalid_reason])) yield row else: for index, iorf in enumerate(self.internal_orfs): @@ -832,8 +839,10 @@ def get_internal_orf_beds(self) -> List[BED12]: new_row.phase = phase # self.logger.debug(new_row) new_row = BED12(new_row, + lenient=False, sequence=seq, phase=phase, + logger=self.logger, coding=True, transcriptomic=True, max_regression=0, start_adjustment=False, table=self.codon_table) if (cds_len - phase) % 3 != 0 and cds_end not in (self.start, self.end): @@ -889,39 +898,51 @@ def configuration(self): return self.__configuration @property - def frames(self): + def frames(self) -> dict: """This property will return a dictionary with three keys - the three possible frames, 0, 1 and 2 - and within each, a set of the positions that are in that frame. If the transcript does not have """ self.finalize() - frames = {0: set(), 1: set(), 2: set()} - for orf in self.internal_orfs: - if self.strand == "-": - exons = sorted([(_[1][0], _[1][1], _[2]) for _ in orf - if _[0] == "CDS"], key=operator.itemgetter(0, 1), reverse=True) - for start, end, phase in exons: - frame = ((3 - phase) % 3 - 1) % 3 - for pos in range(end, start - 1, -1): - frame = abs((frame + 1) % 3) - frames[frame].add(pos) - else: - exons = sorted([(_[1][0], _[1][1], _[2]) for _ in orf - if _[0] == "CDS"], key=operator.itemgetter(0, 1), reverse=False) - for start, end, phase in exons: - frame = ((3 - phase) % 3 - 1) % 3 # Previous frame before beginning of the feature - for pos in range(start, end + 1): - frame = abs((frame + 1) % 3) - frames[frame].add(pos) - return frames + + @functools.lru_cache() + def calculate_frames(internal_orfs, strand) -> dict: + frames = {0: set(), 1: set(), 2: set()} + for orf in internal_orfs: + if strand == "-": + exons = sorted([(_[1][0], _[1][1], _[2]) for _ in orf + if _[0] == "CDS"], key=operator.itemgetter(0, 1), reverse=True) + for start, end, phase in exons: + frame = ((3 - phase) % 3 - 1) % 3 + for pos in range(end, start - 1, -1): + frame = abs((frame + 1) % 
3) + frames[frame].add(pos) + else: + exons = sorted([(_[1][0], _[1][1], _[2]) for _ in orf + if _[0] == "CDS"], key=operator.itemgetter(0, 1), reverse=False) + for start, end, phase in exons: + frame = ((3 - phase) % 3 - 1) % 3 # Previous frame before beginning of the feature + for pos in range(start, end + 1): + frame = abs((frame + 1) % 3) + frames[frame].add(pos) + for frame in frames: + frames[frame] = frozenset(frames[frame]) + return frames + + return calculate_frames(tuple(tuple(_) for _ in self.internal_orfs), self.strand) @property - def framed_codons(self): + def framed_codons(self) -> List: """Return the list of codons as calculated by self.frames.""" - codons = list(zip(*[sorted(self.frames[0]), sorted(self.frames[1]), sorted(self.frames[2])])) - if self.strand == "-": - codons = list(reversed(codons)) + @functools.lru_cache() + def calculate_codons(frames, strand) -> list: + # Reconvert to dictionary. We had to turn into a tuple of items for hashing. + frames = dict(frames) + codons = list(zip(*[sorted(frames[0]), sorted(frames[1]), sorted(frames[2])])) + if strand == "-": + codons = list(reversed(codons)) + return codons - return codons + return calculate_codons(tuple(self.frames.items()), self.strand) @property def _selected_orf_transcript(self): @@ -939,16 +960,14 @@ def _internal_orfs_transcripts(self): Note: this will exclude the UTR part, even when the transcript only has one ORF.""" self.finalize() - if not self.is_coding: - return [] - elif len(self.__internal_orf_transcripts) == len(self.internal_orfs): - return self.__internal_orf_transcripts - else: - for num, orf in enumerate(self.internal_orfs, start=1): + @functools.lru_cache() + def calculate_orf_transcripts(internal_orfs, chrom, strand, tid): + orf_transcripts = [] + for num, orf in enumerate(internal_orfs, start=1): torf = TranscriptBase() - torf.chrom, torf.strand = self.chrom, self.strand - torf.derives_from = self.id - torf.id = "{}.orf{}".format(self.id, num) + torf.chrom, 
torf.strand = chrom, strand + torf.derives_from = tid + torf.id = "{}.orf{}".format(tid, num) __exons, __phases = [], [] for segment in [_ for _ in orf if _[0] == "CDS"]: __exons.append(segment[1]) @@ -956,9 +975,13 @@ def _internal_orfs_transcripts(self): torf.add_exons(__exons, features="exon", phases=None) torf.add_exons(__exons, features="CDS", phases=__phases) torf.finalize() - self.__internal_orf_transcripts.append(torf) + orf_transcripts.append(torf) + return orf_transcripts - return self.__internal_orf_transcripts + orf_transcripts = calculate_orf_transcripts(tuple(tuple(_) for _ in self.internal_orfs), + self.chrom, self.strand, self.id) + + return orf_transcripts def as_bed12(self) -> BED12: @@ -992,14 +1015,6 @@ def remove_exon(self, exon): if exon in self.exons: self.exons.remove(exon) - if ("exon", exon) in self.segments: - self.segments.remove(("exon", exon)) - tr = set() - for segment in self.segments: - if self.overlap(segment[1], exon) >= 0: - tr.add(segment) - for _ in tr: - self.segments.remove(_) tr = set() for segment in self.combined_cds: if self.overlap(segment, exon) >= 0: @@ -1056,7 +1071,6 @@ def remove_utrs(self): self.start = cds_start self.end = cds_end self.internal_orfs, self.combined_utr = [], [] - self.segments = [] # Need to recalculate it self._cdna_length = None self.finalize() @@ -1089,7 +1103,6 @@ def strip_cds(self, strand_specific=True): self._selected_cds_introns = set() self.selected_internal_orf_index = None self.combined_utr = [] - self.segments = [] self.internal_orfs = [] self.finalize() @@ -1131,7 +1144,6 @@ def unfinalize(self): return self.internal_orfs = [] - self.__internal_orf_transcripts = [] self.combined_utr = [] self._cdna_length = None self.finalized = False @@ -1207,10 +1219,6 @@ def as_dict(self, remove_attributes=False): # Now let'add the things that we calculate with the basic lengths state["calculated"] = self.__get_calculated_stats() state["feature"] = self.feature - state["segments"] = [] - for segment 
in self.segments: - segment = [segment[0], [segment[1][0], segment[1][1]]] - state["segments"].append(segment) for metric in mmetrics: state[metric] = getattr(self, metric) @@ -1245,7 +1253,6 @@ def load_dict(self, state, trust_orf=False, accept_undefined_multi=False): self.combined_cds = [] self._selected_cds_introns = set() self._combined_cds_introns = set() - self.segments = [] self._cdna_length = None for dump, store in zip( @@ -1260,14 +1267,6 @@ def load_dict(self, state, trust_orf=False, accept_undefined_multi=False): else: store.append(tuple(iv)) - for segment in state["segments"]: - if len(segment) != 2: - raise CorruptIndex("Invalid segment values for {}: {}".format(self.id, segment)) - feature, (start, end) = segment - if not all([isinstance(feature, str), isinstance(start, int), isinstance(end, int)]): - raise CorruptIndex("Invalid segment values for {}: {}".format(self.id, segment)) - self.segments.append((feature, (start, end))) - self.splices = set(state["splices"]) self._trust_orf = trust_orf self._accept_undefined_multi = accept_undefined_multi @@ -1741,10 +1740,7 @@ def original_source(self, value): @property def gene(self): - if "gene_id" not in self.attributes: - self.attributes["gene_id"] = self.parent[0] - - return self.attributes["gene_id"] + return self.attributes.get("gene_id", self.parent[0]) @property def location(self): @@ -1774,7 +1770,7 @@ def score(self, score): if not isinstance(score, (float, int)): try: score = float(score) - except: + except (ValueError, TypeError): raise ValueError( "Invalid value for score: {0}, type {1}".format(score, type(score))) self.__score = score diff --git a/Mikado/_transcripts/transcript_methods/finalizing.py b/Mikado/_transcripts/transcript_methods/finalizing.py index e11958bbd..a5558c647 100644 --- a/Mikado/_transcripts/transcript_methods/finalizing.py +++ b/Mikado/_transcripts/transcript_methods/finalizing.py @@ -502,35 +502,31 @@ def _check_phase_correctness(transcript, strip_faulty_cds=True): 
:return: Mikado.loci.transcript.Transcript """ - segments, internal_orfs = transcript.segments, transcript.internal_orfs - - if min(len(segments), len(internal_orfs)) == 0: - # transcript.logger.debug("Redefining segments for %s", transcript.id) - segments = [("exon", tuple([e[0], e[1]])) for e in transcript.exons] - # Define CDS - if len(internal_orfs) > 0: - for segment in itertools.chain(internal_orfs): - if segment[0] == "exon": - continue - elif segment[0] == "UTR": - segments.append(("UTR", (segment[1][0], segment[1][1]))) - elif segment[0] == "CDS": - segments.append(("CDS", (segment[1][0], segment[1][1]))) - else: - segments.extend([("CDS", tuple([c[0], c[1]])) for c in transcript.combined_cds]) - # Define UTR segments - segments.extend([("UTR", tuple([u[0], u[1]])) for u in transcript.combined_utr]) - # Mix and sort - segments = sorted(segments, key=operator.itemgetter(1, 0)) - # Add to the store as a single entity - if not internal_orfs and any(_[0] == "CDS" for _ in segments): - internal_orfs = [segments] - else: - transcript.selected_internal_orf_index = None + internal_orfs = transcript.internal_orfs + + segments = [("exon", tuple([e[0], e[1]])) for e in transcript.exons] + # Define CDS + if len(internal_orfs) > 0: + for segment in itertools.chain(internal_orfs): + if segment[0] == "exon": + continue + elif segment[0] == "UTR": + segments.append(("UTR", (segment[1][0], segment[1][1]))) + elif segment[0] == "CDS": + segments.append(("CDS", (segment[1][0], segment[1][1]))) else: - pass + segments.extend([("CDS", tuple([c[0], c[1]])) for c in transcript.combined_cds]) + # Define UTR segments + segments.extend([("UTR", tuple([u[0], u[1]])) for u in transcript.combined_utr]) + # Mix and sort + segments = sorted(segments, key=operator.itemgetter(1, 0)) + # Add to the store as a single entity + if not internal_orfs and any(_[0] == "CDS" for _ in segments): + internal_orfs = [segments] + else: + transcript.selected_internal_orf_index = None - 
transcript.segments, transcript.internal_orfs = segments, internal_orfs + transcript.internal_orfs = internal_orfs __orfs_to_remove = [] orf_excs = [] @@ -669,7 +665,6 @@ def finalize(transcript): transcript.logger.exception(exc) transcript.combined_cds = [] transcript.combined_utr = [] - transcript.segments = [] transcript.internal_orfs = [] _basic_final_checks(transcript) _check_cdna_vs_utr(transcript) @@ -704,8 +699,6 @@ def finalize(transcript): try: _check_completeness(transcript) - assert all([segment[1] in transcript.exons for segment in transcript.segments if - segment[0] == "exon"]), (transcript.exons, transcript.segments) transcript.logger.debug("Verifying phase correctness for %s", transcript.id) _check_phase_correctness(transcript, strip_faulty_cds=strip_faulty_cds) transcript.logger.debug("Calculating intron correctness for %s", transcript.id) diff --git a/Mikado/_transcripts/transcript_methods/printing.py b/Mikado/_transcripts/transcript_methods/printing.py index c6fb2d23d..4ee688828 100644 --- a/Mikado/_transcripts/transcript_methods/printing.py +++ b/Mikado/_transcripts/transcript_methods/printing.py @@ -377,22 +377,8 @@ def as_bed12(transcript, transcriptomic=False, with_cds=True): except KeyError: raise KeyError((transcript.selected_cds[-1], transcript.phases)) - name = "ID={ID};coding={coding};phase={phase}".format( - ID=transcript.id, - coding=transcript.is_coding, - # Now we have to get the phase of the first CDS exon .. - phase=phase) - else: - name = "ID={ID};coding={coding}".format( - ID=transcript.id, - coding=transcript.is_coding, - # Now we have to get the phase of the first CDS exon .. 
- ) - - if transcript.alias is not None and transcript.alias != transcript.id: - name += ";alias={}".format(transcript.alias) - - bed12.name = name + bed12.coding = transcript.is_coding + bed12.name = transcript.id bed12.score = transcript.score if transcript.score else 0 bed12.strand = transcript.strand if transcript.is_coding and with_cds is True: @@ -407,6 +393,9 @@ def as_bed12(transcript, transcriptomic=False, with_cds=True): bed12.thick_end = bed12.end bed12.coding = False + for key, val in transcript.attributes.items(): + bed12.attributes[key] = val + bed12.block_count = transcript.exon_num bed12.block_sizes = [exon[1] - exon[0] + 1 for exon in transcript.exons] _introns = np.concatenate([np.array([intron[1] - intron[0] + 1 for intron in sorted(transcript.introns)], @@ -419,6 +408,7 @@ def as_bed12(transcript, transcriptomic=False, with_cds=True): bed12 = bed12.to_transcriptomic(alias=transcript.alias, start_adjustment=False, coding=(transcript.is_coding and with_cds)) bed12.chrom = transcript.id + return bed12 diff --git a/Mikado/configuration/configurator.py b/Mikado/configuration/configurator.py index e41114a2a..c8d01320c 100644 --- a/Mikado/configuration/configurator.py +++ b/Mikado/configuration/configurator.py @@ -185,11 +185,14 @@ def load_and_validate_config(raw_configuration: Union[None, MikadoConfiguration, try: config = DaijinConfiguration.Schema().load(config, partial=external) except marshmallow.exceptions.ValidationError as exc: - logger.critical("The configuration file is invalid. Validation errors:\n%s\n%s\n\n", - pprint.pformat(mikado_exc.messages), - pprint.pformat(exc.messages)) + + exc = marshmallow.exceptions.ValidationError( + "The configuration file is invalid. 
\n" + f"Validation errors if a Daijin configuration file was expected:\n{exc}\n" + f"Validation errors if a Mikado configuration file was expected:\n{mikado_exc}" + ) logger.critical(exc) - raise + raise exc config.filename = raw_configuration if config.scoring is None: @@ -202,7 +205,7 @@ def load_and_validate_config(raw_configuration: Union[None, MikadoConfiguration, raise except Exception as exc: logger.exception("Loading the configuration file failed with error:\n%s\n\n\n", exc) - raise InvalidConfiguration("The configuration file passed is invalid. Please double check.") + raise InvalidConfiguration(f"The configuration file passed is invalid. Please double check. Exception: {exc}") random.seed(config.seed % (2 ** 32 - 1)) diff --git a/Mikado/loci/abstractlocus.py b/Mikado/loci/abstractlocus.py index cdd5c6362..e9a3a9290 100644 --- a/Mikado/loci/abstractlocus.py +++ b/Mikado/loci/abstractlocus.py @@ -375,6 +375,10 @@ def evaluate(param: Union[str, int, bool, float], comparison = (param not in conf.value) elif comparison_operator == "within": start, end = sorted([conf.value[0], conf.value[1]]) + try: + float(param) + except (TypeError, ValueError): + raise ValueError(param) comparison = (start <= float(param) <= end) elif comparison_operator == "not within": start, end = sorted([conf.value[0], conf.value[1]]) @@ -1079,6 +1083,20 @@ def _create_metrics_row(self, tid: str, metrics: dict, transcript: Transcript) - value = "NA" row[key] = value + # Add the attributes + for param_dict in [self.configuration.scoring.scoring, self.configuration.scoring.cds_requirements.parameters, + self.configuration.scoring.as_requirements.parameters, + self.configuration.scoring.not_fragmentary.parameters, + self.configuration.scoring.requirements.parameters]: + for attr_name, attr_metric in [(key, metric) for key, metric in param_dict.items() + if key.startswith("attributes")]: + value = transcript.attributes.get(attr_name.replace("attributes.", ""), attr_metric.default) + if 
isinstance(value, float): + value = round(value, 2) + elif value is None or value == "": + value = "NA" + row[attr_name] = value + return row def print_metrics(self): @@ -1281,16 +1299,21 @@ def _check_not_passing(self, previous_not_passing=(), section_name="requirements evaluated = dict() for key in section.parameters: - if section.parameters[key].name is not None: - name = section.parameters[key].name + if "attributes" in key: + key_parts = key.split('.') + default = section.parameters[key].default + value = self.transcripts[tid].attributes.get(key_parts[1], default) else: - name = key - try: - value = operator.attrgetter(name)(self.transcripts[tid]) - except AttributeError: - raise AttributeError((section_name, key, section.parameters[key])) - if "external" in key: - value = value[0] + if section.parameters[key].name is not None: + name = section.parameters[key].name + else: + name = key + try: + value = operator.attrgetter(name)(self.transcripts[tid]) + except AttributeError: + raise AttributeError((section_name, key, section.parameters[key])) + if "external" in key: + value = value[0] evaluated[key] = self.evaluate(value, section.parameters[key]) # pylint: disable=eval-used diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py index a97c78529..675ac26aa 100644 --- a/Mikado/loci/locus.py +++ b/Mikado/loci/locus.py @@ -10,6 +10,8 @@ import operator from collections import defaultdict import pysam + +from ..scales.resultstorer import ResultStorer from ..transcripts.pad import pad_transcript from ..transcripts.transcript import Transcript from .abstractlocus import Abstractlocus @@ -580,9 +582,14 @@ def _check_as_requirements(self, transcript: Transcript, is_reference=False) -> evaluated = dict() for key in section.parameters: name = section.parameters[key].name - value = operator.attrgetter(name)(transcript) - if "external" in key: - value = value[0] + if 'attributes' in name: + name_parts = name.split('.') + default = section.parameters[key].default + value = 
transcript.attributes.get(name_parts[1], default) + else: + value = operator.attrgetter(name)(transcript) + if "external" in key: + value = value[0] evaluated[key] = self.evaluate(value, section.parameters[key]) # pylint: disable=eval-used @@ -611,7 +618,11 @@ def is_putative_fragment(self): evaluated = dict() for key, params in self.configuration.scoring.not_fragmentary.parameters.items(): name = params.name - value = operator.attrgetter(name)(self.primary_transcript) + if name.startswith("attributes"): + default = params.default + value = self.primary_transcript.attributes.get(name.replace("attributes.", ""), default) + else: + value = operator.attrgetter(name)(self.primary_transcript) if "external" in key: value = value[0] try: @@ -640,7 +651,7 @@ def is_putative_fragment(self): assert self.id == current_id return fragment - def other_is_fragment(self, other): + def other_is_fragment(self, other) -> (bool, Union[None, ResultStorer]): """ If the 'other' locus is marked as a potential fragment (see 'is_putative_fragment'), then this function will check whether the other locus is within the distance and with the correct comparison class code to be @@ -657,6 +668,10 @@ def other_is_fragment(self, other): self.logger.debug("Self-comparisons are not allowed!") return False, None + if other.is_putative_fragment() is False: + self.logger.debug(f"{other.id} cannot be a fragment according to the scoring file definition.") + return False, None + if any(other.transcripts[tid].is_reference is True for tid in other.transcripts.keys()): self.logger.debug("Locus %s has a reference transcript, hence it will not be discarded", other.id) return False, None diff --git a/Mikado/parsers/__init__.py b/Mikado/parsers/__init__.py index 6c8f17847..73cc6aa19 100644 --- a/Mikado/parsers/__init__.py +++ b/Mikado/parsers/__init__.py @@ -15,6 +15,7 @@ from . import bed12 from . import blast_utils from . 
import bam_parser +from .bed12 import BED12 def parser_factory(string, input_format=None): diff --git a/Mikado/parsers/bed12.py b/Mikado/parsers/bed12.py index a7312dbd0..92eeca313 100644 --- a/Mikado/parsers/bed12.py +++ b/Mikado/parsers/bed12.py @@ -9,7 +9,7 @@ import os from Bio import Seq import Bio.SeqRecord -from .parser import Parser +from .parser import Parser, _attribute_definition from sys import intern import copy from ..exceptions import InvalidParsingFormat @@ -339,6 +339,7 @@ def __init__(self, *args: Union[str, list, tuple, GffLine], self.alias = None self.__logger = create_null_logger() self.logger = logger + self.attributes = dict() self.logger.debug("Set the basic properties for %s", self.chrom) if len(args) == 0: @@ -390,6 +391,7 @@ def __init__(self, *args: Union[str, list, tuple, GffLine], raise InvalidParsingFormat("I need an ordered array, not {0}".format(type(self._line))) else: self._fields = self._line + print("Line", self._fields) self.__set_values_from_fields() self.__check_validity(transcriptomic, fasta_index, sequence) @@ -404,7 +406,6 @@ def __init__(self, *args: Union[str, list, tuple, GffLine], @property def is_transcript(self): """BED12 files are always transcripts for Mikado.""" - return True @property @@ -495,6 +496,7 @@ def _parse_attributes(self, attributes): """ self.attribute_order = [] + print("Parsing", attributes) infolist = self._attribute_pattern.findall(attributes.rstrip().rstrip(";")) @@ -514,6 +516,8 @@ def _parse_attributes(self, attributes): elif key.lower() == "id": self.name = val else: + print(key.capitalize(), val) + self.attributes[key.capitalize()] = _attribute_definition(val) continue def __set_values_from_fields(self): @@ -546,9 +550,10 @@ def __set_values_from_fields(self): self.block_starts = [int(x) for x in block_starts.split(",") if x] else: self.block_starts = [int(x) for x in block_starts] - self._parse_attributes(self.name) + to_parse = self.name if len(self._fields) == 13: - 
self._parse_attributes(self._fields[-1]) + to_parse = ";".join([to_parse, self._fields[-1]]) + self._parse_attributes(to_parse) self.has_start_codon = False self.has_stop_codon = False self.start_codon = None @@ -668,7 +673,7 @@ def __check_validity(self, transcriptomic, fasta_index, sequence): self.chrom, self.has_start_codon, self.has_stop_codon, not self.invalid) # Get only a proper multiple of three - if self.__lenient is False: + if self.lenient is False and self.coding is True: if self.strand != "-": orf_sequence = sequence[ (self.thick_start - 1 if not self.phase @@ -685,6 +690,19 @@ def __check_validity(self, transcriptomic, fasta_index, sequence): gap='N') self._internal_stop_codons = str(translated_seq).count("*") + if self._internal_stop_codons == 0 and len(orf_sequence[last_pos:]) > 3: + if orf_sequence[last_pos:][:3] in self.table.stop_codons: + # We need to shift either the thick start or the thick end. Note that last_pos is negative + # so we need to minus it. + if self.strand == "-": + self.thick_start += -last_pos % 3 + self.logger.warning( + f"Shifting the position of the thick start of {self.name} by {-last_pos % 3}") + else: + self.thick_end -= -last_pos % 3 + self.logger.warning( + f"Shifting the position of the thick end of {self.name} by {-last_pos % 3}") + del self.invalid if self.__is_invalid() is True: return @@ -743,7 +761,7 @@ def _adjust_start(self, sequence, orf_sequence): self.chrom, self.invalid, self.invalid_reason) break else: - self.__regression(orf_sequence) + self._regression(orf_sequence) if self.has_start_codon is False: # The validity will be automatically checked @@ -772,7 +790,7 @@ def _adjust_start(self, sequence, orf_sequence): self.logger.debug("%s is not coding after checking. 
Reason: %s", self.chrom, self.invalid_reason) self.coding = False - def __regression(self, orf_sequence): + def _regression(self, orf_sequence): self.logger.debug( "Starting the regression algorithm to find an internal start for %s (end: %s; thick start/end: %s, %s; phase %s)", self.chrom, self.end, self.thick_start, self.thick_end, self.phase) @@ -834,18 +852,12 @@ def __str__(self): return "#" line = [self.chrom, self.start - 1, self.end] - - if self.transcriptomic is True: - name = "ID={};coding={}".format(self.id, self.coding) - if self.coding: - name += ";phase={}".format(self.phase) - if self.alias is not None and self.alias != self.id: - name += ";alias={}".format(self.alias) - - line.append(name) - else: - line.append(self.name) - + name = "ID={};coding={}".format(self.id, self.coding) + if self.coding: + name += ";phase={}".format(self.phase) + if self.alias is not None and self.alias != self.id: + name += ";alias={}".format(self.alias) + line.append(name) if not self.score: line.append(0) else: @@ -862,6 +874,20 @@ def __str__(self): line.append(self.block_count) line.append(",".join([str(x) for x in self.block_sizes])) line.append(",".join([str(x) for x in self.block_starts])) + attributes = dict((key.lower(), val) for key, val in self.attributes.items() if key.lower() not in + ("geneid", "gene_id", "name", "phase", "coding", "alias", "id")) + if self.parent is not None: + attributes["Parent"] = self.parent[0] + assert "Parent" in attributes + elif "parent" in attributes: + parent = attributes.pop("parent") + if isinstance(parent, (tuple, list)) and len(parent) > 0: + parent = ",".join(parent) + elif isinstance(parent, (tuple, list)) and len(parent) == 0: + parent = None + attributes["Parent"] = parent + if attributes: + line.append(";".join(f"{key}={val}" for key, val in attributes.items() if val is not None)) return "\t".join([str(x) for x in line]) def __eq__(self, other): @@ -1011,6 +1037,10 @@ def invalid(self): def invalid(self): self.__invalid = 
None + @property + def lenient(self): + return self.__lenient + def __is_invalid(self): if self._internal_stop_codons >= 1: @@ -1045,7 +1075,7 @@ def __is_invalid(self): ) return True - if self.__lenient is True: + if self.lenient is True: pass else: if (self.cds_len - self.phase) % 3 != 0: diff --git a/Mikado/parsers/gfannotation.py b/Mikado/parsers/gfannotation.py index 189811924..5578f31a9 100644 --- a/Mikado/parsers/gfannotation.py +++ b/Mikado/parsers/gfannotation.py @@ -9,6 +9,7 @@ import copy from sys import intern import re +from .parser import _attribute_definition __author__ = 'Luca Venturini' @@ -18,22 +19,6 @@ [intern(_) for _ in ["+", "-", "?", "true", "True", "false", "False"]] -def _attribute_definition(val): - try: - val = float(val) - if val.is_integer(): - return int(val) - return val - except (ValueError, TypeError): - if val.lower() in ("true", "false"): - val = val.capitalize() - if val == "True": - return True - else: - return False - return val - - # This class has exactly how many attributes I need it to have # pylint: disable=too-many-instance-attributes class GFAnnotation(metaclass=abc.ABCMeta): diff --git a/Mikado/parsers/parser.py b/Mikado/parsers/parser.py index 4222af9a4..d729abe6e 100644 --- a/Mikado/parsers/parser.py +++ b/Mikado/parsers/parser.py @@ -117,3 +117,19 @@ def __setstate__(self, state): self.__dict__.update(state) self._handle = self.__get_handle(state["_handle"], position=position) + +def _attribute_definition(val): + try: + val = float(val) + if val.is_integer(): + return int(val) + return val + except (ValueError, TypeError): + if val.lower() in ("true", "false"): + val = val.capitalize() + if val == "True": + return True + else: + return False + return val + diff --git a/Mikado/picking/_loci_serialiser.py b/Mikado/picking/_loci_serialiser.py index 3adf64b3e..3df09c9db 100644 --- a/Mikado/picking/_loci_serialiser.py +++ b/Mikado/picking/_loci_serialiser.py @@ -23,8 +23,8 @@ def serialise_locus(stranded_loci: 
[Superlocus], if print_subloci is True: batch = [] batch.append(stranded_locus.__str__(level="subloci", print_cds=print_cds)) - for sublocus in stranded_locus.subloci: - assert sublocus.scores_calculated is True, (sublocus.id, sublocus.scores_calculated) + if not all(sublocus.scores_calculated is True for sublocus in stranded_locus.subloci): + raise RuntimeError("The locus we are trying to serialise has not been processed correctly.") sub_metrics_rows = [_ for _ in stranded_locus.print_subloci_metrics() if _ != {} and "tid" in _] sub_scores_rows = [_ for _ in stranded_locus.print_subloci_scores() if _ != {} and "tid" in _] diff --git a/Mikado/picking/loci_processer.py b/Mikado/picking/loci_processer.py index 94b1f110b..dc1b737bc 100644 --- a/Mikado/picking/loci_processer.py +++ b/Mikado/picking/loci_processer.py @@ -349,6 +349,10 @@ def terminate(self): self.__close_handles() super().terminate() + def kill(self) -> None: + self.__close_handles() + super(LociProcesser, self).kill() + def __close_handles(self): """Private method to flush and close all handles.""" if self.engine is not None: @@ -437,10 +441,12 @@ def run(self): self.status_queue.put(accumulator) self.logger.debug("Put all the remaining results into the queue for %s (size %s)", self.name, sys.getsizeof(accumulator)) - self.locus_queue.task_done() + # self.locus_queue.task_done() self.locus_queue.put((counter, None)) + accumulator = [] break - else: + + try: try: transcripts = msgpack.loads(transcripts, raw=False) except TypeError as err: @@ -487,6 +493,8 @@ def run(self): if len(stranded_loci) == 0: self.logger.warning("No loci left for index %d", counter) + accumulator = serialise_locus([], accumulator, counter, print_cds=print_cds, + print_monosubloci=print_monoloci, print_subloci=print_subloci) else: if len(accumulator) >= 100: accumulator = zlib.compress(msgpack.dumps(accumulator)) @@ -496,6 +504,19 @@ def run(self): accumulator = serialise_locus( stranded_loci, accumulator, counter, 
print_cds=print_cds, print_monosubloci=print_monoloci, print_subloci=print_subloci) + except (InvalidTranscript, InvalidCDS, AssertionError, ValueError, RuntimeError, TypeError) as exc: + self.logger.error(f"{self.name} has encountered an error, details:\n{exc}") + current = len(accumulator) + accumulator = serialise_locus([], accumulator, counter, print_cds=print_cds, + print_monosubloci=print_monoloci, print_subloci=print_subloci) + assert len(accumulator) > current + finally: self.locus_queue.task_done() + if accumulator: + assert isinstance(accumulator, list) + self.logger.error(accumulator) + accumulator = zlib.compress(msgpack.dumps(accumulator)) + self.status_queue.put(accumulator) + self.logger.info(f"{self.name} has finished") return diff --git a/Mikado/picking/picker.py b/Mikado/picking/picker.py index 16b8911b9..5fe448b47 100644 --- a/Mikado/picking/picker.py +++ b/Mikado/picking/picker.py @@ -36,7 +36,7 @@ from ..loci.superlocus import Superlocus from ..configuration.configurator import load_and_validate_config from ..utilities import dbutils -from ..exceptions import UnsortedInput, InvalidConfiguration, InvalidTranscript +from ..exceptions import UnsortedInput, InvalidConfiguration, InvalidTranscript, InvalidCDS from .loci_processer import analyse_locus, LociProcesser, merge_loci from ._locus_single_printer import print_locus import multiprocessing.managers @@ -460,6 +460,18 @@ def __get_output_files(self): requested_external.update({param for param in self.configuration.scoring.scoring.keys() if param.startswith("external")}) + attribute_metrics = set() + attribute_metrics.update({param for param in self.configuration.scoring.requirements.parameters.keys() + if param.startswith("attributes")}) + attribute_metrics.update({param for param in self.configuration.scoring.not_fragmentary.parameters.keys() + if param.startswith("attributes")}) + attribute_metrics.update({param for param in self.configuration.scoring.cds_requirements.parameters.keys() + if 
param.startswith("attributes")}) + attribute_metrics.update({param for param in self.configuration.scoring.cds_requirements.parameters.keys() + if param.startswith("attributes")}) + attribute_metrics.update({param for param in self.configuration.scoring.scoring.keys() + if param.startswith("attributes")}) + # Check that the external scores are all present. If they are not, raise a warning. if requested_external - set(available_external_metrics): self.logger.error( @@ -486,6 +498,7 @@ def __get_output_files(self): metrics.extend(available_external_metrics) else: metrics.extend(requested_external) + metrics.extend(attribute_metrics) metrics = Superlocus.available_metrics[:5] + sorted(metrics) session.close() engine.dispose() @@ -689,8 +702,18 @@ def __submit_multi_threading(self): self.logger.error("We have not retrieved all loci!") raise ValueError results = msgpack.loads(zlib.decompress(results)) + self.logger.info(f"Length of results to analyse: {len(results)}") for result in results: - counter, chrom, num_genes, loci, subloci, monoloci = result + try: + counter, chrom, num_genes, loci, subloci, monoloci = result + except (InvalidTranscript, InvalidCDS, TypeError, RuntimeError, ValueError, AssertionError) as exc: + self.logger.critical( + f"Mikado has encountered a fatal crash. Invalid result:\n{result}\n" + "Please inspect the logs and report the bug at " + "https://github.com/EI-CoreBioinformatics/mikado/issues. 
Exception:\n" + f"{exc}") + [_.kill() for _ in working_processes] + sys.exit(1) mapper["done"].add(counter) if counter in mapper["results"]: self.logger.fatal("%d double index found!", counter) @@ -708,7 +731,9 @@ def __submit_multi_threading(self): mapper[chrom]["done"].add(counter) if mapper[chrom]["done"] == mapper[chrom]["submit"]: self.logger.info("Finished with chromosome %s", chrom) + status_queue.task_done() + self.logger.info("Joining children processes") [_.join() for _ in working_processes] self.logger.info("Joined children processes; starting to merge partial files") diff --git a/Mikado/subprograms/prepare.py b/Mikado/subprograms/prepare.py index 156243e3a..c582d545a 100644 --- a/Mikado/subprograms/prepare.py +++ b/Mikado/subprograms/prepare.py @@ -123,6 +123,9 @@ def parse_prepare_options(args, mikado_config) -> Union[DaijinConfiguration, Mik mikado_config.prepare.strip_faulty_cds = True if getattr(args, "strip_faulty_cds", None) else \ mikado_config.prepare.strip_faulty_cds mikado_config.prepare.strip_cds = True if getattr(args, "strip_cds", None) else mikado_config.prepare.strip_cds + if len(mikado_config.prepare.files.strip_cds) == 0: + mikado_config.prepare.files.strip_cds = [mikado_config.prepare.strip_cds] * len(mikado_config.prepare.files.gff) + mikado_config.serialise.codon_table = str(args.codon_table) if ( getattr(args, "codon_table", None) not in (None, False, True)) else mikado_config.serialise.codon_table @@ -228,7 +231,7 @@ def positive(string): 1- add the "transcript" feature 2- sort by coordinates 3- check the strand""") - parser.add_argument("--fasta", "--reference", dest="reference", + parser.add_argument("--fasta", "--reference", "--genome", dest="reference", type=argparse.FileType(), help="Genome FASTA file. 
Required.") verbosity = parser.add_mutually_exclusive_group() verbosity.add_argument("--verbose", default=None, dest="log_level", action="store_const", const="DEBUG") diff --git a/Mikado/subprograms/util/trim.py b/Mikado/subprograms/util/trim.py index d5c01e496..4733dad2a 100644 --- a/Mikado/subprograms/util/trim.py +++ b/Mikado/subprograms/util/trim.py @@ -134,7 +134,6 @@ def trim_end(transcript, cds_end, max_length=0): transcript.end = newlast[1] assert all([(exon[1] <= newlast[1] for exon in transcript.exons)]) - assert all([(segment[1][1] <= newlast[1] for segment in transcript.segments)]) return transcript diff --git a/Mikado/tests/locus_test.py b/Mikado/tests/locus_test.py index 052c6d620..ed9224f48 100644 --- a/Mikado/tests/locus_test.py +++ b/Mikado/tests/locus_test.py @@ -2271,7 +2271,7 @@ def test_only_CDS_overlap(self): with self.subTest(min_overlap=min_overlap): cds_overlap = 0 for frame in range(3): - cds_overlap += len(set.intersection( + cds_overlap += len(frozenset.intersection( self.t1.frames[frame], t2.frames[frame] )) diff --git a/Mikado/tests/test_bed12.py b/Mikado/tests/test_bed12.py index e86160d51..f507f46a1 100644 --- a/Mikado/tests/test_bed12.py +++ b/Mikado/tests/test_bed12.py @@ -437,7 +437,7 @@ def test_tran_to_bed12_neg(self): t.add_exons([(71, 100), (201, end)], features="CDS", phases=[0, phase]) t.finalize() r = t.as_bed12() - self.assertEqual(r.name, "ID={};coding={};phase={}".format(t.id, True, phase)) + self.assertEqual(r.name, t.id) self.assertEqual(r.phase, phase, (end, phase)) self.assertEqual(r.thick_end, end) self.assertFalse(r.invalid) diff --git a/Mikado/tests/test_external_scores.py b/Mikado/tests/test_external_scores.py index a9dadc79d..d1b30e6ae 100644 --- a/Mikado/tests/test_external_scores.py +++ b/Mikado/tests/test_external_scores.py @@ -2,7 +2,7 @@ import unittest from .. 
import create_default_logger -from .._transcripts.scoring_configuration import ScoringFile, MinMaxScore +from .._transcripts.scoring_configuration import ScoringFile, MinMaxScore, SizeFilter, RangeFilter from ..loci import Transcript, Superlocus import pkg_resources import os @@ -35,6 +35,18 @@ def setUp(self): self.transcript2 = self.transcript.copy() self.transcript2.id = "ENST00000560637" # self.assertIn("scoring", self.conf) + + # This is for the is_fragment test + self.other_transcript = Transcript() + self.other_transcript.start = 48053001 + self.other_transcript.end = 48053301 + + exons = [(48053001, 48053301)] + + self.other_transcript.strand = "+" + self.other_transcript.add_exons(exons) + self.other_transcript.id = "ENST00000560646" + self.other_transcript.parent = "ENSG00000137873" def test_copying(self): self.transcript.external_scores.update({"test": 0, "test1": 1}) @@ -185,3 +197,167 @@ def test_attribute_use_raw_percentage(self): sup.get_metrics() self.assertIn("attributes.tpm", sup._metrics[self.transcript.id]) self.assertEqual(sup._metrics[self.transcript.id]["attributes.tpm"], 0.1) + sup.define_loci() + + def test_attribute_use_as_alternative_splicing(self): + from Mikado.loci import Locus + checked_conf = self.conf.copy() + self.transcript.attributes['cov'] = 10 + loci = Locus(self.transcript, configuration=checked_conf) + loci.configuration.scoring.as_requirements.parameters['attributes.cov'] = SizeFilter(operator='ge', value=3, + metric=None, + name='attributes.cov') + loci.configuration.scoring.as_requirements.expression = [ + loci.configuration.scoring.as_requirements.expression[0] + ' and attributes.cov'] + loci.configuration.scoring.as_requirements._check_my_requirements() + self.assertTrue(loci._check_as_requirements(self.transcript)) + + def test_attribute_use_as_alternative_splicing_fail_filter(self): + from Mikado.loci import Locus + checked_conf = self.conf.copy() + self.transcript.attributes['cov'] = 10 + loci = Locus(self.transcript, 
configuration=checked_conf) + loci.configuration.scoring.as_requirements.parameters['attributes.cov'] = SizeFilter(operator='ge', value=15, + metric=None, + name='attributes.cov') + loci.configuration.scoring.as_requirements.expression = [ + loci.configuration.scoring.as_requirements.expression[0] + ' and attributes.cov'] + loci.configuration.scoring.as_requirements._check_my_requirements() + self.assertFalse(loci._check_as_requirements(self.transcript)) + + def test_attributes_use_as_cds_requirements(self): + from Mikado.loci import Locus + checked_conf = self.conf.copy() + checked_conf.scoring.cds_requirements.expression = ["attributes.something"] + checked_conf.scoring.cds_requirements.parameters = {"attributes.something": SizeFilter(value=1, operator="ge")} + checked_conf.scoring.cds_requirements._check_my_requirements() + self.transcript.attributes['something'] = 2 + loci = Locus(self.transcript, configuration=checked_conf) + not_passing = loci._check_not_passing(section_name="cds_requirements") + self.assertSetEqual(not_passing, set()) + + def test_attributes_use_as_fragment_filter(self): + from Mikado.loci import Locus + checked_conf = self.conf.copy() + checked_conf.pick.fragments.max_distance = checked_conf.pick.clustering.flank = 5000 + checked_conf.scoring.not_fragmentary.expression = ["attributes.something"] + checked_conf.scoring.not_fragmentary.parameters = {"attributes.something": SizeFilter(value=1, operator="ge")} + checked_conf.scoring.not_fragmentary._check_my_requirements() + self.transcript.attributes['something'] = 2 + logger = create_default_logger("test_attributes_use_as_fragment_filter", level="DEBUG") + loci = Locus(self.transcript, configuration=checked_conf, logger=logger) + self.other_transcript.attributes["something"] = 0.5 + fragment = Locus(self.other_transcript, configuration=checked_conf, logger=logger) + self.assertEqual(fragment.other_is_fragment(loci), (False, None)) + self.assertTrue(loci.other_is_fragment(fragment)[0]) + del 
self.other_transcript.attributes["something"] + fragment = Locus(self.other_transcript, configuration=checked_conf, logger=logger) + self.assertEqual(fragment.other_is_fragment(loci), (False, None)) + self.assertTrue(loci.other_is_fragment(fragment)[0]) + + def test_attributes_range_use_as_fragment_filter(self): + from Mikado.loci import Locus + checked_conf = self.conf.copy() + checked_conf.pick.fragments.max_distance = checked_conf.pick.clustering.flank = 5000 + checked_conf.scoring.not_fragmentary.expression = ["attributes.something"] + checked_conf.scoring.not_fragmentary.parameters = {"attributes.something": RangeFilter(value=[10, 50], + operator="within")} + checked_conf.scoring.not_fragmentary._check_my_requirements() + self.transcript.attributes['something'] = 11 + logger = create_default_logger("test_attributes_range_as_fragment_filter", level="DEBUG") + loci = Locus(self.transcript, configuration=checked_conf, logger=logger) + self.other_transcript.attributes["something"] = 0.5 + fragment = Locus(self.other_transcript, configuration=checked_conf, logger=logger) + self.assertEqual(fragment.other_is_fragment(loci), (False, None)) + self.assertTrue(loci.other_is_fragment(fragment)[0]) + del self.other_transcript.attributes["something"] + fragment = Locus(self.other_transcript, configuration=checked_conf, logger=logger) + self.assertEqual(fragment.other_is_fragment(loci), (False, None)) + self.assertTrue(loci.other_is_fragment(fragment)[0]) + # Now change the default. + checked_conf.scoring.not_fragmentary.parameters = {"attributes.something": RangeFilter(value=[10, 50], + operator="within", + default=20)} + checked_conf.scoring.not_fragmentary._check_my_requirements() + not_fragment = Locus(self.other_transcript, configuration=checked_conf, logger=logger) + self.assertNotIn("something", not_fragment.transcripts[self.other_transcript.id].attributes) + # Not a fragment any more, the default of 15 is within the range. 
+ self.assertFalse(loci.other_is_fragment(fragment)[0]) + + def test_print_metrics_with_attributes_from_scoring(self): + from Mikado.loci import Locus, Sublocus + checked_conf = self.conf.copy() + checked_conf.scoring.scoring = {"attributes.something": MinMaxScore(default=0, rescaling="max", filter=None)} + checked_conf.scoring.cds_requirements.expression = ["cdna_length"] + checked_conf.scoring.cds_requirements.parameters = {"cdna_length": SizeFilter(operator="ge", value=1)} + checked_conf.scoring.requirements.expression = ["cdna_length"] + checked_conf.scoring.requirements.parameters = {"cdna_length": SizeFilter(operator="ge", value=1)} + checked_conf.scoring.check(checked_conf.pick.orf_loading.minimal_orf_length) + for lclass in [Locus, Sublocus]: + logger = create_default_logger(f"test_print_metrics_with_attributes_{lclass.name}", level="DEBUG") + locus = lclass(self.transcript, configuration=checked_conf, logger=logger) + self.assertIn(self.transcript.id, locus.transcripts) + rows = list(locus.print_metrics()) + self.assertEqual(len(rows), 1) + row = rows[0] + self.assertNotIn("something", locus.transcripts[self.transcript.id].attributes) + self.assertIn("attributes.something", row.keys()) + self.assertEqual(row["attributes.something"], checked_conf.scoring.scoring["attributes.something"].default) + self.transcript.attributes["something"] = 5 + locus = Locus(self.transcript, configuration=checked_conf) + rows = list(locus.print_metrics()) + self.assertEqual(len(rows), 1) + row = rows[0] + self.assertIn("something", locus.transcripts[self.transcript.id].attributes) + self.assertIn("attributes.something", row.keys()) + self.assertEqual(row["attributes.something"], 5) + del self.transcript.attributes["something"] + + def test_print_metrics_with_attributes_from_requirements(self): + from Mikado.loci import Locus, Sublocus + checked_conf = self.conf.copy() + checked_conf.scoring.scoring = {"attributes.something": MinMaxScore(default=0, rescaling="max", 
filter=None)} + checked_conf.scoring.cds_requirements.expression = ["attributes.cds"] + checked_conf.scoring.cds_requirements.parameters = {"attributes.cds": SizeFilter(operator="ge", value=1, + default=1)} + checked_conf.scoring.requirements.expression = ["attributes.req"] + checked_conf.scoring.requirements.parameters = {"attributes.req": SizeFilter(operator="ge", value=1, + default=1)} + checked_conf.scoring.as_requirements.expression = ["attributes.as"] + checked_conf.scoring.as_requirements.parameters = {"attributes.as": SizeFilter(operator="ge", value=1, + default=1)} + checked_conf.scoring.not_fragmentary.expression = ["attributes.frag"] + checked_conf.scoring.not_fragmentary.parameters = {"attributes.frag": SizeFilter(operator="ge", value=1, + default=1)} + sections = {"something": checked_conf.scoring.scoring, + "req": checked_conf.scoring.requirements.parameters, + "as": checked_conf.scoring.as_requirements.parameters, + "cds": checked_conf.scoring.cds_requirements.parameters, + "frag": checked_conf.scoring.not_fragmentary.parameters} + + checked_conf.scoring.check(checked_conf.pick.orf_loading.minimal_orf_length) + for lclass in [Locus, Sublocus]: + logger = create_default_logger(f"test_print_metrics_with_attributes_{lclass.name}", level="DEBUG") + locus = lclass(self.transcript, configuration=checked_conf, logger=logger) + self.assertIn(self.transcript.id, locus.transcripts) + rows = list(locus.print_metrics()) + self.assertEqual(len(rows), 1) + row = rows[0] + for key, section in sections.items(): + self.assertNotIn(key, locus.transcripts[self.transcript.id].attributes) + self.assertIn(f"attributes.{key}", row.keys()) + self.assertEqual(row[f"attributes.{key}"], + section[f"attributes.{key}"].default) + for key in sections: + self.transcript.attributes[key] = 5 + locus = Locus(self.transcript, configuration=checked_conf) + rows = list(locus.print_metrics()) + self.assertEqual(len(rows), 1) + row = rows[0] + for key, section in sections.items(): + 
self.assertIn(key, locus.transcripts[self.transcript.id].attributes) + self.assertIn(f"attributes.{key}", row.keys()) + self.assertEqual(row["attributes.something"], 5) + # Reset the object + for key in sections: + del self.transcript.attributes[key] diff --git a/Mikado/tests/test_modifications.py b/Mikado/tests/test_modifications.py index 3dba896b4..e12170f3f 100644 --- a/Mikado/tests/test_modifications.py +++ b/Mikado/tests/test_modifications.py @@ -1,3 +1,5 @@ +import os + from Mikado._transcripts.scoring_configuration import SizeFilter from Mikado.scales.assignment import Assigner import itertools @@ -5,7 +7,7 @@ from Mikado.parsers.GFF import GffLine from Mikado.loci import Transcript from ..loci.locus import Locus, pad_transcript -from ..parsers.bed12 import BED12 +from ..parsers.bed12 import BED12, Bed12Parser from ..subprograms.util.trim import trim_coding, trim_noncoding import unittest import pysam @@ -272,14 +274,15 @@ def test_removal_after_padding(self): locus.add_transcript_to_locus(t2_1, check_in_locus=False) locus.add_transcript_to_locus(t2_2, check_in_locus=False) self.assertTrue(locus.primary_transcript_id == t1.id) - locus.logger.setLevel("DEBUG") + locus.logger.setLevel("INFO") locus.finalize_alternative_splicing(_scores={t1.id: 20, t2_1.id: 15, t2_2.id: 10}) self.assertIn(t1.id, locus.transcripts) if t2_1.id in locus.transcripts: for tid1, tid2 in itertools.combinations(locus.transcripts.keys(), 2): res, _ = Assigner.compare(locus[tid1], locus[tid2]) print(tid1, tid2, res.ccode) - self.assertNotIn(t2_1.id, locus.transcripts) + self.assertNotIn(t2_1.id, locus.transcripts.keys(), + [(key, val.start, val.end) for key, val in locus.transcripts.items()]) self.assertIn(t2_2.id, locus.transcripts, "\n".join(tr.format("bed12") for tr in locus)) self.assertTrue(locus[t2_2.id].attributes["padded"]) @@ -347,6 +350,22 @@ def test_add_two_partials(self): (locus[ref.id].start, ref.start, template2.start, template1.start)) ) + @unittest.skip + def 
test_failed_expansion(self): + logger = create_default_logger("test_failed_expansion", level="WARNING") + raw = [Transcript(line, logger=logger) for line in Bed12Parser( + open(pkg_resources.resource_filename("Mikado.tests", os.path.join("test_pick_pad", "fail.bed12"))))] + transcripts = dict((_.id, _) for _ in raw) + [_.finalize() for _ in transcripts.values()] + template = transcripts["template"] # 4535908 4540293 + candidate = transcripts["candidate"] # 4536444 4540027 + backup = candidate.copy() + fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", os.path.join("test_pick_pad", + "failing_seq.fa.gz"))) + logger.setLevel("DEBUG") + candidate.logger.setLevel("DEBUG") + pad_transcript(candidate, backup, start_transcript=template, end_transcript=template, fai=fai, logger=logger) + if __name__ == "__main__": unittest.main() diff --git a/Mikado/tests/test_transcript_methods.py b/Mikado/tests/test_transcript_methods.py index 540f701e1..8bfd42255 100644 --- a/Mikado/tests/test_transcript_methods.py +++ b/Mikado/tests/test_transcript_methods.py @@ -454,6 +454,8 @@ def setUp(self): transcript.name = transcript.id self.transcripts[transcript.id] = transcript + for tid, transcript in self.transcripts.items(): + self.assertFalse(any(key.lower() == "id=id" for key in transcript.attributes), (tid, transcript.attributes)) self.assertEqual(len(self.transcripts), 4) def test_bed12(self): diff --git a/Mikado/tests/test_transcript_negative.py b/Mikado/tests/test_transcript_negative.py index 53e58bdb1..241a69dfe 100644 --- a/Mikado/tests/test_transcript_negative.py +++ b/Mikado/tests/test_transcript_negative.py @@ -117,10 +117,12 @@ def test_print(self): str(self.tr), diff) - g_bed12 = "Chr1 5927 8737 ID=AT1G01020.1;coding=True;phase=0 0 - 6914 8666 0 10 336,633,76,67,86,74,46,90,48,167 0,509,1229,1456,1636,1834,2014,2308,2489,2643" + g_bed12 = "Chr1 5927 8737 ID=AT1G01020.1;coding=True;phase=0 0 - 6914 8666 0"\ + " 10 336,633,76,67,86,74,46,90,48,167 
0,509,1229,1456,1636,1834,2014,2308,2489,2643\tParent=AT1G01020" self.assertEqual(g_bed12, self.tr.format("bed12", transcriptomic=False)) - t_bed12 = "AT1G01020.1 0 1623 ID=AT1G01020.1;coding=True;phase=0 0 + 71 809 0 10 167,48,90,46,74,86,67,76,633,336 0,167,215,305,351,425,511,578,654,1287" + t_bed12 = "AT1G01020.1 0 1623 ID=AT1G01020.1;coding=True;phase=0 0 + 71 809 0 10 "\ + "167,48,90,46,74,86,67,76,633,336 0,167,215,305,351,425,511,578,654,1287" self.assertEqual(t_bed12, self.tr.format("bed12", transcriptomic=True)) def test_empty(self): diff --git a/Mikado/transcripts/pad.py b/Mikado/transcripts/pad.py index 88d87d830..da648ba1e 100644 --- a/Mikado/transcripts/pad.py +++ b/Mikado/transcripts/pad.py @@ -47,22 +47,30 @@ def pad_transcript(transcript: Transcript, assert strand == transcript.strand upstream, up_exons, new_first_exon, up_remove = _enlarge_start(transcript, backup, start_transcript) - downstream, up_exons, down_exons, down_remove = _enlarge_end(transcript, - backup, end_transcript, up_exons, new_first_exon) + downstream, up_exons, down_exons, new_exon, down_remove = _enlarge_end(transcript, + backup, end_transcript, up_exons, new_first_exon) - first_exon, last_exon = transcript.exons[0], transcript.exons[-1] + # first_exon, last_exon = transcript.exons[0], transcript.exons[-1] assert upstream >= 0 and downstream >= 0 - if up_remove is True: - # Remove the first exon - transcript.remove_exon(first_exon) - if down_remove is True: - if not (up_remove is True and first_exon == last_exon): - transcript.remove_exon(last_exon) + if transcript.monoexonic is False: + if up_remove is True: + # Remove the first exon + up_exons = sorted(up_exons) + transcript.exons[0] = up_exons[-1] + up_exons.remove(up_exons[-1]) + # transcript.remove_exon(first_exon) + if down_remove is True: # (up_remove is False or first_exon != last_exon): + down_exons = sorted(down_exons) + transcript.exons[-1] = down_exons[-1] + down_exons.remove(down_exons[-1]) + # 
transcript.remove_exon(last_exon) + elif new_exon is not None: + transcript.exons[0] = new_exon new_exons = up_exons + down_exons - if not new_exons: + if not any([len(new_exons) > 0, up_remove, down_remove]): logger.debug("%s does not need to be expanded, exiting", transcript.id) return backup @@ -211,6 +219,7 @@ def _enlarge_end(transcript: Transcript, downstream = 0 down_exons = [] to_remove = False + new_exon = new_first_exon[:] if new_first_exon is not None else None if end_transcript: transcript.end = end_transcript.end @@ -218,7 +227,7 @@ def _enlarge_end(transcript: Transcript, [_ for _ in end_transcript.find_downstream(transcript.exons[-1][0], transcript.exons[-1][1]) if _.value == "exon"]) intersecting_downstream = sorted(end_transcript.search( - transcript.exons[-1][0], transcript.exons[-1][1])) + transcript.exons[-1][0] + 1, transcript.exons[-1][1])) if not intersecting_downstream: raise KeyError("No exon or intron found to be intersecting with %s vs %s, this is a mistake", transcript.id, end_transcript.id) @@ -275,7 +284,7 @@ def _enlarge_end(transcript: Transcript, downstream += sum(_[1] - _[0] + 1 for _ in downstream_exons) down_exons.extend([(_[0], _[1]) for _ in downstream_exons]) - return downstream, up_exons, down_exons, to_remove + return downstream, up_exons, down_exons, new_exon, to_remove def check_expanded(transcript, backup, start_transcript, end_transcript, fai, upstream, downstream, logger) -> str: diff --git a/Mikado/transcripts/reference_gene.py b/Mikado/transcripts/reference_gene.py index 122b91416..d77cb278d 100644 --- a/Mikado/transcripts/reference_gene.py +++ b/Mikado/transcripts/reference_gene.py @@ -354,10 +354,10 @@ def load_dict(self, state: dict, exclude_utr=False, protein_coding=False, trust_ transcript.load_dict(tvalues, trust_orf=trust_orf) transcript.finalize() if protein_coding is True and transcript.is_coding is False: - self.logger.debug("{0} is non coding ({1}, {2})".format( + self.logger.debug("{0} is non coding 
({1})".format( transcript.id, transcript.combined_cds, - transcript.segments)) + )) continue if exclude_utr is True: has_utrs = (transcript.utr_length > 0) diff --git a/Mikado/transcripts/transcriptchecker.py b/Mikado/transcripts/transcriptchecker.py index b3835b782..af4786ec2 100644 --- a/Mikado/transcripts/transcriptchecker.py +++ b/Mikado/transcripts/transcriptchecker.py @@ -324,19 +324,20 @@ def check_orf(self): self.finalize() else: self.logger.warning("Transcript %s has an invalid CDS and must therefore be discarded.", self.id) + raise InvalidTranscript("Transcript %s has an invalid CDS and must therefore be discarded.") if self.is_coding is False: return try: orfs = list(self.get_internal_orf_beds()) - assert len(orfs) >= 1 - except AssertionError as exc: # Invalid ORFs found + if len(orfs) < 1: + raise AssertionError("No ORFs remaining.") + except AssertionError as exc: if self.strip_faulty_cds is False: - raise InvalidTranscript("Invalid ORF(s) for {}. Discarding it. Error: {}".format(self.id, exc)) + raise InvalidTranscript(f"Invalid ORF(s) for {self.id}. Discarding it. Error: {exc}") else: - self.logger.warning("Invalid ORF(s) for %s. Stripping it of its CDS. Error: %s", - self.id, exc) + self.logger.warning(f"Invalid ORF(s) for {self.id}. Stripping it of its CDS. 
Error: {exc}") self.strip_cds() return @@ -361,7 +362,9 @@ def check_orf(self): self.has_stop_codon = orf.has_stop_codon self.attributes["has_start_codon"] = orf.has_start_codon self.attributes["has_stop_codon"] = orf.has_stop_codon - return + + self.strip_cds() + self.load_orfs(orfs) @property def cdna(self) -> str: diff --git a/README.md b/README.md index 330bfba45..e47db5f9d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![GitHub Downloads](https://img.shields.io/github/downloads/EI-CoreBioinformatics/mikado/total.svg?style=social&logo=github&label=download)](https://github.com/EI-CoreBioinformatics/mikado/releases) [![Release](https://img.shields.io/github/release/EI-CoreBioinformatics/mikado.svg)](https://github.com/EI-CoreBioinformatics/mikado/releases) [![PyPI](https://img.shields.io/pypi/v/mikado.svg?style=flat)](https://pypi.python.org/pypi/mikado) -[![Build Status](https://travis-ci.org/lucventurini/mikado.svg?branch=master)](https://travis-ci.org/lucventurini/mikado) +[![Build Status](https://github.com/EI-CoreBioinformatics/mikado/workflows/Mikado/badge.svg)](https://github.com/EI-CoreBioinformatics/mikado/actions/workflows/python-package.yml) # Mikado - pick your transcript: a pipeline to determine and select the best RNA-Seq prediction @@ -69,10 +69,8 @@ If you use Mikado in your work, please consider to cite: > Venturini L., Caim S., Kaithakottil G., Mapleson D.L., Swarbreck D. Leveraging multiple transcriptome assembly methods for improved gene structure annotation. GigaScience, Volume 7, Issue 8, 1 August 2018, giy093, [doi:10.1093/gigascience/giy093](https://doi.org/10.1093/gigascience/giy093) - If you also use Portcullis to provide reliable junctions to Mikado, either independently or as part of the Daijin pipeline, please consider to cite: - > Mapleson D.L., Venturini L., Kaithakottil G., Swarbreck D. Efficient and accurate detection of splice junctions from RNAseq with Portcullis. 
GigaScience, Volume 7, Issue 12, 12 December 2018, giy131, [doi:10.1093/gigascience/giy131](https://doi.org/10.1093/gigascience/giy131) [Portcullis]: https://github.com/maplesond/portcullis diff --git a/docs/Algorithms.rst b/docs/Algorithms.rst index 60f699c62..9be0a91d3 100644 --- a/docs/Algorithms.rst +++ b/docs/Algorithms.rst @@ -122,13 +122,11 @@ If any BLAST hit *spans* the two ORFs, then the model will be considered as a no These modes can be controlled directly from the :ref:`pick command line `, or during the :ref:`initial configuration stage `. -.. _scoring_files: +.. _scoring_algorithm: Transcript measurements and scoring ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _scoring_algorithm: - In order to determine the best transcript for each locus, Mikado measures each available candidate according to various different :ref:`metrics ` and assigning a specific score for each of those. Similarly to `RAMPART `_ [Rampart]_, Mikado will assign a score to each transcript for each metric by assessing it relatively to the other transcripts in the locus. The particular feature rescaling equation used for a given metric depends on the type of feature it represents: * metrics where higher values represent better transcript assemblies ("maximum"). @@ -148,525 +146,11 @@ Finally, the scores for each metric will be summed up to produce a final score f :math:`s_{t} = \sum_{m \forall m \in M} s_{mt}`. Not all the available metrics will be necessarily used for scoring; the choice of which to employ and how to score and weight each of them is left to the experimenter, although Mikado provides some pre-configured scoring files. -Values that are guaranteed to be between 0 and 1 (e.g. a percentage value) can be used directly as scores, by setting the *use_raw* parameter as true for them (see below). - -.. 
important:: The scoring algorithm is dependent on the other transcripts in the locus, so each score should not be taken as an *absolute* measure of the reliability of a transcript, but rather as a measure of its **relative goodness compared with the alternatives**. Shifting a transcript from one locus to another can have dramatic effects on the scoring of a transcript, even while the underlying metric values remain unchanged. This is why the score assigned to each transcript changes throughout the Mikado run, as transcripts are moved to subloci, monoloci and finally loci. - -Scoring files -~~~~~~~~~~~~~ - -Mikado employs user-defined configuration files to define the desirable features in genes. These files are in either YAML or JSON format (default YAML) and are composed of four sections: - - #. a *requirements* section, specifying the minimum requirements that a transcript must satisfy to be considered as valid. **Any transcript failing these requirements will be scored at 0 and purged.** - #. a *not_fragmentary* section, specifying the minimum requirements that the primary transcript of a locus has to satisfy in order for the locus **not** to be considered as a putative fragment. - #. an *as_requirements* section, which specifies the minimum requirements for transcripts for them to be considered as possible valid alternative splicing events. - #. a *scoring* section, specifying which features Mikado should look for in transcripts, and how each of them will be weighted. - -Conditions are specified using a strict set of :ref:`available operators ` and the values they have to consider. - -.. important:: Although at the moment Mikado does not offer any method to establish machine-learning based scoring configurations, it is a topic we plan to investigate in the future. Mikado already supports `Random Forest Regressors as scorers through Scikit-learn `_, but we have yet to devise a proper way to create such regressors. 
- -We provide a guide on how to write your own scoring files in a :ref:`separate tutorial `. - -.. _operators: - -Operators ---------- - -Mikado allows the following operators to express a relationship inside the scoring files: - -* *eq*: equal to (:math:`=`). Valid for comparisons with numbers, boolean values, and strings. -* *ne*: different from (:math:`\neq`). Valid for comparisons with numbers, boolean values, and strings. -* *lt*: less than (:math:`<`). Valid for comparisons with numbers. -* *gt*: greater than (:math:`>`). Valid for comparisons with numbers. -* *le*: less or equal than (:math:`\le`). Valid for comparisons with numbers. -* *ge*: greater or equal than (:math:`\ge`). Valid for comparisons with numbers. -* *in*: member of (:math:`\in`). Valid for comparisons with arrays or sets. -* *not in*: not member of (:math:`\notin`). Valid for comparisons with arrays or sets. -* *within*: value comprised in the range of the two values, inclusive. -* *not within*: value *not* comprised in the range of the two values, inclusive. - -Mikado will fail if an operator not present on this list is specified, or if the operator is assigned to compare against the wrong data type (eg. *eq* with an array). - -.. _requirements-section: - -The "requirements", "as_requirements" and "not_fragmentary" sections --------------------------------------------------------------------- - -These sections specifies the minimum requirements for a transcript at various stages. - -* A transcript failing to pass the *requirements* check will be discarded outright (if "purge" is selected) or given a score of 0 otherwise. -* If a transcript has not been selected as the primary transcript of a locus, it has to pass the *as_requirements* check to be considered as a valid alternative splicing event. 
-* Finally, after loci have been defined, the primary transcripts of loci that do not pass the *not_fragmentary* section mark their loci to be compared against neighbouring loci which have passed this same check. - -**It is strongly advised to use lenient parameters in the requirements section**, as failing to do so might result in discarding whole loci. Typically, transcripts filtered at this step should be obvious fragments, eg monoexonic transcripts produced by RNA-Seq with a total length lower than the *library* fragment length. -This section is composed by two parts: - -* *parameters*: a list of the metrics to be considered. Each metric can be considered multiple times, by suffixing it with a "." construct (eg cdna_length.*mono* vs. cdna_length.*multi* to distinguish two uses of the cdna_length metric - once for monoexonic and once for multiexonic transcripts). Any parameter which is not a :ref:`valid metric name `, after removal of the suffix, **will cause an error**. Parameters have to specify the following: - - * a *value* that the metric has to be compared against - * an *operator* that specifies the target operation. See :ref:`the operators section `. - -* *expression*: a string array that will be compiled into a valid boolean expression. All the metrics present in the expression string **must be present in the parameters section**. If an unrecognized metric is present, Mikado will crash immediately, complaining that the scoring file is invalid. Apart from brackets, Mikado accepts only the following boolean operators to chain the metrics: - - * *and* - * *or* - * *not* - * *xor* - -.. hint:: if no *expression* is specified, Mikado will construct one by chaining all the provided parameters with and *and* operator. Most of the time, this would result in an unexpected behaviour - ie Mikado assigning a score of 0 to most transcripts. It is **strongly advised** to explicitly provide a valid expression. 
- -As an example, the following snippet replicates a typical requirements section found in a scoring file: - -.. code-block:: yaml - - requirements: - expression: [((exon_num.multi and cdna_length.multi and max_intron_length and min_intron_length), or, - (exon_num.mono and cdna_length.mono))] - parameters: - cdna_length.mono: {operator: gt, value: 50} - cdna_length.multi: {operator: ge, value: 100} - exon_num.mono: {operator: eq, value: 1} - exon_num.multi: {operator: gt, value: 1} - max_intron_length: {operator: le, value: 20000} - min_intron_length: {operator: ge, value: 5} - -In the parameters section, we ask for the following: - - * *exon_num.mono*: monoexonic transcripts must have one exon ("eq") - * *exon_num.multi*: multiexonic transcripts must have more than one exon ("gt") - * *cdna_length.mono*: monoexonic transcripts must have a length greater than 50 bps (the ".mono" suffix is arbitrary, as long as it is unique for all calls of *cdna_length*) - * *cdna_length.multi*: multiexonic transcripts must have a length greater than or equal to 100 bps (the ".multi" suffix is arbitrary, as long as it is unique for all calls of *cdna_length*) - * *max_intron_length*: multiexonic transcripts should not have any intron longer than 200,000 bps. - * *min_intron_length*: multiexonic transcripts should not have any intron smaller than 5 bps. - -The *expression* field will be compiled into the following expression:: - - (exon_num > 1 and cdna_length >= 100 and max_intron_length <= 200000 and min_intron_length >= 5) or (exon_num == 1 and cdna_length > 50) - - -Any transcript for which the expression evaluates to ``false`` will be assigned a score of 0 outright and discarded, unless the user has chosen to disable the purging of such transcripts. - -.. _scoring-section: - -The scoring section -------------------- - -This section specifies which metrics will be used by Mikado to score the transcripts. 
Each metric to be used is specified as a subsection of the configuration, and will have the following attributes: - -* *rescaling*: the only compulsory attribute. It specifies the kind of scoring that will be applied to the metric, and it has to be one of "max", "min", or "target". See :ref:`the explanation on the scoring algorithm ` for details. -* *value*: compulsory if the chosen rescaling algorithm is "target". This should be either a number or a boolean value. -* *multiplier*: the weight assigned to the metric in terms of scoring. This parameter is optional; if absent, as it is in the majority of cases, Mikado will consider the multiplier to equal to 1. This is the :math:`w_{m}` element in the :ref:`equations above `. -* *filter*: It is possible to specify a filter which the metric has to fulfill to be considered for scoring, eg, "cdna_length >= 200". If the transcript fails to pass this filter, the score *for this metric only* will be set to 0. A "filter" subsection has to specify the following: - - * *operator*: the operator to apply for the boolean expression. See the :ref:`relative section `. - * *value*: value that will be used to assess the metric. - -.. hint:: the purpose of the *filter* section is to allow for fine-tuning of the scoring mechanism; ie it allows to penalise transcripts with undesirable qualities (eg a possible retained intron) without discarding them outright. As such, it is a less harsh version of the :ref:`requirements section ` and it is the preferred way of specifying which transcript features Mikado should be wary of. - -For example, this is a snippet of a scoring section: - -.. code-block:: yaml +Values that are guaranteed to be between 0 and 1 (e.g. a percentage value) can be used directly as scores, by setting the *use_raw* parameter as true for them. 
- scoring: - blast_score: {rescaling: max} - cds_not_maximal: {rescaling: min} - combined_cds_fraction: {rescaling: target, value: 0.8, multiplier: 2} - five_utr_length: - filter: {operator: le, value: 2500} - rescaling: target - value: 100 - end_distance_from_junction: - filter: {operator: lt, value: 55} - rescaling: min - non_verified_introns_num: - rescaling: max - multiplier: -10 - filter: - operator: gt - value: 1 - metric: exons_num - - -Using this snippet as a guide, Mikado will score transcripts in each locus as follows: - -* Assign a full score (one point, as no multiplier is specified) to transcripts which have the greatest *blast_score* -* Assign a full score (one point, as no multiplier is specified) to transcripts which have the lowest amount of CDS bases in secondary ORFs (*cds_not_maximal*) -* Assign a full score (**two points**, as a multiplier of 2 is specified) to transcripts that have a total amount of CDS bps approximating 80% of the transcript cDNA length (*combined_cds_fraction*) -* Assign a full score (one point, as no multiplier is specified) to transcripts that have a 5' UTR whose length is nearest to 100 bps (*five_utr_length*); if the 5' UTR is longer than 2,500 bps, this score will be 0 (see the filter section) -* Assign a full score (one point, as no multiplier is specified) to transcripts which have the lowest distance between the CDS end and the most downstream exon-exon junction (*end_distance_from_junction*). If such a distance is greater than 55 bps, assign a score of 0, as it is a probable target for NMD (see the filter section). -* Assign a maximum penalty (**minus 10 points**, as a **negative** multiplier is specified) to the transcript with the highest number of non-verified introns in the locus. - - * Again, we are using a "filter" section to define which transcripts will be exempted from this scoring (in this case, a penalty) - * However, please note that we are using the keyword **metric** in this section. 
This tells Mikado to check a *different* metric for evaluating the filter. Nominally, in this case we are excluding from the penalty any *monoexonic* transcript. This makes sense as a monoexonic transcript will never have an intron to be confirmed to start with. - -.. note:: The possibility of using different metrics for the "filter" section is present from Mikado 1.3 onwards. - -.. _Metrics: - -Metrics -~~~~~~~ - -These are all the metrics available to quantify transcripts. The documentation for this section has been generated using the :ref:`metrics utility `. - -Metrics belong to one of the following categories: - -* **Descriptive**: these metrics merely provide a description of the transcript (eg its ID) and are not used for scoring. - -* **cDNA**: these metrics refer to basic features of any transcript such as its number of exons, its cDNA length, etc. - -* **Intron**: these metrics refer to features related to the number of introns and their lengths. - -* **CDS**: these metrics refer to features related to the CDS assigned to the transcript. - -* **UTR**: these metrics refer to features related to the UTR of the transcript. In the case in which a transcript has been assigned multiple ORFs, unless otherwise stated the UTR metrics will be derived only considering the *selected* ORF, not the combination of all of them. - -* **Locus**: these metrics refer to features of the transcript in relationship to all other transcripts in its locus, eg how many of the introns present in the locus are present in the transcript. These metrics are calculated by Mikado during the picking phase, and as such their value can vary during the different stages as the transcripts are shifted to different groups. - -* **External**: these metrics are derived from accessory data that is recovered for the transcript during the run time. Examples include data regarding the number of introns confirmed by external programs such as PortCullis, or the BLAST score of the best hits. - -.. 
hint:: Starting from version 1 beta8, Mikado allows to use externally defined metrics for the transcripts. These can be accessed using the keyword "external." within the configuration file. See the :ref:`relevant section ` for details. - -.. important:: Starting from Mikado 1 beta 8, it is possible to use metrics with values between 0 and 1 directly as scores, without rescaling. This feature is available only for metrics whose values naturally lie between 0 and 1, or that are boolean in nature. - -.. topic:: Available metrics - -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| Metric name | Description | Category | Data type | Usable raw | -+=====================================+===========================================================+=============+=============+==============+ -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| tid | ID of the transcript - cannot be an undefined value. | Descriptive | str | False | -| | Alias of id. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| parent | Name of the parent feature of the transcript. | Descriptive | str | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| score | Numerical value which summarizes the reliability of the | Descriptive | str | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| external_scores | **SPECIAL** this Namespace contains all the information | External | Namespace | True | -| | regarding external scores for the transcript. 
If an | | | | -| | absent property is not defined in the Namespace, Mikado | | | | -| | will set a default value of 0 into the Namespace and | | | | -| | return it. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| best_bits | Metric that returns the best BitS associated with the | External | float | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| blast_identity | This metric will return the alignment identity for the | External | float | True | -| | best BLAST hit according to the evalue. If no BLAST hits | | | | -| | are available for the sequence, it will return 0. | | | | -| | :return: :return: | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| blast_query_coverage | This metric will return the **query** coverage for the | External | float | True | -| | best BLAST hit according to the evalue. If no BLAST hits | | | | -| | are available for the sequence, it will return 0. | | | | -| | :return: | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| blast_score | Interchangeable alias for testing different blast-related | External | float | False | -| | scores. Current: best bit score. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| blast_target_coverage | This metric will return the **target** coverage for the | External | float | True | -| | best BLAST hit according to the evalue. If no BLAST hits | | | | -| | are available for the sequence, it will return 0. 
| | | | -| | :return: :return: | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| canonical_intron_proportion | This metric returns the proportion of canonical introns | Intron | float | True | -| | of the transcript on its total number of introns. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| cdna_length | This property returns the length of the transcript. | cDNA | int | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| cds_not_maximal | This property returns the length of the CDS excluded from | CDS | int | False | -| | the selected ORF. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| cds_not_maximal_fraction | This property returns the fraction of bases not in the | CDS | float | True | -| | selected ORF compared to the total number of CDS bases in | | | | -| | the cDNA. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_fraction | This property return the percentage of the CDS part of | CDS | float | True | -| | the transcript vs. the cDNA length | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_intron_fraction | This property returns the fraction of CDS introns of the | Locus | | True | -| | transcript vs. the total number of CDS introns in the | | | | -| | Locus. If the transcript is by itself, it returns 1. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_length | This property return the length of the CDS part of the | CDS | int | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_locus_fraction | This metric returns the fraction of CDS bases of the | Locus | float | True | -| | transcript vs. the total of CDS bases in the locus. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_num | This property returns the number of non-overlapping CDS | CDS | int | False | -| | segments in the transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_cds_num_fraction | This property returns the fraction of non-overlapping CDS | CDS | float | True | -| | segments in the transcript vs. the total number of exons | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_utr_fraction | This property returns the fraction of the cDNA which is | UTR | float | True | -| | not coding according to any ORF. Complement of | | | | -| | combined_cds_fraction | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| combined_utr_length | This property return the length of the UTR part of the | UTR | int | False | -| | transcript. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| end_distance_from_junction | This metric returns the cDNA distance between the stop | CDS | int | False | -| | codon and the last junction in the transcript. In many | | | | -| | eukaryotes, this distance cannot exceed 50-55 bps | | | | -| | otherwise the transcript becomes a target of NMD. If the | | | | -| | transcript is not coding or there is no junction | | | | -| | downstream of the stop codon, the metric returns 0. This | | | | -| | metric considers the combined CDS end. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| end_distance_from_tes | This property returns the distance of the end of the | CDS | int | False | -| | combined CDS from the transcript end site. If no CDS is | | | | -| | defined, it defaults to 0. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| exon_fraction | This property returns the fraction of exons of the | Locus | float | True | -| | transcript which are contained in the sublocus. If the | | | | -| | transcript is by itself, it returns 1. Set from outside. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| exon_num | This property returns the number of exons of the | cDNA | int | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| five_utr_length | Returns the length of the 5' UTR of the selected ORF. 
| UTR | | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| five_utr_num | This property returns the number of 5' UTR segments for | UTR | int | False | -| | the selected ORF. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| five_utr_num_complete | This property returns the number of 5' UTR segments for | UTR | int | False | -| | the selected ORF, considering only those which are | | | | -| | complete exons. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| has_start_codon | Boolean. True if the selected ORF has a start codon. | CDS | bool | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| has_stop_codon | Boolean. True if the selected ORF has a stop codon. | CDS | bool | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| highest_cds_exon_number | This property returns the maximum number of CDS segments | CDS | int | False | -| | among the ORFs; this number can refer to an ORF | | | | -| | *DIFFERENT* from the maximal ORF. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| highest_cds_exons_num | Returns the number of CDS segments in the selected ORF | CDS | int | False | -| | (irrespective of the number of exons involved) | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| intron_fraction | This property returns the fraction of introns of the | Locus | float | True | -| | transcript vs. the total number of introns in the Locus. | | | | -| | If the transcript is by itself, it returns 1. Set from | | | | -| | outside. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| is_complete | Boolean. True if the selected ORF has both start and end. | CDS | bool | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| max_exon_length | This metric will return the length of the biggest exon in | cDNA | int | False | -| | the transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| max_intron_length | This property returns the greatest intron length for the | Intron | int | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| min_exon_length | This metric will return the length of the biggest exon in | | | False | -| | the transcript. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| min_intron_length | This property returns the smallest intron length for the | Intron | int | False | -| | transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| non_verified_introns_num | This metric returns the number of introns of the | External | int | False | -| | transcript which are not validated by external data. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| num_introns_greater_than_max | This metric returns the number of introns greater than | Intron | int | False | -| | the maximum acceptable intron size indicated in the | | | | -| | constructor. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| num_introns_smaller_than_min | This metric returns the number of introns smaller than | Intron | int | False | -| | the mininum acceptable intron size indicated in the | | | | -| | constructor. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| number_internal_orfs | This property returns the number of ORFs inside a | CDS | int | False | -| | transcript. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| only_non_canonical_splicing | This metric will return True if the canonical_number is 0 | Intron | bool | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| proportion_verified_introns | This metric returns, as a fraction, how many of the | External | float | True | -| | transcript introns are validated by external data. | | | | -| | Monoexonic transcripts are set to 1. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| proportion_verified_introns_inlocus | This metric returns, as a fraction, how many of the | Locus | float | True | -| | verified introns inside the Locus are contained inside | | | | -| | the transcript. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| retained_fraction | This property returns the fraction of the cDNA which is | Locus | float | True | -| | contained in retained introns. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| retained_intron_num | This property records the number of introns in the | Locus | int | False | -| | transcripts which are marked as being retained. See the | | | | -| | corresponding method in the sublocus class. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_exons_fraction | Returns the fraction of CDS segments in the selected ORF | CDS | float | True | -| | (irrespective of the number of exons involved) | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_fraction | This property calculates the fraction of the selected CDS | CDS | float | True | -| | vs. the cDNA length. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_intron_fraction | This property returns the fraction of CDS introns of the | CDS | float | True | -| | selected ORF of the transcript vs. the total number of | | | | -| | CDS introns in the Locus (considering only the selected | | | | -| | ORF). If the transcript is by itself, it should return 1. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_length | This property calculates the length of the CDS selected | CDS | int | False | -| | as best inside the cDNA. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_locus_fraction | This metric returns the fraction of CDS bases of the | Locus | float | True | -| | transcript vs. the total of CDS bases in the locus. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_num | This property calculates the number of CDS exons for the | CDS | int | False | -| | selected ORF | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_cds_number_fraction | This property returns the proportion of best possible CDS | CDS | float | False | -| | segments vs. the number of exons. See | | | | -| | selected_cds_number. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_end_distance_from_junction | This metric returns the distance between the stop codon | CDS | int | False | -| | and the last junction of the transcript. In many | | | | -| | eukaryotes, this distance cannot exceed 50-55 bps, | | | | -| | otherwise the transcript becomes a target of NMD. If the | | | | -| | transcript is not coding or there is no junction | | | | -| | downstream of the stop codon, the metric returns 0. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_end_distance_from_tes | This property returns the distance of the end of the best | CDS | int | False | -| | CDS from the transcript end site. If no CDS is defined, | | | | -| | it defaults to 0. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| selected_start_distance_from_tss | This property returns the distance of the start of the | CDS | int | False | -| | best CDS from the transcript start site. If no CDS is | | | | -| | defined, it defaults to 0. 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| snowy_blast_score | Metric that indicates how good a hit is compared to the | External | float | False | -| | competition, in terms of BLAST similarities. As in | | | | -| | SnowyOwl, the score for each hit is calculated by taking | | | | -| | the coverage of the target and dividing it by (2 * | | | | -| | len(self.blast_hits)). IMPORTANT: when splitting | | | | -| | transcripts by ORF, a blast hit is added to the new | | | | -| | transcript only if it is contained within the new | | | | -| | transcript. This WILL screw up a bit the homology score. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| source_score | This metric returns a score that is assigned to the | External | int | False | -| | transcript in virtue of its origin. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| start_distance_from_tss | This property returns the distance of the start of the | CDS | int | False | -| | combined CDS from the transcript start site. If no CDS is | | | | -| | defined, it defaults to 0. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| suspicious_splicing | This metric will return True if the transcript either has | Intron | bool | False | -| | canonical introns on both strands (probably a chimeric | | | | -| | artifact between two neighbouring loci, or if it has no | | | | -| | canonical splicing event but it would if it were assigned | | | | -| | to the opposite strand (probably a strand misassignment | | | | -| | on the part of the assembler/predictor). 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| three_utr_length | Returns the length of the 5' UTR of the selected ORF. | | int | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| three_utr_num | This property returns the number of 3' UTR segments | UTR | int | False | -| | (referred to the selected ORF). | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| three_utr_num_complete | This property returns the number of 3' UTR segments for | UTR | int | False | -| | the selected ORF, considering only those which are | | | | -| | complete exons. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| utr_fraction | This property calculates the length of the UTR of the | UTR | float | True | -| | selected ORF vs. the cDNA length. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| utr_length | Returns the sum of the 5'+3' UTR lengths | UTR | int | False | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| utr_num | Returns the number of UTR segments (referred to the | UTR | int | False | -| | selected ORF). | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| utr_num_complete | Returns the number of UTR segments which are complete | UTR | int | False | -| | exons (referred to the selected ORF). 
| | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ -| verified_introns_num | This metric returns the number of introns of the | External | int | False | -| | transcript which are validated by external data. | | | | -+-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ - - -.. _external-metrics: - -External metrics -~~~~~~~~~~~~~~~~ - -Starting from version 1 beta 8, Mikado allows to load external metrics into the database, to be used for evaluating transcripts. Metrics of this kind **must** have a value comprised between 0 and 1. -The file can be provided either by specifying it in the :ref:`coonfiguration file `, under "serialise/files/external_scores", or on the command line with the "--external-scores" parameters to mikado :ref:`serialise `. -The external scores file should have the following format: - -+--------------------------+------------------+------------------+-------+-----------------+ -| TID | Metric_one | Metric_two | ... | Metric_N | -+==========================+==================+==================+=======+=================+ -| Transcript_one | value | value | | value | -+--------------------------+------------------+------------------+-------+-----------------+ -| Transcript_two | value | value | | value | -+--------------------------+------------------+------------------+-------+-----------------+ -| ... | ... | ... | | ... | -+--------------------------+------------------+------------------+-------+-----------------+ -| Transcript_N | value | value | | value | -+--------------------------+------------------+------------------+-------+-----------------+ - - -Please note the following: - -* the header is mandatory. 
-* the metric names at the head of the table should **not** contain any space or spcecial characters, apart from the underscore (_) -* the header provides the name of the metric as will be seen by Mikado. As such, it is advised to choose sensible and informative names (e.g. "fraction_covered") rather than uninformative ones (e.g. the "metric_one" from above) -* Column names **must be unique**. -* The transcript names present in the first column **must** be present in the FASTA file. -* The table should be tab-separated. -* Values can be of any numerical or boolean type. However, only values that are determined **at serialisation** to be comprised within 0 and 1 (inclusive) can be used as raw values. - -A proper way of generating and using external scores would, therefore, be the following: - -* Run Mikado prepare on the input dataset. -* Run all necessary supplementary analyses (ORF calling and/or homology analysis through DIAMOND or BLAST). -* Run supplementary analyses to assess the transcripts, e.g. expression analysis. Normalise results so that they can be expressed with values between 0 and 1. - - * Please note that boolean results (e.g. presence or absence) can be expressed with 0 and 1 intead of "False" and "True". Customarily, in Python 0 stands for False and 1 for True, but you can choose to switch the order if you so desire. -* Aggregate all results in a text table, like the one above, tab separated. -* Call mikado serialise, specifying the location of this table either through the configuration file or on the command line invocation. - -Given the open ended nature of the external scores, the Daijin pipeline currently does not offer any system to generate these scores. This might change in the future. - -Adding external scores to the scoring file ------------------------------------------- - -Once the external metrics have been properly loaded, it is necessary to tell Mikado how to use them. This requires :ref:`modifying the scoring file itself `. 
The header that we used in the table above does provide the names of the metrics as they will be seen by Mikado. - -Let us say that we have performed an expression analysis on our transcripts, and we have created and loaded the following three metrics: - -* "fraction_covered", ie the percentage of the transcript covered by at least X reads (where X is decided by the experimenter) -* "samples_expressed", ie the percentage of samples where the transcript was expressed over a certain threshold (e.g. 1 TPM) -* "has_coverage_gaps", ie a boolean metrics that indicates whether there are windows *within* the transcript that lowly or not at all covered (e.g. a 100bp stretch with no coverage between two highly covered regions, indicating a possilble intron retention or chimera). For this example, a value of "0" indicates that there no coverage gaps (ie. it is *False* that there are gaps), "1" otherwise (it is *True* that there are coverage gaps). - -We can now use these metrics as normal, by invoking them as "external." followed by the name of the metrics: e.g., "external.fraction_covered". -So for example, if we wished to prioritise transcripts that are expressed in the highest number of samples and are completely covered by RNASeq data upon reads realignment, under "scoring", we can add the following: - -.. code-block:: yaml - - scoring: - # [ ... other metrics ... ] - - external.samples_expressed: {rescaling: max} - - external.fraction_covered: {rescaling: max} - -And if we wanted to consider any primary transcript with coverage gaps as a potential fragment, under the "fragmentary" section we could do so: - -.. code-block:: yaml - - not_fragmentary: - expression: - # other metrics .. - - and (external.has_coverage_gaps) - # Finished expression - parameters: - # other metrics ... - external.has_coverage_gaps: {operator: eq, value: 0} # Please note, again, that here "0" means "no coverage gaps detected". - # other metrics ... 
- -As external metrics allow Mikado to accept any arbitrary metric for each transcript, they allow the program to assess transcripts in any way the experimenter desires. However, currently we do not provide any way of automating the process. - -.. note:: also for external metrics, it is necessary to add a suffix to them if they are invoked more than once in an expression (see the :ref:`tutorial `). An invocation of e.g. "external.samples_expressed.mono" and "external.samples_expressed.multi", to distinguish between monoexonic and multiexonic transcripts, would be perfectly valid and actually *required* by Mikado. Notice the double use of the dot (".") as separator. Its usage as such is the reason that it cannot be present in the name of the metric itself (so, for example, "has.coverage.gaps" would be an invalid metric name). +Details on the structure of scoring files can be found :ref:`in a dedicated section `; we also provide a tutorial on :ref:`how to create your own scoring file `. +.. important:: The scoring algorithm is dependent on the other transcripts in the locus, so each score should not be taken as an *absolute* measure of the reliability of a transcript, but rather as a measure of its **relative goodness compared with the alternatives**. Shifting a transcript from one locus to another can have dramatic effects on the scoring of a transcript, even while most or all of the underlying metric values remain unchanged. This is why the score assigned to each transcript changes throughout the Mikado run, as transcripts are moved to subloci, monoloci and finally loci. .. _padding: @@ -679,7 +163,8 @@ of missing exons from neighbouring data. The procedure is similar to the one emp 1. 
A transcript can function as **template** for a candidate if: - the candidate's terminal exon falls within an **exon** of the template - - the extension would enlarge the candidate by at most *"ts_distance"* basepairs (not including introns), default **1000** bps + - the extension would enlarge the candidate by at most *"ts_distance"* basepairs (not including introns), default + **2000** bps - the extension would add at most *"ts_max_splices"* splice sites to the candidate, default **2**. 2. A graph of possible extensions is built for both the 5' end and the 3' end of the locus. Transcripts are then divided in extension groups, starting with the outmost (ie the potential **template** for the group). Links that would cause chains diff --git a/docs/Library/Mikado.daijin.rst b/docs/Library/Mikado.daijin.rst index e4f32dc1e..f6feb53cc 100644 --- a/docs/Library/Mikado.daijin.rst +++ b/docs/Library/Mikado.daijin.rst @@ -1,6 +1,15 @@ Mikado.daijin package ===================== +Submodules +---------- + +.. 
toctree:: + :maxdepth: 4 + + Mikado.daijin.assemble + Mikado.daijin.mikado + Module contents --------------- diff --git a/docs/Library/Mikado.parsers.rst b/docs/Library/Mikado.parsers.rst index 40fa0f14d..c3cba9bc2 100644 --- a/docs/Library/Mikado.parsers.rst +++ b/docs/Library/Mikado.parsers.rst @@ -13,6 +13,7 @@ Submodules Mikado.parsers.bed12 Mikado.parsers.blast_utils Mikado.parsers.gfannotation + Mikado.parsers.parser Module contents --------------- diff --git a/docs/Library/Mikado.scales.rst b/docs/Library/Mikado.scales.rst index e3aefd2ee..919c6a4aa 100644 --- a/docs/Library/Mikado.scales.rst +++ b/docs/Library/Mikado.scales.rst @@ -22,6 +22,8 @@ Submodules Mikado.scales.class_codes Mikado.scales.compare Mikado.scales.contrast + Mikado.scales.contrast + Mikado.scales.contrast Mikado.scales.resultstorer Module contents diff --git a/docs/Library/Mikado.serializers.blast_serializer.rst b/docs/Library/Mikado.serializers.blast_serializer.rst index 5bcec5af8..10d241c4f 100644 --- a/docs/Library/Mikado.serializers.blast_serializer.rst +++ b/docs/Library/Mikado.serializers.blast_serializer.rst @@ -7,9 +7,13 @@ Submodules .. 
toctree:: :maxdepth: 4 + Mikado.serializers.blast_serializer.aln_string_parser + Mikado.serializers.blast_serializer.aln_string_parser Mikado.serializers.blast_serializer.aln_string_parser Mikado.serializers.blast_serializer.blast_serialiser Mikado.serializers.blast_serializer.btop_parser + Mikado.serializers.blast_serializer.btop_parser + Mikado.serializers.blast_serializer.btop_parser Mikado.serializers.blast_serializer.hit Mikado.serializers.blast_serializer.hsp Mikado.serializers.blast_serializer.query diff --git a/docs/Library/Mikado.subprograms.util.rst b/docs/Library/Mikado.subprograms.util.rst index 0759b3b1a..6f7faccd9 100644 --- a/docs/Library/Mikado.subprograms.util.rst +++ b/docs/Library/Mikado.subprograms.util.rst @@ -9,6 +9,7 @@ Submodules Mikado.subprograms.util.awk_gtf Mikado.subprograms.util.class_codes + Mikado.subprograms.util.collapse_compare_stats Mikado.subprograms.util.convert Mikado.subprograms.util.grep Mikado.subprograms.util.metrics diff --git a/docs/Library/Mikado.transcripts.rst b/docs/Library/Mikado.transcripts.rst index 604f4a730..8407dd2d6 100644 --- a/docs/Library/Mikado.transcripts.rst +++ b/docs/Library/Mikado.transcripts.rst @@ -15,6 +15,7 @@ Submodules .. 
toctree:: :maxdepth: 4 + Mikado.transcripts.pad Mikado.transcripts.reference_gene Mikado.transcripts.transcript Mikado.transcripts.transcriptchecker diff --git a/docs/Library/Mikado.utilities.rst b/docs/Library/Mikado.utilities.rst index 8ea346127..6c9ac6c49 100644 --- a/docs/Library/Mikado.utilities.rst +++ b/docs/Library/Mikado.utilities.rst @@ -9,11 +9,17 @@ Submodules Mikado.utilities.dbutils Mikado.utilities.f1 + Mikado.utilities.f1 + Mikado.utilities.f1 Mikado.utilities.file_type Mikado.utilities.intervaltree + Mikado.utilities.intervaltree + Mikado.utilities.intervaltree Mikado.utilities.log_utils Mikado.utilities.namespace Mikado.utilities.overlap + Mikado.utilities.overlap + Mikado.utilities.overlap Module contents --------------- diff --git a/docs/References.rst b/docs/References.rst index 97b8c6141..1ccc8e5be 100644 --- a/docs/References.rst +++ b/docs/References.rst @@ -44,4 +44,10 @@ References .. [PYinterval] https://github.com/chaimleib/intervaltree .. [BXPython] https://bitbucket.org/james_taylor/bx-python/overview .. [Snakeviz] https://jiffyclub.github.io/snakeviz/ -.. [PASA] **Improving the Arabidopsis genome annotation using maximal transcript alignment assemblies** Haas, B.J., Delcher, A.L., Mount, S.M., Wortman, J.R., Smith Jr, R.K., Jr., Hannick, L.I., Maiti, R., Ronning, C.M., Rusch, D.B., Town, C.D. et al. *Nucleic Acids Res*, 2003, 31, 5654-5666. doi:10.1093/nar/gkg770 \ No newline at end of file +.. [PASA] **Improving the Arabidopsis genome annotation using maximal transcript alignment assemblies** Haas, B.J., Delcher, A.L., Mount, S.M., Wortman, J.R., Smith Jr, R.K., Jr., Hannick, L.I., Maiti, R., Ronning, C.M., Rusch, D.B., Town, C.D. et al. *Nucleic Acids Res*, 2003, 31, 5654-5666. doi:10.1093/nar/gkg770 +.. [GffRead] **GFF Utilities: GffRead and GffCompare** Pertea, G. and Pertea, M. *F1000*, 2020, 9, ISCB Comm J-304. doi:10.12688/f1000research.23297.2 +.. 
[Prodigal] **Prodigal: prokaryotic gene recognition and translation initiation site identification.** Hyatt, D., + Chen, GL., LoCascio, P.F. et al. BMC Bioinformatics 11, 119 (2010). doi:10.1186/1471-2105-11-119 +.. [TransDecoder] https://github.com/TransDecoder/TransDecoder/wiki +.. [Pandas] **pandas-dev/pandas: Pandas**, The pandas development team. Zenodo, March 2021. doi:10.5281/zenodo.4572994 +.. [PySAM] https://github.com/pysam-developers/pysam \ No newline at end of file diff --git a/docs/Scoring_files.rst b/docs/Scoring_files.rst new file mode 100644 index 000000000..245a1d7a1 --- /dev/null +++ b/docs/Scoring_files.rst @@ -0,0 +1,633 @@ +.. _scoring_files: + +Scoring files +============= + +Mikado employs user-defined configuration files to define the desirable features in genes. These files are in TOML, YAML or JSON format (default YAML) and are composed of five sections: + + #. a *requirements* section, specifying the minimum requirements that a transcript must satisfy to be considered as valid. **Any transcript failing these requirements will be scored at 0 and purged.** + #. a *cds_requirements* section, specifying minimal conditions for a transcript to retain its CDS. If a transcript fails this check, it will be stripped of its coding component. If this section is not provided, the default will be to copy the *requirements* section above (in practice disabling it). + #. a *not_fragmentary* section, specifying the minimum requirements that the primary transcript of a locus has to satisfy in order for the locus **not** to be considered as a putative fragment. + #. an *as_requirements* section, which specifies the minimum requirements for transcripts for them to be considered as possible valid alternative splicing events. + #. a *scoring* section, specifying which features Mikado should look for in transcripts, and how each of them will be weighted. 
+ +Conditions are specified using a strict set of :ref:`available operators ` and the values they have to consider. + +.. important:: Although at the moment Mikado does not offer any method to establish machine-learning based scoring configurations, it is a topic we plan to investigate in the future. Mikado already supports `Random Forest Regressors as scorers through Scikit-learn `_, but we have yet to devise a proper way to create such regressors. + +We provide a guide on how to write your own scoring files in a :ref:`separate tutorial `. + +.. _operators: + +Operators +~~~~~~~~~ + +Mikado allows the following operators to express a relationship inside the scoring files: + +* *eq*: equal to (:math:`=`). Valid for comparisons with numbers, boolean values, and strings. +* *ne*: different from (:math:`\neq`). Valid for comparisons with numbers, boolean values, and strings. +* *lt*: less than (:math:`<`). Valid for comparisons with numbers. +* *gt*: greater than (:math:`>`). Valid for comparisons with numbers. +* *le*: less or equal than (:math:`\le`). Valid for comparisons with numbers. +* *ge*: greater or equal than (:math:`\ge`). Valid for comparisons with numbers. +* *in*: member of (:math:`\in`). Valid for comparisons with arrays or sets. +* *not in*: not member of (:math:`\notin`). Valid for comparisons with arrays or sets. +* *within*: value comprised in the range of the two values, inclusive. +* *not within*: value *not* comprised in the range of the two values, inclusive. + +Mikado will fail if an operator not present on this list is specified, or if the operator is assigned to compare against the wrong data type (eg. *eq* with an array). + +.. _requirements-section: + +The "requirements", "cds_requirements", "as_requirements" and "not_fragmentary" sections +---------------------------------------------------------------------------------------- + +These sections specifies the minimum requirements for a transcript at various stages. 
+ +* A transcript failing to pass the *requirements* check will be discarded outright (if "purge" is selected) or given a + score of 0 otherwise. +* After passing the *requirements* section, if the transcript is coding, Mikado will check whether its CDS passes the + requirements specified in *cds_requirements*. If the check fails, the transcript will be **stripped of its CDS**, and + will only be considered further as a non-coding transcript. *This check can be used to properly consider transcripts + that have a suspicious coding structure - e.g. a single coding exon in a transcript with five or more exons, or a low + Coding Potential score coming from a third-party tool*. +* If a transcript has not been selected as the primary transcript of a locus, it has to pass the *as_requirements* check + to be considered as a valid alternative splicing event. +* Finally, after loci have been defined, the primary transcript of each locus will be checked against the + *not_fragmentary* expression. Any locus failing this check will be marked as a potential fragment and, if in the + vicinity of other loci, might be purged out of the final output or clearly marked as a fragment (depending on whether + the *purge* switch is set to true or false, respectively). + +**It is strongly advised to use lenient parameters in the first requirements section**, as failing to do so might result +in discarding whole loci. Typically, transcripts filtered at this step should be obvious artefacts, eg monoexonic +transcripts produced by RNA-Seq with a total length lower than the *library* fragment length. + +Each of these sections follows the same template, and they are composed by two parts: + +* *parameters*: a list of the metrics to be considered. Each metric can be considered multiple times, by suffixing + it with a "." construct (eg cdna_length.*mono* vs. cdna_length.*multi* to distinguish two uses of the cdna_length + metric - once for monoexonic and once for multiexonic transcripts). 
Any parameter which is not a :ref:`valid metric + name `, after removal of the suffix, **will cause an error**. Parameters have to specify the following: + + * a *value* that the metric has to be compared against + * an *operator* that specifies the target operation. See :ref:`the operators section `. + +* *expression*: a string *array* that will be compiled into a valid boolean expression. All the metrics present in the + expression string **must be present in the parameters section**. If an unrecognized metric is present, Mikado will + crash immediately, complaining that the scoring file is invalid. Apart from brackets, Mikado accepts only the + following boolean operators to chain the metrics: + + * *and* + * *or* + * *not* + * *xor* + +.. hint:: if no *expression* is specified, Mikado will construct one by chaining all the provided parameters with an + *and* operator. Most of the time, this would result in an unexpected behaviour - ie Mikado assigning a score of 0 to + most transcripts. It is **strongly advised** to explicitly provide a valid expression. + +As an example, the following snippet replicates a typical requirements section found in a scoring file: + +.. 
code-block:: yaml + + requirements: + expression: [((exon_num.multi and cdna_length.multi and max_intron_length and min_intron_length), or, + (exon_num.mono and cdna_length.mono))] + parameters: + cdna_length.mono: {operator: gt, value: 50} + cdna_length.multi: {operator: ge, value: 100} + exon_num.mono: {operator: eq, value: 1} + exon_num.multi: {operator: gt, value: 1} + max_intron_length: {operator: le, value: 200000} + min_intron_length: {operator: ge, value: 5} + +In the parameters section, we ask for the following: + + * *exon_num.mono*: monoexonic transcripts must have one exon ("eq") + * *exon_num.multi*: multiexonic transcripts must have more than one exon ("gt") + * *cdna_length.mono*: monoexonic transcripts must have a length greater than 50 bps (the ".mono" suffix is + arbitrary, as long as it is unique for all calls of *cdna_length*) + * *cdna_length.multi*: multiexonic transcripts must have a length greater than or equal to 100 bps (the ".multi" + suffix is arbitrary, as long as it is unique for all calls of *cdna_length*) + * *max_intron_length*: multiexonic transcripts should not have any intron longer than 200,000 bps. + * *min_intron_length*: multiexonic transcripts should not have any intron smaller than 5 bps. + +The *expression* field will be compiled into the following expression:: + + (exon_num > 1 and cdna_length >= 100 and max_intron_length <= 200000 and min_intron_length >= 5) or (exon_num == 1 and cdna_length > 50) + + +Any transcript for which the expression evaluates to ``false`` will be assigned a score of 0 outright and discarded, +unless the user has chosen to disable the purging of such transcripts. + +.. _scoring-section: + +The scoring section +~~~~~~~~~~~~~~~~~~~ + +This section specifies which metrics will be used by Mikado to score the transcripts. Each metric to be used is +specified as a subsection of the configuration, and will have the following attributes: + +* *rescaling*: the only compulsory attribute. 
It specifies the kind of scoring that will be applied to the metric, and + it has to be one of "max", "min", or "target". See :ref:`the explanation on the scoring algorithm ` + for details. +* *value*: compulsory if the chosen rescaling algorithm is "target". This should be either a number or a boolean value. +* *multiplier*: the weight assigned to the metric in terms of scoring. This parameter is optional; if absent, as it is + in the majority of cases, Mikado will consider the multiplier to be equal to 1. This is the :math:`w_{m}` element in the + :ref:`equations above `. +* *filter*: It is possible to specify a filter which the metric has to fulfill to be considered for scoring, eg, + "cdna_length >= 200". If the transcript fails to pass this filter, the score *for this metric only* will be set to 0. + **The filter can apply to a different metric**, so it is possible to e.g. assign a score of 0 for, say, "combined_cds" + to any transcript whose "cdna_length" is, say, below 150 bps. + A "filter" subsection has to specify the following: + + * *operator*: the operator to apply for the boolean expression. See the :ref:`relative section `. + * *value*: value that will be used to assess the metric. + * *metric*: *optional*. The metric that this filter refers to. If omitted, this will be identical to the metric + under examination. + +.. hint:: the purpose of the *filter* section is to allow for fine-tuning of the scoring mechanism; ie it allows to + penalise transcripts with undesirable qualities (eg a possible retained intron) without discarding them + outright. As such, it is a less harsh version of the :ref:`requirements section ` and + it is the preferred way of specifying which transcript features Mikado should be wary of. + +For example, this is a snippet of a scoring section: + +.. 
code-block:: yaml + + scoring: + blast_score: {rescaling: max} + cds_not_maximal: {rescaling: min} + combined_cds_fraction: {rescaling: target, value: 0.8, multiplier: 2} + five_utr_length: + filter: {operator: le, value: 2500} + rescaling: target + value: 100 + end_distance_from_junction: + filter: {operator: lt, value: 55} + rescaling: min + non_verified_introns_num: + rescaling: max + multiplier: -10 + filter: + operator: gt + value: 1 + metric: exons_num + + +Using this snippet as a guide, Mikado will score transcripts in each locus as follows: + +* Assign a full score (one point, as no multiplier is specified) to transcripts which have the greatest *blast_score* +* Assign a full score (one point, as no multiplier is specified) to transcripts which have the lowest amount of CDS + bases in secondary ORFs (*cds_not_maximal*) +* Assign a full score (**two points**, as a multiplier of 2 is specified) to transcripts that have a total amount of CDS + bps approximating 80% of the transcript cDNA length (*combined_cds_fraction*) +* Assign a full score (one point, as no multiplier is specified) to transcripts that have a 5' UTR whose length is + nearest to 100 bps (*five_utr_length*); if the 5' UTR is longer than 2,500 bps, this score will be 0 + (see the filter section) +* Assign a full score (one point, as no multiplier is specified) to transcripts which have the lowest distance between + the CDS end and the most downstream exon-exon junction (*end_distance_from_junction*). If such a distance is greater + than 55 bps, assign a score of 0, as it is a probable target for NMD (see the filter section). +* Assign a maximum penalty (**minus 10 points**, as a **negative** multiplier is specified) to the transcript with the + highest number of non-verified introns in the locus. 
+ + * Again, we are using a "filter" section to define which transcripts will be exempted from this scoring + (in this case, a penalty) + * However, please note that we are using the keyword **metric** in this section. This tells Mikado to check a + *different* metric for evaluating the filter. Nominally, in this case we are excluding from the penalty any + *monoexonic* transcript. This makes sense as a monoexonic transcript will never have an intron to be confirmed to + start with. + +.. note:: The possibility of using different metrics for the "filter" section is present from Mikado 1.3 onwards. + +.. _Metrics: + +Metrics +~~~~~~~ + +These are all the metrics available to quantify transcripts. The documentation for this section has been generated using +the :ref:`metrics utility `. + +Metrics belong to one of the following categories: + +* **Descriptive**: these metrics merely provide a description of the transcript (eg its ID) and are not used for scoring. + +* **cDNA**: these metrics refer to basic features of any transcript such as its number of exons, its cDNA length, etc. + +* **Intron**: these metrics refer to features related to the number of introns and their lengths. + +* **CDS**: these metrics refer to features related to the CDS assigned to the transcript. + +* **UTR**: these metrics refer to features related to the UTR of the transcript. In the case in which a transcript has + been assigned multiple ORFs, unless otherwise stated the UTR metrics will be derived only considering the *selected* + ORF, not the combination of all of them. + +* **Locus**: these metrics refer to features of the transcript in relationship to all other transcripts in its locus, eg + how many of the introns present in the locus are present in the transcript. These metrics are calculated by Mikado during the picking phase, and as such their value can vary during the different stages as the transcripts are shifted to different groups. 
+ +* **External**: these metrics are derived from accessory data that is recovered for the transcript during the run time. + Examples include data regarding the number of introns confirmed by external programs such as Portcullis, or the BLAST + score of the best hits. + +* **Attributes**: these metrics are extracted at runtime from attributes present in the input files. An example of this + could be the TPM or FPKM values assigned to transcripts by RNA expression analysis software. + +.. hint:: Starting from version 1 beta8, Mikado allows to use externally defined metrics for the transcripts. These can + be accessed using the keyword "external." within the *configuration* file. See the + :ref:`relevant section ` for details. + +.. hint:: Starting from version 2, Mikado allows to use attribute defined metrics for the transcripts. These can be + accessed using the keyword "attributes." within the *scoring* file. See the + :ref:`relevant section ` for details. + +.. important:: Starting from Mikado 1 beta 8, it is possible to use metrics with values between 0 and 1 directly as + scores, without rescaling. From Mikado 2, it is also possible to treat values that are between 0 and 100 + in the same way; Mikado will convert them automatically to be between 0 and 1, internally. + It is also possible to instruct Mikado, during scoring, to use certain values as percentages by adding the + :code:`percentage = True` for the scoring parameter. + +.. 
topic:: Available metrics + ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| Metric name | Description | Category | Data type | Usable raw | ++=====================================+===========================================================+=============+=============+==============+ ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| tid | ID of the transcript - cannot be an undefined value. | Descriptive | str | False | +| | Alias of id. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| parent | Name of the parent feature of the transcript. | Descriptive | str | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| score | Numerical value which summarizes the reliability of the | Descriptive | str | False | +| | transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| external_scores | **SPECIAL** this Namespace contains all the information | External | Namespace | True | +| | regarding external scores for the transcript. If an | | | | +| | absent property is not defined in the Namespace, Mikado | | | | +| | will set a default value of 0 into the Namespace and | | | | +| | return it. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| alias | This property returns the alias of the transcript, if | Descriptive | str | False | +| | present, else its ID | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| best_bits | Metric that returns the best BitS associated with the | External | float | False | +| | transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| blast_identity | This metric will return the alignment identity for the | External | float | True | +| | best BLAST hit according to the evalue. If no BLAST hits | | | | +| | are available for the sequence, it will return 0. | | | | +| | :return: :return: | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| blast_query_coverage | This metric will return the **query** coverage for the | External | float | True | +| | best BLAST hit according to the evalue. If no BLAST hits | | | | +| | are available for the sequence, it will return 0. | | | | +| | :return: | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| blast_score | Interchangeable alias for testing different blast-related | External | float | False | +| | scores. Current: best bit score. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| blast_target_coverage | This metric will return the **target** coverage for the | External | float | True | +| | best BLAST hit according to the evalue. 
If no BLAST hits | | | | +| | are available for the sequence, it will return 0. | | | | +| | :return: :return: | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| canonical_intron_proportion | This metric returns the proportion of canonical introns | Intron | float | True | +| | of the transcript on its total number of introns. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| cdna_length | This property returns the length of the transcript. | cDNA | int | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| cds_disrupted_by_ri | This property describes whether the CDS is interrupted | Locus | bool | True | +| | within a retained intron. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| cds_not_maximal | This property returns the length of the CDS excluded from | CDS | int | False | +| | the selected ORF. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| cds_not_maximal_fraction | This property returns the fraction of bases not in the | CDS | float | True | +| | selected ORF compared to the total number of CDS bases in | | | | +| | the cDNA. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_fraction | This property return the percentage of the CDS part of | CDS | float | True | +| | the transcript vs. 
the cDNA length | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_intron_fraction | This property returns the fraction of CDS introns of the | Locus | float | True | +| | transcript vs. the total number of CDS introns in the | | | | +| | Locus. If the transcript is by itself, it returns 1. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_length | This property return the length of the CDS part of the | CDS | int | False | +| | transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_locus_fraction | This metric returns the fraction of CDS bases of the | Locus | float | True | +| | transcript vs. the total of CDS bases in the locus. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_num | This property returns the number of non-overlapping CDS | CDS | int | False | +| | segments in the transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_cds_num_fraction | This property returns the fraction of non-overlapping CDS | CDS | float | True | +| | segments in the transcript vs. the total number of exons | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_utr_fraction | This property returns the fraction of the cDNA which is | UTR | float | True | +| | not coding according to any ORF. 
Complement of | | | | +| | combined_cds_fraction | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| combined_utr_length | This property return the length of the UTR part of the | UTR | int | False | +| | transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| end_distance_from_junction | This metric returns the cDNA distance between the stop | CDS | int | False | +| | codon and the last junction in the transcript. In many | | | | +| | eukaryotes, this distance cannot exceed 50-55 bps | | | | +| | otherwise the transcript becomes a target of NMD. If the | | | | +| | transcript is not coding or there is no junction | | | | +| | downstream of the stop codon, the metric returns 0. This | | | | +| | metric considers the combined CDS end. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| end_distance_from_tes | This property returns the distance of the end of the | CDS | int | False | +| | combined CDS from the transcript end site. If no CDS is | | | | +| | defined, it defaults to 0. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| exon_fraction | This property returns the fraction of exons of the | Locus | float | True | +| | transcript which are contained in the sublocus. If the | | | | +| | transcript is by itself, it returns 1. Set from outside. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| exon_num | This property returns the number of exons of the | cDNA | int | False | +| | transcript. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| five_utr_length | Returns the length of the 5' UTR of the selected ORF. | UTR | float | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| five_utr_num | This property returns the number of 5' UTR segments for | UTR | int | False | +| | the selected ORF. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| five_utr_num_complete | This property returns the number of 5' UTR segments for | UTR | int | False | +| | the selected ORF, considering only those which are | | | | +| | complete exons. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| has_start_codon | Boolean. True if the selected ORF has a start codon. | CDS | bool | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| has_stop_codon | Boolean. True if the selected ORF has a stop codon. | CDS | bool | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| highest_cds_exon_number | This property returns the maximum number of CDS segments | CDS | int | False | +| | among the ORFs; this number can refer to an ORF | | | | +| | *DIFFERENT* from the maximal ORF. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| highest_cds_exons_num | Returns the number of CDS segments in the selected ORF | CDS | int | False | +| | (irrespective of the number of exons involved) | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| intron_fraction | This property returns the fraction of introns of the | Locus | float | True | +| | transcript vs. the total number of introns in the Locus. | | | | +| | If the transcript is by itself, it returns 1. Set from | | | | +| | outside. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| is_complete | Boolean. True if the selected ORF has both start and end. | CDS | bool | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| is_reference | Checks whether the transcript has been marked as | External | bool | False | +| | reference by Mikado prepare | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| max_exon_length | This metric will return the length of the biggest exon in | cDNA | int | False | +| | the transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| max_intron_length | This property returns the greatest intron length for the | Intron | int | False | +| | transcript. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| min_exon_length | This metric will return the length of the biggest exon in | cDNA | int | False | +| | the transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| min_intron_length | This property returns the smallest intron length for the | Intron | int | False | +| | transcript. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| non_verified_introns_num | This metric returns the number of introns of the | External | int | False | +| | transcript which are not validated by external data. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| num_introns_greater_than_max | This metric returns the number of introns greater than | Intron | int | False | +| | the maximum acceptable intron size indicated in the | | | | +| | constructor. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| num_introns_smaller_than_min | This metric returns the number of introns smaller than | Intron | int | False | +| | the mininum acceptable intron size indicated in the | | | | +| | constructor. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| number_internal_orfs | This property returns the number of ORFs inside a | CDS | int | False | +| | transcript. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| only_non_canonical_splicing | This metric will return True if the canonical_number is 0 | Intron | bool | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| original_source | This property returns the original source assigned to the | Descriptive | str | False | +| | transcript (before Mikado assigns its own final source | | | | +| | value). | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| proportion_verified_introns | This metric returns, as a fraction, how many of the | External | float | True | +| | transcript introns are validated by external data. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| proportion_verified_introns_inlocus | This metric returns, as a fraction, how many of the | Locus | float | True | +| | verified introns inside the Locus are contained inside | | | | +| | the transcript. In loci without *any* verified introns, | | | | +| | this metric will be set to 1. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| retained_fraction | This property returns the fraction of the cDNA which is | Locus | float | True | +| | contained in retained introns. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| retained_intron_num | This property records the number of introns in the | Locus | int | False | +| | transcripts which are marked as being retained. 
See the | | | | +| | corresponding method in the sublocus class. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_exons_fraction | Returns the fraction of CDS segments in the selected ORF | CDS | float | True | +| | (irrespective of the number of exons involved) | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_fraction | This property calculates the fraction of the selected CDS | CDS | float | True | +| | vs. the cDNA length. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_intron_fraction | This property returns the fraction of CDS introns of the | CDS | float | True | +| | selected ORF of the transcript vs. the total number of | | | | +| | CDS introns in the Locus (considering only the selected | | | | +| | ORF). If the transcript is by itself, it should return 1. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_length | This property calculates the length of the CDS selected | CDS | int | False | +| | as best inside the cDNA. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_locus_fraction | This metric returns the fraction of CDS bases of the | Locus | float | True | +| | transcript vs. the total of CDS bases in the locus. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_num | This property calculates the number of CDS exons for the | CDS | int | False | +| | selected ORF | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_cds_number_fraction | This property returns the proportion of best possible CDS | CDS | float | False | +| | segments vs. the number of exons. See | | | | +| | selected_cds_number. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_end_distance_from_junction | This metric returns the distance between the stop codon | CDS | int | False | +| | and the last junction of the transcript. In many | | | | +| | eukaryotes, this distance cannot exceed 50-55 bps, | | | | +| | otherwise the transcript becomes a target of NMD. If the | | | | +| | transcript is not coding or there is no junction | | | | +| | downstream of the stop codon, the metric returns 0. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_end_distance_from_tes | This property returns the distance of the end of the best | CDS | int | False | +| | CDS from the transcript end site. If no CDS is defined, | | | | +| | it defaults to 0. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| selected_start_distance_from_tss | This property returns the distance of the start of the | CDS | int | False | +| | best CDS from the transcript start site. If no CDS is | | | | +| | defined, it defaults to 0. 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| snowy_blast_score | Metric that indicates how good a hit is compared to the | External | float | False | +| | competition, in terms of BLAST similarities. As in | | | | +| | SnowyOwl, the score for each hit is calculated by taking | | | | +| | the coverage of the target and dividing it by (2 * | | | | +| | len(self.blast_hits)). IMPORTANT: when splitting | | | | +| | transcripts by ORF, a blast hit is added to the new | | | | +| | transcript only if it is contained within the new | | | | +| | transcript. This WILL screw up a bit the homology score. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| source_score | This metric returns a score that is assigned to the | External | float | False | +| | transcript in virtue of its origin. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| start_distance_from_tss | This property returns the distance of the start of the | CDS | int | False | +| | combined CDS from the transcript start site. If no CDS is | | | | +| | defined, it defaults to 0. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| suspicious_splicing | This metric will return True if the transcript either has | Intron | bool | False | +| | canonical introns on both strands (probably a chimeric | | | | +| | artifact between two neighbouring loci, or if it has no | | | | +| | canonical splicing event but it would if it were assigned | | | | +| | to the opposite strand (probably a strand misassignment | | | | +| | on the part of the assembler/predictor). 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| three_utr_length | Returns the length of the 5' UTR of the selected ORF. | | int | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| three_utr_num | This property returns the number of 3' UTR segments | UTR | int | False | +| | (referred to the selected ORF). | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| three_utr_num_complete | This property returns the number of 3' UTR segments for | UTR | int | False | +| | the selected ORF, considering only those which are | | | | +| | complete exons. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| utr_fraction | This property calculates the length of the UTR of the | UTR | float | True | +| | selected ORF vs. the cDNA length. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| utr_length | Returns the sum of the 5'+3' UTR lengths | UTR | int | False | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| utr_num | Returns the number of UTR segments (referred to the | UTR | int | False | +| | selected ORF). | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| utr_num_complete | Returns the number of UTR segments which are complete | UTR | int | False | +| | exons (referred to the selected ORF). 
| | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ +| verified_introns_num | This metric returns the number of introns of the | External | int | False | +| | transcript which are validated by external data. | | | | ++-------------------------------------+-----------------------------------------------------------+-------------+-------------+--------------+ + + +.. _external-metrics: + +External metrics +~~~~~~~~~~~~~~~~ + +Starting from version 1 beta 8, Mikado allows to load external metrics into the database, to be used for evaluating transcripts. Metrics of this kind **must** have a value comprised between 0 and 1. +The file can be provided either by specifying it in the :ref:`coonfiguration file `, under "serialise/files/external_scores", or on the command line with the "--external-scores" parameters to mikado :ref:`serialise `. +The external scores file should have the following format: + ++--------------------------+------------------+------------------+-------+-----------------+ +| TID | Metric_one | Metric_two | ... | Metric_N | ++==========================+==================+==================+=======+=================+ +| Transcript_one | value | value | | value | ++--------------------------+------------------+------------------+-------+-----------------+ +| Transcript_two | value | value | | value | ++--------------------------+------------------+------------------+-------+-----------------+ +| ... | ... | ... | | ... | ++--------------------------+------------------+------------------+-------+-----------------+ +| Transcript_N | value | value | | value | ++--------------------------+------------------+------------------+-------+-----------------+ + + +Please note the following: + +* the header is mandatory. 
+* the metric names at the head of the table should **not** contain any space or special characters, apart from the underscore (_) +* the header provides the name of the metric as will be seen by Mikado. As such, it is advised to choose sensible and informative names (e.g. "fraction_covered") rather than uninformative ones (e.g. the "metric_one" from above) +* Column names **must be unique**. +* The transcript names present in the first column **must** be present in the FASTA file. +* The table should be tab-separated. +* Values can be of any numerical or boolean type. However, only values that are determined **at serialisation** to be comprised within 0 and 1 (inclusive), or between 0 and 100 (ie *percentages*) can be used as raw values. + +A proper way of generating and using external scores would, therefore, be the following: + +* Run Mikado prepare on the input dataset. +* Run all necessary supplementary analyses (ORF calling and/or homology analysis through DIAMOND or BLAST). +* Run supplementary analyses to assess the transcripts, e.g. expression analysis. Normalise results so that they can be expressed with values between 0 and 1. + + * Please note that boolean results (e.g. presence or absence) can be expressed with 0 and 1 instead of "False" and "True". Customarily, in Python 0 stands for False and 1 for True, but you can choose to switch the order if you so desire. +* Aggregate all results in a text table, like the one above, tab separated. +* Call mikado serialise, specifying the location of this table either through the configuration file or on the command line invocation. + +Given the open ended nature of the external scores, the Daijin pipeline currently does not offer any system to generate these scores. This might change in the future. + +.. note:: our laboratory has implemented a novel pipeline, Minos (:ref:`https://github.com/EI-CoreBioinformatics/minos `) for integrating multiple sources of evidence using Mikado. 
Internally, Minos makes use of the external metrics described here. We recommend having a look at the pipeline for inspiration. + +Adding external scores to the scoring file +------------------------------------------ + +Once the external metrics have been properly loaded, it is necessary to tell Mikado how to use them. This requires :ref:`modifying the scoring file itself `. The header that we used in the table above does provide the names of the metrics as they will be seen by Mikado. + +Let us say that we have performed an expression analysis on our transcripts, and we have created and loaded the following three metrics: + +* "fraction_covered", ie the percentage of the transcript covered by at least X reads (where X is decided by the experimenter) +* "samples_expressed", ie the percentage of samples where the transcript was expressed over a certain threshold (e.g. 1 TPM) +* "has_coverage_gaps", ie a boolean metric that indicates whether there are windows *within* the transcript that are lowly or not at all covered (e.g. a 100bp stretch with no coverage between two highly covered regions, indicating a possible intron retention or chimera). For this example, a value of "0" indicates that there are no coverage gaps (ie. it is *False* that there are gaps), "1" otherwise (it is *True* that there are coverage gaps). + +We can now use these metrics as normal, by invoking them as "external." followed by the name of the metrics: e.g., "external.fraction_covered". +So for example, if we wished to prioritise transcripts that are expressed in the highest number of samples and are completely covered by RNASeq data upon reads realignment, under "scoring", we can add the following: + +.. code-block:: yaml + + scoring: + # [ ... other metrics ... 
] + - external.samples_expressed: {rescaling: max} + - external.fraction_covered: {rescaling: max} + +And if we wanted to consider any primary transcript with coverage gaps as a potential fragment, under the "fragmentary" section we could do so: + +.. code-block:: yaml + + not_fragmentary: + expression: + # other metrics .. + - and (external.has_coverage_gaps) + # Finished expression + parameters: + # other metrics ... + external.has_coverage_gaps: {operator: eq, value: 0} # Please note, again, that here "0" means "no coverage gaps detected". + # other metrics ... + +As external metrics allow Mikado to accept any arbitrary metric for each transcript, they allow the program to assess transcripts in any way the experimenter desires. However, currently we do not provide any way of automating the process. + +.. note:: also for external metrics, it is necessary to add a suffix to them if they are invoked more than once in an expression (see the :ref:`tutorial `). An invocation of e.g. "external.samples_expressed.mono" and "external.samples_expressed.multi", to distinguish between monoexonic and multiexonic transcripts, would be perfectly valid and actually *required* by Mikado. Notice the double use of the dot (".") as separator. Its usage as such is the reason that it cannot be present in the name of the metric itself (so, for example, "has.coverage.gaps" would be an invalid metric name). + +.. _attributes-metrics: + +Attributes metrics +~~~~~~~~~~~~~~~~~~ +Starting from version 2, Mikado allows the usage of metrics defined in the attributes of the input files, these metrics +behave as the rest of the metrics but they are gathered at runtime from the input datasets. It is important to note that +these metrics must be equivalent in all the inputs and are by default initialised to "0" when a transcript does not have +an attribute defining the metric. The default initialisation value can be overridden in the scoring file. 
+ +Attribute metrics along with the required **rescaling** parameter, can define a *rtype* parameter as one of (float, int +or bool) which will be used to cast the value of the attribute internally, and a *percentage* boolean which indicates +that the values are in the 0-100 range and enables a transformation to the 0-1 range so that these can be used as 'raw' +scores (see the :ref:`scoring algorithm section <_scoring_algorithm>`). + +An example for the usage of these metrics could be:: + + Chr5 Cufflinks transcript 26581218 26583874 1000 - . gene_id "cufflinks_star_at.23551";transcript_id "cufflinks_star_at.23551.1";exon_number "1";FPKM "0.4343609420";conf_hi "0.577851";frac "0.751684";cov "11.982854";conf_lo "0.293994";percentage_score "42.42" + Chr5 Cufflinks exon 26581218 26581528 . - . gene_id "cufflinks_star_at.23551";transcript_id "cufflinks_star_at.23551.1"; + Chr5 Cufflinks exon 26583335 26583874 . - . gene_id "cufflinks_star_at.23551";transcript_id "cufflinks_star_at.23551.1"; + + +If the scoring file defines: + +.. code-block:: yaml + + scoring: + # [ ... other metrics ... ] + - attributes.FPKM: {rescaling: max} + - attributes.frac: {rescaling: max, use_raw: true} + - attributes.percentage_score: {rescaling: max, use_raw: true, percentage: true} + +The same scoring rules defined previously will apply to metrics obtained from the transcript's attributes. \ No newline at end of file diff --git a/docs/Tutorial/Adapting.rst b/docs/Tutorial/Adapting.rst index 1de0d8fab..f0fb73b09 100644 --- a/docs/Tutorial/Adapting.rst +++ b/docs/Tutorial/Adapting.rst @@ -249,7 +249,6 @@ Looking at the scoring section of the file, we do not need to apply anything par :class: toggle .. 
code-block:: yaml - :linenos: scoring: snowy_blast_score: {rescaling: max} diff --git a/docs/Tutorial/Daijin_tutorial.rst b/docs/Tutorial/Daijin_tutorial.rst index 5f952e199..07c804105 100644 --- a/docs/Tutorial/Daijin_tutorial.rst +++ b/docs/Tutorial/Daijin_tutorial.rst @@ -16,10 +16,7 @@ Tutorial for Daijin This tutorial will guide you through the task of configuring and running the whole Daijin pipeline on a *Drosophila melanogaster* dataset comprising two different samples, using one aligner (HISAT) and two assemblers (Stringtie and CLASS2) as chosen methods. A modern desktop computer with a multicore processor and 4GB of RAM or more should suffice to execute the pipeline within two hours. -.. warning:: Please note that **development of Daijin Assemble has now been discontinued**. - Daijin will be superseded by a different pipeline manager, which is currently in the works. We will continue actively maintening the - "mikado" part of the pipeline, which is dedicated to run the steps between a set of input transcript assemblies and/or cDNA alignments until - the final Mikado output. +.. warning:: Please note that **development of Daijin Assemble has now been discontinued**. Daijin will be superseded by a different pipeline manager, which is currently in the works. We will continue the active maintenance of the "mikado" part of the pipeline, which is dedicated to run the steps between a set of input transcript assemblies and/or cDNA alignments until the final Mikado output. Overview ~~~~~~~~ @@ -243,11 +240,11 @@ Step 2: running the assemble part Now that we have created a proper configuration file, it is time to launch Daijin assemble and inspect the results. Issue the command:: - daijin assemble --cores daijin.yaml + daijin assemble --cores daijin.toml After checking that the configuration file is valid, Daijin will start the alignment and assembly of the dataset. On a normal desktop computer, this should take less than 2 hours.
Before launching the pipeline, you can obtain a graphical representation of the steps with:: - daijin assemble --dag daijin.yaml | dot -Tsvg > assemble.svg + daijin assemble --dag daijin.toml | dot -Tsvg > assemble.svg .. figure:: assemble_pipeline.png :scale: 50% @@ -255,7 +252,7 @@ After checking that the configuration file is valid, Daijin will start the align You can also ask Daijin to display the steps to be executed, inclusive of their command lines, by issuing the following command:: - daijin assemble --dryrun daijin.yaml + daijin assemble --dryrun daijin.toml When Daijin is finished, have a look inside the folder Dmelanogaster/3-assemblies/output/; you will find the following GTF files: diff --git a/docs/Tutorial/Scoring_tutorial.rst b/docs/Tutorial/Scoring_tutorial.rst index 5dfdf8191..b741a7c27 100644 --- a/docs/Tutorial/Scoring_tutorial.rst +++ b/docs/Tutorial/Scoring_tutorial.rst @@ -15,16 +15,16 @@ expose a general way to create your own configuration file. The purpose of scoring files: introductory note ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mikado is an annotation pipeline, and as such, it does not have, at its core, the mission to represent every possible expressed transcript. Some sequenced transcripts may originate from transcriptional errors, exist only transiently and may have no functional role. When performing a sequencing experiment, we create a “snapshot” of the transient transcriptional state of the cell: inclusive of these erroneous events, along with immature transcripts and artifacts arising from misassemblies, fragmentation, genomic inclusion, etc. +Mikado is an annotation pipeline, and as such, it does not have, at its core, the mission to represent every possible expressed transcript. Some sequenced transcripts may originate from transcriptional errors, exist only transiently or may have no functional role. 
When performing a sequencing experiment, we create a “snapshot” of the transient transcriptional state of the cell: inclusive of erroneous events, along with immature transcripts and artifacts arising from misassemblies, fragmentation during library construction, genomic DNA contamination, etc. -In Mikado, we make a choice regarding what transcripts we want to retain in our annotation (as is the case with any genome annotation). For example, as you will see in this tutorial, the standard configuration penalises transcripts with retained CDS introns, and transcripts that are NMD targets. This choice does not mean that we think that these transcripts are artifacts; rather, it signifies that we prioritise those transcripts that are more likely to represent functionally active genes. Annotators will differ on the point: for example, the human reference annotation retains and marks these events, rather than discarding them. +In Mikado, we make a choice regarding what transcripts we want to retain in our annotation (as is the case with any genome annotation). For example, as you will see in this tutorial, the standard configuration penalises transcripts with retained CDS introns, and transcripts that are NMD targets. This choice does not mean that we think that these transcripts are artifacts; rather, it signifies that we prioritise those transcripts that are more likely to represent functionally active genes. Annotators will differ on this point: for example, the human reference annotation retains and marks these events, rather than discarding them. Mikado allows the experimenter to make these choices simply and transparently. Our pre-defined configuration files strive to replicate the choices made by annotators over the years, and thus allow to replicate - as much as possible - the reference annotations of various species starting from partial sequencing data. 
However, if as an experimenter you are interested in a more stringent approach - say, only coding transcripts with a complete ORF and a UTR - or you would like instead to perform only a minimal cleaning up - say, just discarding obvious NMD targets - Mikado will allow you to do so. This tutorial will show you how. Obtaining pre-defined scoring files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mikado comes with pre-packaged scoring files for multiple species. These can be found in the installation directory, under "configuration/scoring_files"; or, when launching mikado configure, you can request to copy a template file to the working directory ("--copy-scoring" command flag). In the rest of the tutorial, however, we will presume that no suitable scoring file exists and that a new one has to be created from scratch. +Mikado comes with two pre-packaged scoring files, one for plant species as well as another generic for mammalian species. These can be found in the installation directory, under "configuration/scoring_files"; or, when launching mikado configure, you can request to copy a template file to the working directory ("--copy-scoring" command flag). In the rest of the tutorial, however, we will presume that no suitable scoring file exists and that a new one has to be created from scratch. .. _scoring-tutorial-first-reqs: diff --git a/docs/Tutorial/index.rst b/docs/Tutorial/index.rst index 61f6b1f46..417427693 100644 --- a/docs/Tutorial/index.rst +++ b/docs/Tutorial/index.rst @@ -3,80 +3,110 @@ Tutorial ======== -This tutorial will guide you through a simple analysis of Mikado, using a small amount of data coming from an experiment on *Arabidopsis thaliana*. RNA-Seq data was obtained from `study PRJEB7093 on ENA `_, aligned with STAR [STAR]_ against the `TAIR10 `_ reference genome, and assembled with four different programs. For this small example, we are going to focus on a small genomic region: Chr5, from 26,575,364 to 26,614,205. 
+This tutorial will guide you through a simple analysis of Mikado, using a small amount of data coming from an +experiment on *Arabidopsis thaliana*. RNA-Seq data was obtained from `study PRJEB7093 on ENA `_, aligned with STAR [STAR]_ against the `TAIR10 `_ +reference genome, and assembled with four different programs. For this small example, we are going to focus on a +small genomic region: Chr5, from 26,575,364 to 26,614,205. During this analysis, you will require the following files: * :download:`chr5.fas.gz `: a FASTA file containing the Chr5 of *A. thaliana*. -* :download:`reference.gff3`: a GFF3 file with the annotation of the genomic slice we are interested in, for comparison purposes. -* :download:`junctions.bed `: a BED12 file of reliable splicing junctions in the region, identified using Portcullis [Portcullis]_ +* :download:`reference.gff3`: a GFF3 file with the annotation of the genomic slice we are interested in, for + comparison purposes. +* :download:`junctions.bed `: a BED12 file of reliable splicing junctions in the region, identified + using Portcullis [Portcullis]_ * :download:`class.gtf`: a GTF file of transcripts assembled using CLASS [Class2]_ * :download:`cufflinks.gtf`: a GTF file of transcripts assembled using Cufflinks [Cufflinks]_ * :download:`stringtie.gtf`: a GTF file of transcripts assembled using Stringtie [StringTie]_ * :download:`trinity.gff3`: a GFF3 file of transcripts assembled using Trinity [Trinity]_ and aligned using GMAP [GMAP]_ * :download:`orfs.bed`: a BED12 file containing the ORFs of the above transcripts, derived using TransDecoder [Trinity]_ -* :download:`uniprot_sprot_plants.fasta.gz`: a FASTA file containing the plant proteins released with SwissProt [Uniprot]_ +* :download:`uniprot_sprot_plants.fasta.gz`: a FASTA file containing the plant proteins released with SwissProt + [Uniprot]_ -All of this data can also be found in the ``sample_data`` directory of the `Mikado source `_. 
+All of this data can also be found in the ``sample_data`` directory of the `Mikado source `_. You will also require the following software: * a functioning installation of SQLite. * a functioning version of BLAST+ [Blastplus]_. +* a functioning version of Prodigal [Prodigal]_. Creating the configuration file for Mikado ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In the first step, we need to create a configuration file to drive Mikado. To do so, we will first create a tab-delimited file describing our assemblies (class.gtf, cufflinks.gtf, stringtie.gtf, trinity.gff3):: +In the first step, we need to create a configuration file to drive Mikado. To do so, we will first create a +tab-delimited file describing our assemblies (class.gtf, cufflinks.gtf, stringtie.gtf, trinity.gff3):: - class.gtf cl True - cufflinks.gtf cuff True - stringtie.gtf st True 1 - trinity.gff3 tr False -0.5 + class.gtf cl True False False True + cufflinks.gtf cuff True False False True + stringtie.gtf st True 1 False True True + trinity.gff3 tr False -0.5 False False True + reference.gff3 at True 5 True False False + pacbio.bam pb True 1 False False False -In this file, the three fields define the following: +In this file, the first three fields define the following: -#. The file location and name (if no folder is specified, Mikado will look for each file in the current working directory) +#. The file location and name (if no folder is specified, Mikado will look for each file in the current working +directory) #. An alias associated with the file, which has to be unique #. A binary flag (``True`` / ``False``) indicating whether the assembly is strand-specific or not -#. An optional fourth field which defines a score associated with that sample. All transcripts associated with the label will have their score corrected by the value on this field. So eg. in this example all Stringtie models will receive an additional point, and all Trinity models will be penalised by half a point. 
Class and Cufflinks have no special bonus or malus associated with them. -Then, we will decompress the chromosome FASTA file: +These fields are then followed by a series of **optional** fields: -.. code-block:: bash - - gunzip chr5.fas.gz # This will create the file chr5.fas +#. A score associated with that sample. All transcripts associated with the label will have their score corrected by + the value on this field. So eg. in this example all Stringtie models will receive an additional point, and all + Trinity models will be penalised by half a point. Class and Cufflinks have no special bonus or malus associated. +#. A binary flag (``True`` / ``False``) defining whether the sample is a reference or not. +#. A binary flag (``True`` / ``False``) defining whether to exclude redundant models or not. +#. A binary flag (``True`` / ``False``) indicating whether Mikado prepare should strip the CDS of faulty models, but + otherwise keep their cDNA structure in the final output (``True``) or whether instead it should completely discard + such models (``False``). +#. A binary flag (``True`` / ``False``) instructing Mikado about whether the chimera split routine should be skipped + for these models (``True``) or if instead it should proceed normally (``False``). Finally, we will create the configuration file itself using ``mikado configure``: .. code-block:: bash - mikado configure --list list.txt --reference chr5.fas --mode permissive --scoring plants.yaml --copy-scoring plants.yaml --junctions junctions.bed -bt uniprot_sprot_plants.fasta configuration.yaml + mikado configure --list list.txt --reference chr5.fas.gz --mode permissive --scoring plants.yaml --copy-scoring +plants.yaml --junctions junctions.bed -bt uniprot_sprot_plants.fasta configuration.yaml -This will create a configuration.yaml file with the parameters that were specified on the command line. This is :ref:`simplified configuration file `, containing all the necessary parameters for the Mikado run. 
It will also copy the ``plants.yaml`` file from the Mikado installation to your current working directory. -The parameters we used for the command line instruct Mikado in the following ways: +This will create a configuration.yaml file with the parameters that were specified on the command line. This is +:ref:`simplified configuration file `, containing all the necessary parameters for the Mikado run. It +will also copy the ``plants.yaml`` file from the Mikado installation to your current working directory. -Alternatively, we could use a BGZIP-compressed file as input for ``mikado configure``: +.. hint:: Mikado can accept compressed genome FASTA files, like in this example, as long as they have been compressed + with BGZip rather than the vanilla UNIX GZip. -.. code-block:: bash - bgzip chr5.fas - samtools faidx chr5.fas.gz - mikado configure --list list.txt --reference chr5.fas.gz --mode permissive --scoring plants.yaml --copy-scoring plants.yaml --junctions junctions.bed -bt uniprot_sprot_plants.fasta configuration.yaml - -* *--list list.txt*: this part of the command line instructs Mikado to read the file we just created to understand where the input files are and how to treat them. +* *--list list.txt*: this part of the command line instructs Mikado to read the file we just created to understand + where the input files are and how to treat them. +* *--scoring*: the scoring file to use. Mikado ships with two pre-calculated scoring files, `plant.yaml` and +`mammalian.yaml` +* *--copy-scoring*: instruct Mikado to copy the scoring file from the installation directory to the current +directory, so that the experimenter can modify it as needed. * *--reference chr5.fas*: this part of the command line instructs Mikado on the location of the genome file. -* *--mode permissive*: the mode in which Mikado will treat cases of chimeras. See the :ref:`documentation ` for details. 
-* *--junctions junctions.bed*: this part of the command line instructs Mikado to consider this file as the source of reliable splicing junctions. -* *-bt uniprot_sprot_plants.fasta*: this part of the command line instructs Mikado to consider this file as the BLAST database which will be used for deriving homology information. +* *--mode permissive*: the mode in which Mikado will treat cases of chimeras. See the :ref:`documentation + ` for details. +* *--junctions junctions.bed*: this part of the command line instructs Mikado to consider this file as the source of + reliable splicing junctions. +* *-bt uniprot_sprot_plants.fasta*: this part of the command line instructs Mikado to consider this file as the BLAST + database which will be used for deriving homology information. -.. hint:: The *--copy-scoring* argument is usually not necessary, however, it allows you to easily inspect the :ref:`scoring file ` we are going to use during this run. +.. hint:: The *--copy-scoring* argument is usually not necessary, however, it allows you to easily inspect the +:ref:`scoring file ` we are going to use during this run. -.. hint:: Mikado provides a handful of pre-configured scoring files for different species. However, we do recommend inspecting and tweaking your scoring file to cater to your species. We provide a guide on how to create your own configuration files :ref:`here `. +.. hint:: Mikado provides a handful of pre-configured scoring files for different species. However, we do recommend +inspecting and tweaking your scoring file to cater to your species. We provide a guide on how to create your own +configuration files :ref:`here `. Mikado prepare ~~~~~~~~~~~~~~ -The subsequent step involves running ``mikado prepare`` to create a :ref:`sorted, non-redundant GTF with all the input assemblies `. 
As we have already created a configuration file with all the details regarding the input files, this will require us only to issue the command: +The subsequent step involves running ``mikado prepare`` to create a :ref:`sorted, non-redundant GTF with all the +input assemblies `. As we have already created a configuration file with all the details regarding the input +files, this will require us only to issue the command: .. code-block:: bash @@ -117,48 +147,88 @@ To create this file, we will proceed as follows: .. code-block:: bash - blastx -max_target_seqs 5 -num_threads 10 -query mikado_prepared.fasta -outfmt 5 -db uniprot_sprot_plants.fasta -evalue 0.000001 2> blast.log | sed '/^$/d' | gzip -c - > mikado.blast.xml.gz + blastx -max_target_seqs 5 -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send +evalue bitscore ppos btop" + -num_threads 10 -query mikado_prepared.fasta -db uniprot_sprot_plants.fasta -out mikado_prepared.blast.tsv + +This will produce the ``mikado_prepared.blast.tsv`` file, which contains the homology information for the run. + +.. warning:: Mikado requires a **custom** tabular file from BLAST, as we rely on the information on extra fields such + as e.g. ``btop``. Therefore the custom fields following ``-outfmt 6`` are **not** optional. + +ORF calculation for the transcripts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many of the metrics used by Mikado to evaluate and rank transcripts rely on the definition of their coding regions +(CDS). It is therefore *highly recommended* to use an ORF predictor to define the coding regions for each transcript +identified by `mikado prepare`. We directly support two different products: + +* :ref:`Prodigal `, a fast ORF predictor, capable of calculating thousands of + ORFs in seconds. However, as it was originally developed for ORF calling in bacterial genomes, it may occasionally + not provide the best possible answer.
+* :ref:`TransDecoder `, a slower ORF predictor that is however more + specialised for eukaryotes. + +For this tutorial we are going to use Prodigal. Using it is very straightforward: + +.. code-block:: bash + + prodigal -i mikado_prepared.fasta -g 1 -o mikado.orfs.gff3 -f gff + + +.. warning:: Prodigal by default uses the 'Bacterial' codon translation table, which is of course not appropriate at + all for our eukaryote genome. Therefore, it is essential to set ``-g 1`` on the command line. + By the same token, as Prodigal normally would output the CDS prediction in GenBank format (currently not + supported by Mikado), we have to instruct Prodigal to emit its CDS predictions in GFF format. -This will produce the ``mikado.blast.xml.gz`` file, which contains the homology information for the run. Mikado serialise ~~~~~~~~~~~~~~~~ -This step involves running ``mikado serialise`` to create a SQLite database with all the information that mikado needs to perform its analysis. As most of the parameters are already specified inside the configuration file, the command line is quite simple: +This step involves running ``mikado serialise`` to create a SQLite database with all the information that mikado +needs to perform its analysis. As most of the parameters are already specified inside the configuration file, the +command line is quite simple: .. code-block:: bash - mikado serialise --json-conf configuration.yaml --xml mikado.blast.xml.gz --orfs mikado.bed --blast_targets + mikado serialise --json-conf configuration.yaml --xml mikado_prepared.blast.tsv --orfs mikado.orfs.gff3 +--blast_targets uniprot_sprot_plants.fasta --junctions junctions.bed After mikado serialise has run, it will have created two files: #. ``mikado.db``, the SQLite database that will be used by ``pick`` to perform its analysis. #. ``serialise.log``, the log of the run.
-If you inspect the SQLite database ``mikado.db``, you will see it contains six different tables:: +If you inspect the SQLite database ``mikado.db``, you will see it contains nine different tables:: + + $ sqlite3 mikado.db ".tables" + chrom hit orf + external hsp query + external_sources junctions target - $ sqlite3 mikado.db - SQLite version 3.8.2 2013-12-06 14:53:30 - Enter ".help" for instructions - Enter SQL statements terminated with a ";" - sqlite> .tables - chrom hit hsp junctions orf query target +These tables contain the information coming from the genome FAI, the BLAST XML, the junctions BED file, +the ORFs BED file, and finally the input transcripts and the proteins. There are two additional tables (``external`` +and ``external_sources``) which in other runs would contain information on additional data, provided as tabular files. -These tables contain the information coming, in order, from the genome FAI, the BLAST XML, the junctions BED file, the ORFs BED file, and finally the input transcripts and the proteins. For more details on the database structure, please refer to the section on :ref:`this step ` in this documentation. +For more details on the database structure, please refer to the section on :ref:`this step ` in this +documentation. Mikado pick ~~~~~~~~~~~ -Finally, during this step ``mikado pick`` will integrate the data present in the database with the positional and structural data present in the GTF file :ref:`to select the best transcript models `. The command line to be issued is the following: +Finally, during this step ``mikado pick`` will integrate the data present in the database with the positional and +structural data present in the GTF file :ref:`to select the best transcript models `. The command line to be +issued is the following: .. 
code-block:: bash - mikado pick --json-conf configuration.yaml --subloci_out mikado.subloci.gff3 + mikado pick --configuration configuration.yaml --subloci_out mikado.subloci.gff3 At this step, we have to specify only some parameters for ``pick`` to function: -* *json-conf*: the configuration file. This is the only compulsory option. -* *subloci_out*: the partial results concerning the *subloci* step during the selection process will be written to ``mikado.subloci.gff3``. +* *--configuration*: the configuration file. This is the only compulsory option. +* *--subloci_out*: the partial results concerning the *subloci* step during the selection process will be written to +``mikado.subloci.gff3``. ``mikado pick`` will produce the following output files: @@ -329,7 +399,8 @@ After selecting the best transcripts in each locus, Mikado has discarded most of Analysing the tutorial data with Snakemake ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The workflow described in this tutorial can be executed automatically using Snakemake [Snake]_ with :download:`this Snakefile `. Just execute: +The workflow described in this tutorial can be executed automatically using Snakemake [Snake]_ with :download:`this Snakefile `. +Just execute: .. code-block:: bash @@ -341,5 +412,3 @@ in the directory where you have downloaded all of the tutorial files. 
In graph r :align: center :scale: 50% :figwidth: 100% - - diff --git a/docs/Tutorial/mikado.png b/docs/Tutorial/mikado.png new file mode 100644 index 000000000..3113f6cc4 Binary files /dev/null and b/docs/Tutorial/mikado.png differ diff --git a/docs/Tutorial/snake.png b/docs/Tutorial/snake.png new file mode 100644 index 000000000..2128f4be1 Binary files /dev/null and b/docs/Tutorial/snake.png differ diff --git a/docs/Usage/Compare.rst b/docs/Usage/Compare.rst index 373f7b693..9868832d8 100644 --- a/docs/Usage/Compare.rst +++ b/docs/Usage/Compare.rst @@ -16,7 +16,11 @@ This Mikado utility allows the user to compare the transcripts from any two anno - To understand which prediction transcript best represent each reference model - To have a summary information about the similarity between the two annotations. -Mikado compare has been directly inspired by the popular `Cuffcompare`_ [Cufflinks]_ utility and by `ParsEval`_ [ParsEval]_. Please note that while superficially similar to Cuffcompare in the style of the output files, Mikado compare is more philosophically similar to ParsEval, as it will not try to aggregate transcripts in loci but will perform a pure comparison between the two annotation files. Both GTF and GFF files are accepted, in any combination. +Mikado compare has been directly inspired by the popular `Cuffcompare`_ [Cufflinks]_ utility and by `ParsEval`_ [ParsEval]_. +Please note that while superficially similar to Cuffcompare in the style of the output files, Mikado compare is more +philosophically similar to ParsEval, as it will not try to aggregate transcripts in loci but will perform a pure comparison +between the two annotation files. Mikado compare can accept BAM, BED12, GTF and GFF files as input, both for the reference +or for the prediction. Usage ~~~~~ @@ -24,13 +28,19 @@ Usage Mikado compare is invoked by specifying the *reference* annotation and the desired mode of analysis. There are three possible options: #. 
In its default mode, compare will ask for a *prediction* annotation to compare the reference against. - #. In the *"self"* mode, compare will do a self-comparison of the reference against itself, excluding as possible results the matches between a transcript and itself. It can be useful to glean the relationships between transcripts and genes in an annotation. - #. In the *"internal"* mode of operations, compare will again perform a self-comparison, focussed on multi-isoform genes. For those, compare will perform and report all possible comparisons. It is useful to understand the relationships between the transcripts in a single locus. + #. In the *"self"* mode, compare will do a self-comparison of the reference against itself, excluding as possible results + the matches between a transcript and itself. It can be useful to glean the relationships between transcripts and genes + in an annotation. + #. In the *"internal"* mode of operations, compare will again perform a self-comparison, focussed on multi-isoform genes. + For those, compare will perform and report all possible comparisons. It is useful to understand the relationships + between the transcripts in a single locus. +Mikado stores the information of the reference in a specialised SQLite index, with a ".midx" suffix, which will be created +by the program upon its first execution with a new reference. If the index file is already present, Mikado will try +to use it rather than read again the annotation. -Mikado stores the information of the reference in a specialised SQLite index, with a ".midx" suffix, which will be created by the program upon its first execution with a new reference. If the index file is already present, Mikado will try to use it rather than read again the annotation. - -.. note: Starting from version 1.5, Mikado compare supports multiprocessing. Please note that memory usage scales approximately **linearly** with the amount of processes requested. +.. 
note:: Starting from version 1.5, Mikado compare supports multiprocessing. Please note that memory usage scales + approximately **linearly** with the amount of processes requested. Command line ------------ @@ -100,7 +110,8 @@ Mikado compare produces two tabular files, tmap_ and refmap_, and one :ref:`stat TMAP files ---------- -TMAP are tabular files that store the information regarding the best match for each prediction in the reference. The columns are as follows: +TMAP are tabular files that store the information regarding the best match for each prediction in the reference. +The columns are as follows: #. **ref_id**: Transcript ID of the matched reference model(s). #. **ref_gene**: Gene ID of the matched reference model(s). @@ -115,7 +126,9 @@ TMAP are tabular files that store the information regarding the best match for e #. **j_prec**: Splice junction precision of the prediction model ( TP / (number of splice sites in the prediction)) #. **j_recall**: Splice junction recall of the reference model ( TP / (number of splice sites in the reference)) #. **j_f1**: `F1`_ of recall and precision at the splice junction level. -#. **e_prec**: Exon precision of the prediction model ( TP / (number of exons in the prediction)). **NB**: this value is calculated "leniently", ie terminal exons count as a match if the *internal* border is called correctly and the exon is terminal in both prediction and reference. +#. **e_prec**: Exon precision of the prediction model ( TP / (number of exons in the prediction)). + **NB**: this value is calculated "leniently", ie terminal exons count as a match if the *internal* border is called + correctly and the exon is terminal in both prediction and reference. #. **e_recall**: Exon recall of the reference model ( TP / (number of exons in the reference)) #. **e_f1**: `F1`_ of recall and precision at the exon level. #. **distance**: Distance of the model from its putative match. 
@@ -128,14 +141,16 @@ An example of TMAP file is as follows:: AT5G66600.2 AT5G66600 C cl_Chr5.6272 cl_Chr5.6272.gene 7 9 94.95 72.43 82.18 100.00 75.00 85.71 85.71 66.67 75.00 0 Chr5:26575000..26578087 AT5G66620.1,AT5G66630.1,AT5G66631.1 AT5G66620,AT5G66630,AT5G66631 f,j,j,G st_Stringtie_STAR.21710.15 st_Stringtie_STAR.21710.15.gene 8 11,10,1 19.13,19.95,35.98 54.57,45.65,100.00 28.33,27.76,52.92 28.57,64.29,0.00 20.00,50.00,0.00 23.53,56.25,0.00 12.50,37.50,0.00 9.09,30.00,0.00 10.53,33.33,0.00 0 Chr5:26588402..26598231 -You can notice that the third example is particular as the prediction transcript matches not one but multiple reference transcripts. This is a fusion_ event. +You can notice that the third example is particular as the prediction transcript matches not one but multiple reference +transcripts. This is a fusion_ event. .. _refmap: RefMap files ------------ -RefMap files are tabular files which store the information regarding the best match for each reference transcript, among all possible prediction models. The columns of the file are as follows: +RefMap files are tabular files which store the information regarding the best match for each reference transcript, +among all possible prediction models. The columns of the file are as follows: #. **ref_id**: Transcript ID of the reference model. #. **ccode**: class code of the match. See :ref:`the relevant section on Class codes `. @@ -143,7 +158,8 @@ RefMap files are tabular files which store the information regarding the best ma #. **gid**: Gene ID of the prediction model. #. **nF1**: `F1`_ of recall and precision at the nucleotide level. #. **jF1**: `F1`_ of recall and precision at the splice junction level. -#. **eF1**: `F1`_ of recall and precision at the exon level. **NB**: this value is calculated "leniently", ie terminal exons count as a match if the *internal* border is called correctly and the exon is terminal in both prediction and reference. +#. 
**eF1**: `F1`_ of recall and precision at the exon level. **NB**: this value is calculated "leniently", ie terminal + exons count as a match if the *internal* border is called correctly and the exon is terminal in both prediction and reference. #. **ref_gene**: Gene ID of the reference model. #. **best_ccode**: Best possible class code found for any of the transcripts of the gene. #. **best_tid**: Transcript ID of the prediction model which fit best one of the transcript models of the reference gene. @@ -164,6 +180,27 @@ An example of a RefMap file is as follows:: Please note that the third example (AT5G66630.1) has as best possible match a fusion_ event. +Extended RefMap files +^^^^^^^^^^^^^^^^^^^^^ + +Mikado can optionally produce more detailed RefMap files, listing the details on recall and precision for each match +(rather than just the F1 for each level). If the flag ``-erm`` is present on the command line, the following extra-fields +will be present in the file: + +#. **nRecall**: recall at the nucleotide level. +#. **nPrecision**: precision at the nucleotide level. +#. **jRecall**: recall at the junction level. +#. **jPrecision**: precision at the junction level. +#. **eRecall**: recall at the exon level. +#. **ePrecision**: precision at the exon level. +#. **best_nRecall**: recall at the nucleotide level, for the best possible comparison. +#. **best_nPrecision**: precision at the nucleotide level, for the best possible comparison. +#. **best_jRecall**: recall at the junction level, for the best possible comparison. +#. **best_jPrecision**: precision at the junction level, for the best possible comparison. +#. **best_eRecall**: recall at the exon level, for the best possible comparison. +#. **best_ePrecision**: precision at the exon level, for the best possible comparison. + + .. _stats: Stats files @@ -172,42 +209,45 @@ Stats files These files provide a summary of the comparison between the reference and the annotation. 
An example is as follows:: Command line: - /usr/users/ga002/venturil/py351/bin/mikado compare -r reference.gff3 -p mikado.loci.gff3 -o compare -l compare.log - 7 reference RNAs in 5 genes - 15 predicted RNAs in 8 genes + /home/lucve/miniconda3/envs/mikado2/bin/mikado compare -r reference.gff3 -p Daijin/5-mikado/pick/permissive/mikado-permissive.loci.gff3 -o compare -l compare.log + 18 reference RNAs in 12 genes + 22 predicted RNAs in 15 genes --------------------------------- | Sn | Pr | F1 | - Base level: 85.74 64.73 73.77 - Exon level (stringent): 63.83 42.86 51.28 - Exon level (lenient): 80.00 52.94 63.72 - Intron level: 89.47 59.65 71.58 - Intron chain level: 33.33 14.29 20.00 - Transcript level (stringent): 0.00 0.00 0.00 - Transcript level (>=95% base F1): 28.57 13.33 18.18 - Transcript level (>=80% base F1): 42.86 20.00 27.27 - Gene level (100% base F1): 0.00 0.00 0.00 - Gene level (>=95% base F1): 40.00 25.00 30.77 - Gene level (>=80% base F1): 60.00 37.50 46.15 + Base level: 94.90 83.22 88.68 + Exon level (stringent): 80.56 71.60 75.82 + Exon level (lenient): 91.18 76.54 83.22 + Splice site level: 95.19 81.15 87.61 + Intron level: 96.84 88.19 92.31 + Intron level (NR): 94.34 79.37 86.21 + Intron chain level: 69.23 50.00 58.06 + Intron chain level (NR): 69.23 50.00 58.06 + Transcript level (stringent): 55.56 45.45 50.00 + Transcript level (>=95% base F1): 72.22 59.09 65.00 + Transcript level (>=80% base F1): 72.22 59.09 65.00 + Gene level (100% base F1): 75.00 60.00 66.67 + Gene level (>=95% base F1): 83.33 66.67 74.07 + Gene level (>=80% base F1): 83.33 66.67 74.07 # Matching: in prediction; matched: in reference. 
- Matching intron chains: 2 - Matched intron chains: 2 - Matching monoexonic transcripts: 1 - Matched monoexonic transcripts: 1 - Total matching transcripts: 3 - Total matched transcripts: 3 - - Missed exons (stringent): 17/47 (36.17%) - Novel exons (stringent): 40/70 (57.14%) - Missed exons (lenient): 9/45 (20.00%) - Novel exons (lenient): 32/68 (47.06%) - Missed introns: 4/38 (10.53%) - Novel introns: 23/57 (40.35%) - - Missed transcripts: 0/7 (0.00%) - Novel transcripts: 6/15 (40.00%) - Missed genes: 0/5 (0.00%) - Novel genes: 2/8 (25.00%) + Matching intron chains: 9 + Matched intron chains: 9 + Matching monoexonic transcripts: 4 + Matched monoexonic transcripts: 4 + Total matching transcripts: 13 + Total matched transcripts: 13 + + Missed exons (stringent): 14/72 (19.44%) + Novel exons (stringent): 23/81 (28.40%) + Missed exons (lenient): 6/68 (8.82%) + Novel exons (lenient): 19/81 (23.46%) + Missed introns: 3/53 (5.66%) + Novel introns: 13/63 (20.63%) + + Missed transcripts (0% nF1): 0/18 (0.00%) + Novel transcripts (0% nF1): 3/22 (13.64%) + Missed genes (0% nF1): 0/12 (0.00%) + Novel genes (0% nF1): 3/15 (20.00%) The first section of the file describes: @@ -215,25 +255,39 @@ The first section of the file describes: #. Concordance of the two annotation at the exonic level (recall, precision, and F1), in two ways: * *"stringent"*: only perfect exonic matches are considered. - * *"lenient"*: in this mode, terminal exons are counted as a match if the **internal** border is matched. See the RGASP paper [RGASP]_ for details on the rationale. - + * *"lenient"*: in this mode, terminal exons are counted as a match if the **internal** border is matched. + See the RGASP paper [RGASP]_ for details on the rationale. + #. Concordance of the two annotations in regards with the splice junctions, analysed independently of one another. #. Concordance of the two annotations at the intron level. - #. 
Concordance of the two annotations at the intron chain level - how many intron chains of the reference are found identical in the prediction. Only multiexonic models are considered for this level. + #. Concordance of the two annotations at the intron chain level - how many intron chains of the reference are found + identical in the prediction. Only multiexonic models are considered for this level. #. Concordance of the two annotations at the transcript level, in three different modes: * *"stringent"*: in this mode, only perfect matches are considered. - * *"95% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *95%* and, for multiexonic transcripts, the intron chain is reconstructed perfectly. - * *"80% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *80%* and, for multiexonic transcripts, the intron chain is reconstructed perfectly. + * *"95% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *95%* and, + for multiexonic transcripts, the intron chain is reconstructed perfectly. + * *"80% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *80%* and, + for multiexonic transcripts, the intron chain is reconstructed perfectly. #. Concordance of the two annotations at the gene level, in three different modes: - * *"stringent"*: in this mode, we consider reference genes for which it was possible to find at least one perfect match for one of its transcripts. - * *"95% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *95%* and, for multiexonic transcripts, the intron chain is reconstructed perfectly. - * *"80% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *80%* and, for multiexonic transcripts, the intron chain is reconstructed perfectly. 
+ * *"stringent"*: in this mode, we consider reference genes for which it was possible to find at least one perfect + match for one of its transcripts. + * *"95% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *95%* and, for + multiexonic transcripts, the intron chain is reconstructed perfectly. The best possible match is considered for + this statistic. + * *"80% base F1"*: in this mode, we only count instances where the nucleotide F1 is greater than *80%* and, for + multiexonic transcripts, the intron chain is reconstructed perfectly. The best possible match is considered for + this statistic. -In the second section, the file reports how many of the intron chains, monoexonic transcripts and total transcripts in the **reference** were *matched* by at least one *matching* **prediction** transcript. Finally, in the third section the file reports the number of missed (present in the reference but not in the prediction) or novel (viceversa - present in the prediction but not in the reference) features. +In the second section, the file reports how many of the intron chains, monoexonic transcripts and total transcripts in +the **reference** were *matched* by at least one *matching* **prediction** transcript. Finally, in the third section the +file reports the number of missed (present in the reference but not in the prediction) or novel (viceversa - present in +the prediction but not in the reference) features. -.. note:: Please note that a gene might be considered as "found" even if its best match is intronic, on the opposite strand, or not directly overlapping it, or is in the opposite strand (see :ref:`next section `, in particular the *Intronic*, *Fragment* and *No overlap* categories). +.. 
note:: Please note that a gene might be considered as "found" even if its best match is intronic, on the opposite strand, + or not directly overlapping it (see :ref:`next section `, in particular the + *Intronic*, *Fragment* and *No overlap* categories). .. _ccodes: @@ -241,11 +295,15 @@ In the second section, the file reports how many of the intron chains, monoexoni Class codes ~~~~~~~~~~~ -In addition to recall, precision and F1 values, Mikado assign each comparison between two transcripts a *class code*, which summarises the relationship between the two transcripts. The idea is lifted from the popular tool `Cuffcompare`_, although Mikado greatly extends the catalogue of possible class codes. +In addition to recall, precision and F1 values, Mikado assigns each comparison between two transcripts a *class code*, +which summarises the relationship between the two transcripts. The idea is lifted from the popular tool `Cuffcompare`_, +although Mikado greatly extends the catalogue of possible class codes. All class codes fall within one of the following categories: - **Match**: class codes of this type indicate concordance between the two transcript models. - - **Extension**: class codes of this type indicate that one of the two models extends the intron chain of the other, without internal interruptions. The extension can be from either perspective - either the prediction extends the reference, or it is instead *contained* within the reference (so that switching perspectives, the reference would "extend" the prediction). + - **Extension**: class codes of this type indicate that one of the two models extends the intron chain of the other, + without internal interruptions. The extension can be from either perspective - either the prediction extends the + reference, or it is instead *contained* within the reference (so that switching perspectives, the reference would "extend" the prediction). 
- **Alternative splicing**: the two exon chains overlap but differ in significant ways. - **Intronic**: either the prediction is completely contained within the introns of the reference, or viceversa. - **Overlap**: the two transcript models generically overlap on their exonic sequence. @@ -254,7 +312,13 @@ All class codes fall within one of the following categories: .. _fusion: - - **Fusion**: this special class code is a qualifier and it never appears on its own. When a transcript is defined as a fusion, its class code in the *tmap* file will be an "f" followed by the class codes of the individual transcript matches, sperated by comma. So a prediction which matches two reference models, one with a "j" and another with a "o", will have a class code of **"f,j,o"**. In the *refmap* file, if the fusion is the best match, the class code will be "f" followed by the class code for the individual reference transcript; e.g., **"f,j"** + - **Fusion**: this special class code is a qualifier and it never appears on its own. When a transcript is defined as a + fusion between two or more reference transcripts, it will appear on *multiple lines*, one for each of the matches. In + each line, the class code will be an "f," followed by the class code assigned to that particular comparison. + So e.g. a prediction which matches two reference models, one with a "j" and another with a "o", will be present in two + different lines; the first one with a class code of **f,j** and the second with a class code of **f,o**. + In the *refmap* file, if the fusion is the best match, the class code will be "f" followed by the class code for the + individual reference transcript; e.g., **"f,j"** .. topic:: Available class codes @@ -434,7 +498,9 @@ Each calculated match against a reference transcript is stored as a potential *b This function is used to select both for the best match *for the transcript*, as well as to select among these matches for the best match *for the gene*. 
-The interval tree data structure is created using Cython code originally part of the `bx-python `_, kindly provided by `Dr. Taylor `_ for modification and inclusion in Mikado. The code has been slightly modified for making it Python3 compliant. +The interval tree data structure is created using Cython code originally part of the `bx-python `_, kindly provided by `Dr. Taylor `_ for modification and inclusion in Mikado. We subsequently integrated the improvements made on the code by Dr. Brent Pedersen in his :ref:`quicksect fork `. + +We further modified the original code for allowing "fuzzy matches", as in, allowing some leeway on how far the edges of the query interval are from the target intervals present in the tree. The .midx files storing the annotation for repeated compare runs are SQLite files. In them, Mikado will store for each gene its coordinates, its transcripts, and the location of exons and CDS features. MIDX files make repeated runs quite faster, as the program will not have to re-parse the GFF file. diff --git a/docs/Usage/Configure.rst b/docs/Usage/Configure.rst index 81efa5d60..d98536bc1 100644 --- a/docs/Usage/Configure.rst +++ b/docs/Usage/Configure.rst @@ -30,20 +30,27 @@ The fields in this file are as follows, for each row: ..