Skip to content

Commit

Permalink
solved conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Venturini authored and Luca Venturini committed Jan 29, 2016
2 parents 44c178a + c12b8c6 commit 6d47725
Show file tree
Hide file tree
Showing 13 changed files with 650 additions and 78 deletions.
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@ build/*
shanghai.egg-info/
dist/*
Mikado.egg-info/*
.idea/*
sample_data/configuration2.yaml
.idea/codeStyleSettings.xml
.idea/dictionaries
.idea/encodings.xml
.idea/inspectionProfiles
.idea/Mikado.iml
.idea/misc.xml
.idea/modules.xml
.idea/vcs.xml
.idea/workspace.xml
6 changes: 3 additions & 3 deletions mikado_lib/configuration/configuration_blueprint.json
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,14 @@
"n",
"O",
"e",
"K",
"o",
"h"
"h",
"J",
"C"
]
},
"default": [
"j",
"n",
"O",
"h"
]
Expand Down
1 change: 1 addition & 0 deletions mikado_lib/scales/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ def calc_f1(recall, precision):
from . import accountant
# noinspection PyPep8
from . import assigner
from . import compare
90 changes: 63 additions & 27 deletions mikado_lib/scales/assigner.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def __check_for_fusions(self, prediction, matches):
self.logger.debug("Match for %s: %s",
match,
[_.id for _ in gene_matches])
gid_groups.append(tuple(gene.id for gene in gene_matches))
gid_groups.append(tuple(sorted([gene.id for gene in gene_matches])))
temp_results = []
# This will result in calculating the matches twice unfortunately

Expand Down Expand Up @@ -295,11 +295,12 @@ def __check_for_fusions(self, prediction, matches):

# Create the fusion best result
values = []
fused_group = tuple(_.ref_gene[0] for _ in best_fusion_results)
fused_group = tuple(sorted([_.ref_gene[0] for _ in best_fusion_results]))
self.logger.debug("Fused group: %s; GID_groups: %s",
fused_group,
gid_groups)

# TODO: this is a very stupid way of checking, it must be improved
if fused_group not in gid_groups:
for key in ResultStorer.__slots__:
if key in ["gid", "tid", "distance"]:
Expand Down Expand Up @@ -454,6 +455,7 @@ def finish(self):
self.done, "s" if self.done > 1 else "")
self.refmap_printer()
self.stat_calculator.print_stats()
self.tmap_out.close()

def calc_compare(self, prediction: Transcript, reference: Transcript) -> ResultStorer:
"""Thin layer around the calc_compare class method.
Expand Down Expand Up @@ -490,7 +492,7 @@ def compare(cls, prediction: Transcript, reference: Transcript) -> (ResultStorer
Available ccodes (from Cufflinks documentation):
- = Complete intron chain match
- c Contained
- c Contained (perfect junction and nucleotide precision, imperfect recall)
- j Potentially novel isoform (fragment): at least one splice junction is shared
with a reference transcript
- e Single exon transfrag overlapping a reference exon and at least
Expand All @@ -512,7 +514,11 @@ def compare(cls, prediction: Transcript, reference: Transcript) -> (ResultStorer
- _ Complete match, for monoexonic transcripts
(nucleotide F1>=95% - i.e. min(precision,recall)>=90.4%
- m Exon overlap between two monoexonic transcripts
- n Potentially novel isoform, where all the known junctions
- n Potential extension of the reference - we have added new splice junctions
*outside* the boundaries of the transcript itself
- C Contained transcript with overextensions on either side
(perfect junction recall, imperfect nucleotide specificity)
- J Potentially novel isoform, where all the known junctions
have been confirmed and we have added others as well *externally*
- I *multiexonic* transcript falling completely inside a known transcript
- h the transcript is multiexonic and extends a monoexonic reference transcript
Expand Down Expand Up @@ -600,36 +606,66 @@ def compare(cls, prediction: Transcript, reference: Transcript) -> (ResultStorer
if ccode is None:
if min(prediction.exon_num, reference.exon_num) > 1:
if junction_recall == 1 and junction_precision < 1:
new_splices = set.difference(set(prediction.splices),
set(reference.splices))

# Check if this is an extension
new_splices = set(prediction.splices) - set(reference.splices)
# If one of the new splices is internal to the intron chain, it's a j
if any(min(reference.splices) <
splice <
max(reference.splices) for splice in new_splices):
ccode = "j"
else:
# we have recovered all the junctions AND
# added some reference junctions of our own
ccode = "n"
elif junction_recall > 0 and 0 < junction_precision < 1:
if one_intron_confirmed is True:
ccode = "j"
if any(reference.start < splice < reference.end for splice in new_splices):
# if nucl_recall < 1:
ccode = "J"
else:
ccode = "n"
elif 0 < junction_recall < 1 and 0 < junction_precision < 1:
# if one_intron_confirmed is True:
ccode = "j"
# else:
# ccode = "o"
elif junction_precision == 1 and junction_recall < 1:
if nucl_precision == 1:
assert nucl_recall < 1
ccode = "c"
else:
ccode = "o"
elif junction_precision == 1:
ccode = "c"
if nucl_precision < 1:
for intron in reference.introns:
if intron in prediction.introns:
continue
if intron[1] < prediction.start:
continue
elif intron[0] > prediction.end:
continue
if prediction.start < intron[0] and intron[1] < prediction.end:
ccode = "j"
break
missed_introns = reference.introns - prediction.introns
start_in = any([True for intron in missed_introns if
(prediction.start < intron[0] and
intron[1] < min(prediction.splices))])
end_in = any([True for intron in missed_introns if
(prediction.end > intron[1] and
intron[0] > max(prediction.splices))])

if start_in or end_in:
ccode = "j"
else:
ccode = "C"

# end_in = any(prediction.end in intron for intron in missed_introns)
#
#
#
#
#
# ref_only_introns = sorted(list(reference.introns - prediction.introns))
# pred_introns = sorted(prediction.introns)
# min_pred_intron = pred_introns[0][0]
# max_pred_intron = pred_introns[-1][1]
# assert ref_only_introns != set(), (prediction, reference)
# left = sorted(
# [_[0] for _ in ref_only_introns if _[1] < min_pred_intron])
# if left:
# left = left[-1]
# right = sorted(
# [_[1] for _ in ref_only_introns if _[0] > max_pred_intron])
# if right:
# right = right[0]
# if ((left and left > prediction.start) or
# (right and right < prediction.end)):
# ccode = "j" # Retained intron
# else:
# ccode = "C"
elif junction_recall == 0 and junction_precision == 0:
if nucl_f1 > 0:
ccode = "o"
Expand Down
2 changes: 1 addition & 1 deletion mikado_lib/scales/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def parse_prediction(args, genes, positions, queue_logger):

# Finish everything, including printing refmap and stats
assigner_instance.finish()

args.prediction.close()

def compare(args):
"""
Expand Down
3 changes: 1 addition & 2 deletions mikado_lib/subprograms/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ def setup(args):
if args.output_dir is not None:
args.json_conf["prepare"]["output_dir"] = getattr(args,
"output_dir")

if not os.path.exists(args.json_conf["prepare"]["output_dir"]):
try:
os.makedirs(args.json_conf["prepare"]["output_dir"])
Expand Down Expand Up @@ -561,7 +560,7 @@ def positive(string):
parser.add_argument("--single", action="store_true", default=False,
help="Disable multi-threading. Useful for debugging.")
parser.add_argument("-od", "--output-dir", dest="output_dir",
type=str, default=None,
type=str, default=".",
help="Output directory. Default: current working directory")
parser.add_argument("-o", "--out", default=None,
help="Output file. Default: mikado_prepared.fasta.")
Expand Down
Loading

0 comments on commit 6d47725

Please sign in to comment.