Skip to content

Commit

Permalink
Merged issue-270, fix #270
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Oct 7, 2020
1 parent ca90031 commit 9b04246
Show file tree
Hide file tree
Showing 18 changed files with 145 additions and 132 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ python:
# - "3.5"
- "3.6"
- "3.7.3"
- "3.8"
# Setup anaconda, see https://gist.github.com/dan-blanchard/7045057
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
Expand Down
4 changes: 2 additions & 2 deletions Mikado/configuration/configuration_blueprint.json
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@
"required": ["files", "strand_specific"],
"properties":
{
"keep_redundant": {
"exclude_redundant": {
"type": "boolean", "default": false
},
"minimum_cdna_length": {
Expand Down Expand Up @@ -290,7 +290,7 @@
"labels": {"type": "array", "default": []},
"strand_specific_assemblies": {"type": "array", "default": []},
"reference": {"type": "array", "default": []},
"keep_redundant": {"type": "array", "default": []},
"exclude_redundant": {"type": "array", "default": []},
"source_score":{
"type": "object",
"default": {},
Expand Down
3 changes: 2 additions & 1 deletion Mikado/loci/locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ def __launch_padding(self):
# The "templates" are the transcripts that we used to expand the others.
templates = self.pad_transcripts()
# First off, let us update the transcripts.
for tid in self.transcripts:
tid_keys = list(self.transcripts.keys())
for tid in tid_keys:
self.logger.debug("Swapping %s", tid)
self._swap_transcript(backup[tid], self.transcripts[tid])

Expand Down
18 changes: 9 additions & 9 deletions Mikado/parsers/bed12.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def get_tables(table, to_stop=False, gap=None):
if gap is not None:
forward_table[gap * 3] = "*"

if table.nucleotide_alphabet.letters is not None:
valid_letters = set(table.nucleotide_alphabet.letters.upper())
if table.nucleotide_alphabet is not None:
valid_letters = set(table.nucleotide_alphabet.upper())
else:
# Assume the worst case, ambiguous DNA or RNA:
valid_letters = backup_valid_letters
Expand Down Expand Up @@ -298,8 +298,8 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
self.score = 0
self.strand = None
self.rgb = ''
self.__block_sizes = np.zeros(1, dtype=np.int_)
self.__block_starts = np.zeros(1, dtype=np.int_)
self.__block_sizes = np.zeros(1, dtype=np.int64)
self.__block_starts = np.zeros(1, dtype=np.int64)
self.__block_count = 1
self.__invalid = None
self.invalid_reason = None
Expand Down Expand Up @@ -1167,14 +1167,14 @@ def block_sizes(self):
@block_sizes.setter
def block_sizes(self, sizes):
sizes = np.array(sizes)
if not issubclass(sizes.dtype.type, np.int_):
if not issubclass(sizes.dtype.type, np.int64):
raise TypeError("Block sizes should be integers!")
self.__block_sizes = sizes
del self.invalid

@block_sizes.deleter
def block_sizes(self):
self.__block_sizes = np.zeros(1, dtype=np.int_)
self.__block_sizes = np.zeros(1, dtype=np.int64)
del self.invalid

@property
Expand All @@ -1184,7 +1184,7 @@ def block_starts(self):
@block_starts.setter
def block_starts(self, starts):
starts = np.array(starts)
if not issubclass(starts.dtype.type, np.int_):
if not issubclass(starts.dtype.type, np.int64):
raise TypeError("Block sizes should be integers! Dtype: {}; array: {}".format(
starts.dtype, starts
))
Expand All @@ -1193,7 +1193,7 @@ def block_starts(self, starts):

@block_starts.deleter
def block_starts(self):
self.__block_starts = np.zeros(1, dtype=np.int_)
self.__block_starts = np.zeros(1, dtype=np.int64)
del self.invalid

@property
Expand Down Expand Up @@ -1364,7 +1364,7 @@ def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=Fa
bsizes = np.flip(self.block_sizes)
tStart, tEnd = self.block_sizes.sum() - tEnd, self.block_sizes.sum() - tStart

bstarts = np.concatenate([np.zeros(1, dtype=np.int_), bsizes[:-1].cumsum()])
bstarts = np.concatenate([np.zeros(1, dtype=np.int64), bsizes[:-1].cumsum()])
# bstarts = [0]
# for bs in bsizes[:-1]:
# bstarts.append(bs + bstarts[-1])
Expand Down
30 changes: 18 additions & 12 deletions Mikado/preparation/annotation_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def run(self):
while True:
results = self.submission_queue.get()
try:
label, handle, strand_specific, is_reference, keep_redundant, shelf_name = results
label, handle, strand_specific, is_reference, exclude_redundant, shelf_name = results
except ValueError as exc:
raise ValueError("{}.\tValues: {}".format(exc, ", ".join([str(_) for _ in results])))
if handle == "EXIT":
Expand All @@ -100,7 +100,7 @@ def run(self):
max_intron=self.max_intron,
strip_cds=self.__strip_cds,
is_reference=is_reference,
keep_redundant=keep_redundant,
exclude_redundant=exclude_redundant,
strand_specific=strand_specific)
elif gff_handle.__annot_type__ == "gtf":
new_ids = load_from_gtf(shelf_name,
Expand All @@ -112,7 +112,7 @@ def run(self):
max_intron=self.max_intron,
is_reference=is_reference,
strip_cds=self.__strip_cds,
keep_redundant=keep_redundant,
exclude_redundant=exclude_redundant,
strand_specific=strand_specific)
elif gff_handle.__annot_type__ == "bed12":
new_ids = load_from_bed12(shelf_name,
Expand All @@ -124,7 +124,7 @@ def run(self):
min_length=self.min_length,
max_intron=self.max_intron,
strip_cds=self.__strip_cds,
keep_redundant=keep_redundant,
exclude_redundant=exclude_redundant,
strand_specific=strand_specific)
else:
raise ValueError("Invalid file type: {}".format(gff_handle.name))
Expand Down Expand Up @@ -397,7 +397,7 @@ def load_from_gff(shelf_name,
min_length=0,
max_intron=3*10**5,
is_reference=False,
keep_redundant=False,
exclude_redundant=False,
strip_cds=False,
strand_specific=False):
"""
Expand All @@ -420,6 +420,8 @@ def load_from_gff(shelf_name,
:type strand_specific: bool
:param is_reference: boolean. If set to True, the transcript will always be retained.
:type is_reference: bool
:param exclude_redundant: boolean. If set to True, fully redundant transcripts will be removed.
:type exclude_redundant: bool
:return:
"""

Expand Down Expand Up @@ -475,7 +477,7 @@ def load_from_gff(shelf_name,

exon_lines[row.id]["strand_specific"] = strand_specific
exon_lines[row.id]["is_reference"] = is_reference
exon_lines[row.id]["keep_redundant"] = keep_redundant
exon_lines[row.id]["exclude_redundant"] = exclude_redundant
continue
elif row.is_exon is True:
if not row.is_cds or (row.is_cds is True and strip_cds is False):
Expand Down Expand Up @@ -520,7 +522,7 @@ def load_from_gff(shelf_name,
exon_lines[tid]["parent"] = transcript2genes[tid]
exon_lines[tid]["strand_specific"] = strand_specific
exon_lines[tid]["is_reference"] = is_reference
exon_lines[tid]["keep_redundant"] = keep_redundant
exon_lines[tid]["exclude_redundant"] = exclude_redundant
elif tid not in exon_lines and tid not in transcript2genes:
continue
else:
Expand Down Expand Up @@ -555,7 +557,7 @@ def load_from_gtf(shelf_name,
min_length=0,
max_intron=3*10**5,
is_reference=False,
keep_redundant=False,
exclude_redundant=False,
strip_cds=False,
strand_specific=False):
"""
Expand All @@ -578,6 +580,8 @@ def load_from_gtf(shelf_name,
:type strand_specific: bool
:param is_reference: boolean. If set to True, the transcript will always be retained.
:type is_reference: bool
:param exclude_redundant: boolean. If set to True, the transcript will be marked for potential redundancy removal.
:type exclude_redundant: bool
:return:
"""

Expand Down Expand Up @@ -618,7 +622,7 @@ def load_from_gtf(shelf_name,
exon_lines[row.transcript]["parent"] = "{}.gene".format(row.id)
exon_lines[row.transcript]["strand_specific"] = strand_specific
exon_lines[row.transcript]["is_reference"] = is_reference
exon_lines[row.transcript]["keep_redundant"] = keep_redundant
exon_lines[row.transcript]["exclude_redundant"] = exclude_redundant
if "exon_number" in exon_lines[row.transcript]["attributes"]:
del exon_lines[row.transcript]["attributes"]["exon_number"]
continue
Expand All @@ -645,7 +649,7 @@ def load_from_gtf(shelf_name,
exon_lines[row.transcript]["parent"] = "{}.gene".format(row.transcript)
exon_lines[row.transcript]["strand_specific"] = strand_specific
exon_lines[row.transcript]["is_reference"] = is_reference
exon_lines[row.transcript]["keep_redundant"] = keep_redundant
exon_lines[row.transcript]["exclude_redundant"] = exclude_redundant
else:
if row.transcript in to_ignore:
continue
Expand Down Expand Up @@ -677,7 +681,7 @@ def load_from_bed12(shelf_name,
min_length=0,
max_intron=3*10**5,
is_reference=False,
keep_redundant=False,
exclude_redundant=False,
strip_cds=False,
strand_specific=False):
"""
Expand All @@ -700,6 +704,8 @@ def load_from_bed12(shelf_name,
:type strand_specific: bool
:param is_reference: boolean. If set to True, the transcript will always be retained.
:type is_reference: bool
:param exclude_redundant: boolean. If set to True, the transcript will be marked for potential redundancy removal.
:type exclude_redundant: bool
:return:
"""

Expand Down Expand Up @@ -739,7 +745,7 @@ def load_from_bed12(shelf_name,
exon_lines[transcript.id]["parent"] = "{}.gene".format(transcript.id)
exon_lines[transcript.id]["strand_specific"] = strand_specific
exon_lines[transcript.id]["is_reference"] = is_reference
exon_lines[transcript.id]["keep_redundant"] = keep_redundant
exon_lines[transcript.id]["exclude_redundant"] = exclude_redundant
exon_lines[transcript.id]["features"]["exon"] = [
(exon[0], exon[1]) for exon in transcript.exons
]
Expand Down

0 comments on commit 9b04246

Please sign in to comment.