Skip to content

Commit

Permalink
expand fall through to next most likely variant if network prediction has insufficient read coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
ftostevin-ont committed Sep 23, 2021
1 parent 0e20eaa commit 09a7d18
Showing 1 changed file with 37 additions and 22 deletions.
59 changes: 37 additions & 22 deletions clair3/CallVariants.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,32 +723,37 @@ def output_from(

if is_homo_SNP:
reference_base = reference_sequence[tensor_position_center]
idx = homo_SNP_probabilities.index(maximum_probability)
base1, base2 = homo_SNP_bases_from(homo_SNP_probabilities)
alternate_base = base1 if base1 != reference_base else base2
sorted_alt_bases, alternate_base = find_alt_base(alt_info_dict, alternate_base)
if alternate_base is None or alternate_base == reference_base:
homo_SNP_probabilities[idx] = 0
continue

elif is_hetero_SNP:
base1, base2 = hetero_SNP_bases_from(hetero_SNP_probabilities)
idx = hetero_SNP_probabilities.index(maximum_probability)
reference_base = reference_sequence[tensor_position_center]
is_multi = base1 != reference_base and base2 != reference_base
if is_multi:
sorted_alt_bases, _ = find_alt_base(alt_info_dict)
if len(sorted_alt_bases) == 0:
break
if len(sorted_alt_bases) < 2:
alternate_base = sorted_alt_bases[0]
hetero_SNP_probabilities[np.argmax(hetero_SNP_probabilities)] = 0.0
break
hetero_SNP_probabilities[idx] = 0
continue
alternate_base = ','.join(sorted_alt_bases[:2])
else:
alternate_base = base1 if base1 != reference_base else base2
sorted_alt_bases, alternate_base = find_alt_base(alt_info_dict, alternate_base)
if alternate_base is None or alternate_base == reference_base:
hetero_SNP_probabilities[idx] = 0
continue


elif is_homo_insertion:
variant_length = None
idx = homo_Ins_probabilities.index(maximum_probability)
if add_indel_length:
idx = homo_Ins_probabilities.index(maximum_probability)
variant_length = homo_Ins_lengths[idx]
insertion_bases = insertion_bases_using_alt_info_from(
alt_info_dict=alt_info_dict,
Expand All @@ -757,7 +762,8 @@ def output_from(

insertion_length = len(insertion_bases)
if insertion_length == 0:
break
homo_Ins_probabilities[idx] = 0
continue
reference_base = reference_sequence[tensor_position_center]
alternate_base = insertion_bases

Expand All @@ -776,25 +782,27 @@ def output_from(
)
insertion_length = len(insertion_bases)
if insertion_length == 0:
break
hetero_ACGT_Ins_probabilities[idx] = 0
continue
reference_base = reference_sequence[tensor_position_center]
alternate_base = insertion_bases

is_SNP_Ins_multi = hetero_Ins_base != reference_base
if is_SNP_Ins_multi:
sorted_alt_bases, _ = find_alt_base(alt_info_dict)
if len(sorted_alt_bases) == 0:
break
hetero_ACGT_Ins_probabilities[idx] = 0
continue
else:
alternate_base = "{},{}".format(sorted_alt_bases[0], alternate_base)

elif is_hetero_InsIns:
insertion_bases_list = []
idx = hetero_InsIns_probabilities.index(maximum_probability)
if add_indel_length:
idx = hetero_InsIns_probabilities.index(maximum_probability)
variant_length_1, variant_length_2 = hetero_InsIns_length_tuples[idx]
del hetero_InsIns_probabilities[idx]
del hetero_InsIns_length_tuples[idx]
# del hetero_InsIns_probabilities[idx]
# del hetero_InsIns_length_tuples[idx]

insertion_bases1 = insertion_bases_using_alt_info_from(
alt_info_dict=alt_info_dict,
Expand All @@ -819,7 +827,8 @@ def output_from(
return_multi=True
)
if len(insertion_bases_list) < 2:
break
hetero_InsIns_probabilities[idx] = 0
continue
insertion_bases, another_insertion_bases = insertion_bases_list

reference_base = reference_sequence[tensor_position_center]
Expand All @@ -830,12 +839,13 @@ def output_from(
if alternate_base_1 != alternate_base_2:
alternate_base = "{},{}".format(alternate_base_1, alternate_base_2)
else:
reference_base, alternate_base = None, None
hetero_InsIns_probabilities[idx] = 0
continue

elif is_homo_deletion:
variant_length = None
idx = homo_Del_probabilities.index(maximum_probability)
if add_indel_length:
idx = homo_Del_probabilities.index(maximum_probability)
variant_length = homo_Del_lengths[idx]

deletion_bases = deletion_bases_using_alt_info_from(
Expand All @@ -844,7 +854,8 @@ def output_from(
)
deletion_length = len(deletion_bases)
if deletion_length == 0:
break
homo_Del_probabilities[idx] = 0
continue
reference_base = reference_sequence[tensor_position_center] + deletion_bases
alternate_base = reference_base[0]

Expand All @@ -862,7 +873,8 @@ def output_from(
)
deletion_length = len(deletion_bases)
if deletion_length == 0:
break
hetero_ACGT_Del_probabilities[idx] = 0
continue
reference_base = reference_sequence[tensor_position_center] + deletion_bases
alternate_base = reference_base[0]

Expand All @@ -874,8 +886,8 @@ def output_from(

elif is_hetero_DelDel:
deletion_bases_list = []
idx = hetero_DelDel_probabilities.index(maximum_probability)
if add_indel_length:
idx = hetero_DelDel_probabilities.index(maximum_probability)
variant_length_1, variant_length_2 = sorted(hetero_DelDel_length_tuples[idx],
reverse=True) # longer deletion should be in first position
deletion_base1 = deletion_bases_using_alt_info_from(
Expand Down Expand Up @@ -903,7 +915,8 @@ def output_from(
)

if len(deletion_bases_list) < 2:
break
hetero_DelDel_probabilities[idx] = 0
continue

deletion_bases, deletion_bases1 = deletion_bases_list

Expand All @@ -918,12 +931,13 @@ def output_from(
):
alternate_base = "{},{}".format(alternate_base_1, alternate_base_2)
else:
reference_base, alternate_base = None, None
hetero_DelDel_probabilities[idx] = 0
continue

elif is_insertion_and_deletion:
variant_length_1, variant_length_2 = None, None
idx = hetero_InsDel_probabilities.index(maximum_probability)
if add_indel_length:
idx = hetero_InsDel_probabilities.index(maximum_probability)
variant_length_1, variant_length_2 = hetero_InsDel_length_tuples[idx]

insertion_bases = insertion_bases_using_alt_info_from(
Expand All @@ -939,7 +953,8 @@ def output_from(
deletion_length = len(deletion_bases)

if insertion_length == 0 or deletion_length == 0:
break
hetero_InsDel_probabilities[idx] = 0
continue
reference_base = reference_sequence[tensor_position_center] + deletion_bases
alternate_base = "{},{}".format(
reference_base[0],
Expand Down

0 comments on commit 09a7d18

Please sign in to comment.