From a81e32631722164773ecf4fb6dc4223f47468bac Mon Sep 17 00:00:00 2001 From: dglemos Date: Wed, 1 Mar 2023 17:06:17 +0000 Subject: [PATCH 1/5] Fix hgvsp multi-exon --- .../DBSQL/VariationFeatureAdaptor.pm | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm index bd1afa092..2930acf9c 100755 --- a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm @@ -2354,10 +2354,37 @@ sub get_reference{ my @coords = defined($pos2) ? $tr_mapper->pep2genomic($pos, $pos2) : $tr_mapper->pep2genomic($pos, $pos); - my $start = $coords[0]->start(); - my $end = $coords[0]->end(); + my $start; + my $end; my $strand = $coords[0]->strand(); + # Assign start and end when we have two coordinates + # This is the most common alternative + if(scalar(@coords) == 2){ + if(($coords[0]->end() - $coords[0]->start() + 1) % 2 == 0) { + if($strand == 1) { + $start = $coords[0]->start(); + $end = $start + 2; + } + else { + $end = $coords[0]->end(); + $start = $end - 2; + } + } + elsif($strand == 1) { + $end = $coords[1]->end(); + $start = $end - 2; + } + else { + $start = $coords[0]->start(); + $end = $start + 2; + } + } + else{ + $start = $coords[0]->start(); + $end = $coords[0]->end(); + } + my $seq_length = $type_del == 1 ? ($end-$start) + 1 : 3; ## find reference sequence From 9cfe972bac4ca77ae233612f810a0102d188c282 Mon Sep 17 00:00:00 2001 From: dglemos Date: Thu, 2 Mar 2023 12:16:47 +0000 Subject: [PATCH 2/5] Add hgvsp multi-exon test on the foward strand --- modules/t/variationFeatureAdaptor.t | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/t/variationFeatureAdaptor.t b/modules/t/variationFeatureAdaptor.t index 01b2fe367..13cfe291d 100755 --- a/modules/t/variationFeatureAdaptor.t +++ b/modules/t/variationFeatureAdaptor.t @@ -444,6 +444,10 @@ $hgvs_str = 'Q00872:p.Ala53Val'; ok($vfa->fetch_by_hgvs_notation($hgvs_str)->allele_string eq 'C/T', 'HGVSp notation using UniProt ID'); ok($vfa->fetch_by_hgvs_notation('ENST00000470094:c.55_111del')->end eq 32954180, 'HGVSc multi-exon deletion'); +# test HGVS protein when codon is within two exons +ok($vfa->fetch_by_hgvs_notation('ENSP00000422007.1:p.Gly469Glu')->start eq 66326707, 'HGVSp multi-exon'); + + print "\n# Test - fetch_by_spdi_notation\n"; my $spdi_str = 'NC_000013.10:32954017::'; throws_ok {$vfa->fetch_by_spdi_notation($spdi_str); } qr/Could not parse the SPDI notation $spdi_str/, 'Throw on invalid SPDI notation.'; From b7f3d39cdd8d99d52a69f06d3d8e0a8314de9408 Mon Sep 17 00:00:00 2001 From: dglemos Date: Thu, 2 Mar 2023 13:55:03 +0000 Subject: [PATCH 3/5] Add HGVSp multi-exon test on the reverse strand --- modules/t/variationFeatureAdaptor.t | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/t/variationFeatureAdaptor.t b/modules/t/variationFeatureAdaptor.t index 13cfe291d..77e217b7e 100755 --- a/modules/t/variationFeatureAdaptor.t +++ b/modules/t/variationFeatureAdaptor.t @@ -361,6 +361,7 @@ ok($vfs16a->[0]->variation_name() eq $vf_somatic_name, "somatic vf with phenotyp # test fetching VF with empty consequence type column is($vfa->fetch_by_dbID(997738282)->display_consequence, 'sequence_variant', 'empty consequence column'); +ok(scalar @{$vfa->fetch_all_by_location_identifier('18:40228819:A_G')} == 1, "fetch_all_by_location_identifier '18:40228819:A_G'"); # test fetch Iterator print "\n# Test - fetch_Iterator\n"; @@ -445,7 +446,10 @@ ok($vfa->fetch_by_hgvs_notation($hgvs_str)->allele_string eq 'C/T', 'HGVSp notat ok($vfa->fetch_by_hgvs_notation('ENST00000470094:c.55_111del')->end eq 32954180, 'HGVSc multi-exon deletion'); # test HGVS protein when codon is within two exons -ok($vfa->fetch_by_hgvs_notation('ENSP00000422007.1:p.Gly469Glu')->start eq 66326707, 'HGVSp multi-exon'); +# forward strand +ok($vfa->fetch_by_hgvs_notation('ENSP00000422007.1:p.Gly469Glu')->start eq 66326707, 'HGVSp multi-exon (forward)'); +# reverse strand +ok($vfa->fetch_by_hgvs_notation('ENSP00000293261:p.Arg232Met')->start eq 48846578, 'HGVSp multi-exon (reverse)'); print "\n# Test - fetch_by_spdi_notation\n"; From de90e31796528db50208506586d0b2f116d84c96 Mon Sep 17 00:00:00 2001 From: dglemos Date: Thu, 2 Mar 2023 15:52:50 +0000 Subject: [PATCH 4/5] Fix start and end --- .../Variation/DBSQL/VariationFeatureAdaptor.pm | 11 ++++++++++- modules/t/variationFeatureAdaptor.t | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm index 2930acf9c..ce40db90e 100755 --- a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm @@ -2356,6 +2356,8 @@ sub get_reference{ my $start; my $end; + my $real_start; + my $real_end; my $strand = $coords[0]->strand(); # Assign start and end when we have two coordinates @@ -2365,19 +2367,23 @@ sub get_reference{ if($strand == 1) { $start = $coords[0]->start(); $end = $start + 2; + $real_end = $coords[1]->end(); } else { $end = $coords[0]->end(); $start = $end - 2; + $real_start = $coords[1]->start(); } } elsif($strand == 1) { $end = $coords[1]->end(); $start = $end - 2; + $real_start = $coords[0]->start(); } else { $start = $coords[0]->start(); $end = $start + 2; + $real_end = $coords[1]->end(); } } else{ @@ -2401,8 +2407,11 @@ sub get_reference{ my $from_codon_ref = $from_slice->seq(); + $start = $real_start ? $real_start : $start; + $end = $real_end ? $real_end : $end; + ## correct for strand - reverse_comp(\$from_codon_ref) if $strand <0; + reverse_comp(\$from_codon_ref) if $strand <0; return ($from_codon_ref, $start, $end, $strand); } diff --git a/modules/t/variationFeatureAdaptor.t b/modules/t/variationFeatureAdaptor.t index 77e217b7e..910c84de1 100755 --- a/modules/t/variationFeatureAdaptor.t +++ b/modules/t/variationFeatureAdaptor.t @@ -447,7 +447,7 @@ ok($vfa->fetch_by_hgvs_notation('ENST00000470094:c.55_111del')->end eq 32954180, # test HGVS protein when codon is within two exons # forward strand -ok($vfa->fetch_by_hgvs_notation('ENSP00000422007.1:p.Gly469Glu')->start eq 66326707, 'HGVSp multi-exon (forward)'); +ok($vfa->fetch_by_hgvs_notation('ENSP00000422007.1:p.Gly469Glu')->start eq 66325646, 'HGVSp multi-exon (forward)'); # reverse strand ok($vfa->fetch_by_hgvs_notation('ENSP00000293261:p.Arg232Met')->start eq 48846578, 'HGVSp multi-exon (reverse)'); From c99e59067db91c13d082e11eef10183079c6ad5f Mon Sep 17 00:00:00 2001 From: dglemos Date: Thu, 6 Apr 2023 11:11:46 +0100 Subject: [PATCH 5/5] Add comment --- modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm index ce40db90e..daf4caa2c 100755 --- a/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm @@ -2352,6 +2352,9 @@ sub get_reference{ my $tr_mapper = $transcript->get_TranscriptMapper(); + # If the codon overlaps an exon-intron boundary @coords can return two coordinates: + # one for the first exon where the codon starts + # a second coordinate for the exon where the codon ends my @coords = defined($pos2) ? $tr_mapper->pep2genomic($pos, $pos2) : $tr_mapper->pep2genomic($pos, $pos); my $start;