Skip to content

Commit

Permalink
Merge pull request #785 from aparton/feature/varsynonyms
Browse files Browse the repository at this point in the history
Updates to JSON output
  • Loading branch information
sarahhunt committed Jun 26, 2020
2 parents 6e6f573 + 299ffb4 commit b824ecb
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 15 deletions.
3 changes: 2 additions & 1 deletion modules/Bio/EnsEMBL/VEP/Config.pm
Expand Up @@ -155,6 +155,7 @@ our @OPTION_SETS = (
af_gnomad => 1,
max_af => 1,
pubmed => 1,
var_synonyms => 1,
uniprot => 1,
mane => 1,
tsl => 1,
Expand Down Expand Up @@ -631,7 +632,7 @@ sub check_config {

# turn off some options if using --everything and --database
if($config->{everything} && $config->{database}) {
delete $config->{$_} for qw(af_1kg af_esp af_exac af_gnomad max_af pubmed);
delete $config->{$_} for qw(af_1kg af_esp af_exac af_gnomad max_af pubmed var_synonyms);
}

# check valid values for flags
Expand Down
13 changes: 13 additions & 0 deletions modules/Bio/EnsEMBL/VEP/OutputFactory/JSON.pm
Expand Up @@ -421,6 +421,19 @@ sub add_colocated_variant_info_JSON {
$ex->{$field} = [split(',', $ex->{$field})];
}

# Convert the packed var_synonyms string into a hash keyed by source name.
# The string holds one entry per source, separated by "--"; each entry has
# the form "<source>::<id>[,<id>...]". For example
#   "ClinVar::RCV0001,RCV0002--OMIM::OMIM01"
# becomes
#   { ClinVar => ['RCV0001', 'RCV0002'], OMIM => ['OMIM01'] }
if(defined($ex->{var_synonyms})) {
  # start from an empty hash so that an empty string yields {} rather than
  # leaving the field undefined
  my %synonyms_for;

  foreach my $entry (split /--/, $ex->{var_synonyms}) {
    # a stray "--" produces an empty entry; it carries no source or ids
    next unless length($entry);

    # LIMIT of 2: only the first "::" separates the source from its ids, so
    # an id that itself contains "::" is kept intact instead of truncated
    my ($source, $id_string) = split /::/, $entry, 2;

    # an entry without "::" has no ids; use an empty list rather than
    # splitting an undefined value (which warns under "use warnings")
    my @ids = defined($id_string) ? split(/,/, $id_string) : ();

    $synonyms_for{$source} = \@ids;
  }

  $ex->{var_synonyms} = \%synonyms_for;
}

push @{$hash->{colocated_variants}}, $ex;

return $hash;
Expand Down
47 changes: 47 additions & 0 deletions t/OutputFactory_JSON.t
Expand Up @@ -164,6 +164,53 @@ SKIP: {
'add_colocated_variant_info_JSON'
);

# Feed a packed var_synonyms string through add_colocated_variant_info_JSON.
# In the packed form, sources are separated by '--', a source name is
# separated from its ids by '::', and the ids of one source by ','.
$ex->{var_synonyms} = 'ClinVar::RCV0001,RCV0002--OMIM::OMIM01';

is_deeply(
$of->add_colocated_variant_info_JSON({}, [$frequency_hash], $ex),
{
'colocated_variants' => [
{
'frequencies' => {
'T' => {
'amr' => '0.0014',
'gnomad_sas' => '0',
'gnomad' => '0.0003478',
'ea' => '0',
'gnomad_oth' => '0',
'gnomad_asj' => '0',
'gnomad_nfe' => '1.886e-05',
'aa' => '0.004998',
'gnomad_afr' => '0.004643',
'afr' => '0.003',
'gnomad_amr' => '0.0003236',
'gnomad_fin' => '0',
'sas' => '0',
'gnomad_eas' => '0',
'eur' => '0',
'eas' => '0'
}
},
'id' => 'rs142513484',
'minor_allele_freq' => '0.0010',
'minor_allele' => 'T',
'end' => 25585733,
'start' => 25585733,
'strand' => 1,
'allele_string' => 'C/T',
'pubmed' => [10, 20, 30],
'clin_sig' => ["pathogenic", "benign"],
# expected: one key per source, each holding an array ref of that source's ids
'var_synonyms' => {
'ClinVar' => ['RCV0001','RCV0002'],
'OMIM' => ['OMIM01']
}
}
]
},
'add_colocated_variant_info_JSON - convert var_synonyms string into hash'
);


$ib = get_annotated_buffer({input_file => $test_cfg->{test_vcf}});
$of = Bio::EnsEMBL::VEP::OutputFactory::JSON->new({config => $ib->config});

Expand Down
2 changes: 1 addition & 1 deletion t/OutputFactory_Tab.t
Expand Up @@ -252,7 +252,7 @@ is(
"-\t-\t-\t-\trs142513484\tMODIFIER\t-\t-1\t-\tSNV\tMRPL39\tHGNC\tHGNC:14027\tprotein_coding\tYES\t-\t5\t-\t".
"CCDS33522.1\tENSP00000305682\tQ9NYK5\t-\tUPI00001AEAC0\t-\t-\t-\t11/11\t-\t-\t-\tENST00000307301.11:c.*18G>A\t".
"-\t-\t0.0010\t0.003\t0.0014\t0\t0\t0\t0.004998\t0\t0.0003478\t0.004643\t0.0003236\t0\t0\t0\t1.886e-05\t0\t0\t".
"0.004998\tAA\t-\t-\t-\t-\t-\t-\t-\t-\t-",
"0.004998\tAA\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-",
'get_all_lines_by_InputBuffer - everything'
);

Expand Down
14 changes: 1 addition & 13 deletions t/OutputFactory_VCF.t
Expand Up @@ -346,19 +346,7 @@ $of = Bio::EnsEMBL::VEP::OutputFactory::VCF->new({config => $ib->config});

is(
$lines[0],
"21\t25585733\trs142513484\tC\tT\t.\t.\t".
'CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|'.
'11/11||ENST00000307301.11:c.*18G>A||1122|||||rs142513484||-1||SNV|HGNC|HGNC:14027|YES||5||CCDS33522.1|'.
'ENSP00000305682|Q9NYK5||UPI00001AEAC0|||||||0.0010|0.003|0.0014|0|0|0|0.004998|0|0.0003478|0.004643|'.
'0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA|||||||||,'.
'T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|'.
'10/10||ENST00000352957.8:c.991G>A|ENSP00000284967.6:p.Ala331Thr|1033|991|331|A/T|Gca/Aca|rs142513484|'.
'|-1||SNV|HGNC|HGNC:14027|||1|P1|CCDS13573.1|ENSP00000284967|Q9NYK5||UPI00001AEE66||tolerated_low_confidence(0.17)|'.
'benign(0.001)||||0.0010|0.003|0.0014|0|0|0|0.004998|0|0.0003478|0.004643|0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA|||||||||,'.
'T|upstream_gene_variant|MODIFIER|AP000223.1|ENSG00000260583|Transcript|ENST00000567517|antisense||||||||||'.
'rs142513484|2407|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.0010|0.003|0.0014|0|0|0|0.004998|0|'.
'0.0003478|0.004643|0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA|||||||||'.
"\tGT\t0|0",
"21\t25585733\trs142513484\tC\tT\t.\t.\tCSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|11/11||ENST00000307301.11:c.*18G>A||1122|||||rs142513484||-1||SNV|HGNC|HGNC:14027|YES||5||CCDS33522.1|ENSP00000305682|Q9NYK5||UPI00001AEAC0|||||||0.0010|0.003|0.0014|0|0|0|0.004998|0|0.0003478|0.004643|0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA||||||||||,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|10/10||ENST00000352957.8:c.991G>A|ENSP00000284967.6:p.Ala331Thr|1033|991|331|A/T|Gca/Aca|rs142513484||-1||SNV|HGNC|HGNC:14027|||1|P1|CCDS13573.1|ENSP00000284967|Q9NYK5||UPI00001AEE66||tolerated_low_confidence(0.17)|benign(0.001)||||0.0010|0.003|0.0014|0|0|0|0.004998|0|0.0003478|0.004643|0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA||||||||||,T|upstream_gene_variant|MODIFIER|AP000223.1|ENSG00000260583|Transcript|ENST00000567517|antisense||||||||||rs142513484|2407|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.0010|0.003|0.0014|0|0|0|0.004998|0|0.0003478|0.004643|0.0003236|0|0|0|1.886e-05|0|0|0.004998|AA||||||||||\tGT\t0|0",
'get_all_lines_by_InputBuffer - everything'
);

Expand Down

0 comments on commit b824ecb

Please sign in to comment.