From 3d479629d015b3537c3fcf59df81a6db7a783fb9 Mon Sep 17 00:00:00 2001 From: Anja Thormann Date: Fri, 24 Mar 2017 13:54:44 +0000 Subject: [PATCH] fix bug with reported allele requirements, add sift and polyohen data --- G2P.pm | 55 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/G2P.pm b/G2P.pm index 33d7b49..dde7c5f 100644 --- a/G2P.pm +++ b/G2P.pm @@ -127,6 +127,16 @@ my $maf_key_2_population_name = { EA => 'ESP6500:European_American', }; +my $allelic_requirements = { + 'biallelic' => {maf => 0.005, rules => {HET => 2, HOM => 1}}, + 'monoallelic' => {maf => 0.0001, rules => {HET => 1, HOM => 0}}, + 'x-linked dominant' => {maf => 0.0001, rules => {HET => 1, HOM => 0}}, + 'monoallelic (X; hemizygous)' => {maf => 0.0001, rules => {HET => 1, HOM => 0}}, + 'x-linked over-dominance' => {maf => 0.0001, rules => {HET => 1, HOM => 0}}, +}; + +my @allelic_requirement_terms = keys %$allelic_requirements; + my @population_wide = qw(AA EA AFR AMR EAS EUR SAS ExAC_AFR ExAC_AMR ExAC_Adj ExAC_EAS ExAC_FIN ExAC_NFE ExAC_OTH ExAC_SAS); sub new { @@ -248,6 +258,8 @@ sub new { $self->{config}->{af_1kg} = 1; $self->{config}->{af_esp} = 1; $self->{config}->{af_exac} = 1; + $self->{config}->{sift} = 1; + $self->{config}->{polyphen} = 1; } # tell VEP we have a cache so stuff gets shared/merged between forks @@ -285,7 +297,9 @@ sub run { return {} unless $gene_data; my @ars = ($gene_data->{'allelic requirement'}) ? @{$gene_data->{'allelic requirement'}} : (); - return {} unless (@ars && ( grep {$_ =~ /^(mono|bi)allelic$/} @ars )); + my %seen; + @ars = grep { !$seen{$_}++ } @ars; + return {} unless (@ars && ( grep { exists($allelic_requirements->{$_}) } @ars)); # limit by type my @consequence_types = map { $_->SO_term } @{$tva->get_all_OverlapConsequences}; @@ -296,18 +310,24 @@ sub run { my $ar_passed = {}; my ($freqs, $existing_variant); foreach my $ar (@ars) { - if ($ar eq 'monoallelic') { - $threshold = $self->{user_params}->{maf_monoallelic}; - } else { - $threshold = $self->{user_params}->{maf_biallelic}; - } - ($freqs, $existing_variant) = @{$self->get_freq($tva)}; - foreach my $maf_key (keys %$freqs) { - return {} if ($freqs->{$maf_key} > $threshold); + my $passed = 1; + if (defined $allelic_requirements->{$ar}) { + $threshold = $allelic_requirements->{$ar}->{maf}; + ($freqs, $existing_variant) = @{$self->get_freq($tva)}; + foreach my $maf_key (keys %$freqs) { + if ($freqs->{$maf_key} > $threshold) { + $passed = 0; + last; + } + } + if ($passed) { + $ar_passed->{$ar} = 1; + } } - $ar_passed->{$ar} = 1; } + return {} if (!keys %$ar_passed); + my $vf = $tva->base_variation_feature; my $allele = $tva->variation_feature_seq; my $start = $vf->{start}; @@ -336,15 +356,22 @@ sub run { $existing_name = $existing_variant->{variation_name} || 'NA'; $novel = 'no'; } + + my $pph_score = (defined $tva->polyphen_score) ? $tva->polyphen_score : 'NA'; + my $pph_pred = (defined $tva->polyphen_prediction) ? $tva->polyphen_prediction : 'NA'; + my $sift_score = (defined $tva->sift_score) ? $tva->sift_score : 'NA'; + my $sift_pred = (defined $tva->sift_prediction) ? $tva->sift_prediction : 'NA'; if (scalar keys %$freqs > 0) { $frequencies = join(',', map {"$_=$freqs->{$_}"} keys %$freqs); } my $ar = join(',', sort keys %$ar_passed); + my $ar_in_g2pdb = join(',', sort @ars); my $g2p_data = { 'zyg' => $zyg, 'allele_requirement' => $ar, + 'ar_in_g2pdb' => $ar_in_g2pdb, 'frequencies' => $frequencies, 'consequence_types' => join(',', @consequence_types), 'refseq' => $refseq, @@ -355,6 +382,10 @@ sub run { 'hgvs_t' => $hgvs_t, 'hgvs_p' => $hgvs_p, 'vf_location' => "$seq_region_name:$start-$end $ref/$allele", + 'sift_score' => "$sift_score", + 'sift_prediction' => $sift_pred, + 'polyphen_score' => "$pph_score", + 'polyphen_prediction' => $pph_pred, }; my %return = ( @@ -371,7 +402,9 @@ sub run { delete $cache->{$vf_name} if exists($cache->{$vf_name}); # biallelic genes require >=1 hom or >=2 hets + my $gene_reqs = {}; + foreach my $ar (keys %$ar_passed) { if($ar eq 'biallelic') { # homozygous, report complete @@ -389,7 +422,7 @@ sub run { } } # monoallelic genes require only one allele - elsif($ar eq 'monoallelic') { + elsif($ar eq 'monoallelic' || $ar eq 'x-linked dominant' || $ar eq 'monoallelic (X; hemizygous)' || $ar eq 'x-linked over-dominance') { $return{G2P_complete} = 1; $gene_reqs->{MONO} = 1; }