Skip to content

Commit

Permalink
new evidence types
Browse files Browse the repository at this point in the history
  • Loading branch information
sarahhunt committed Feb 15, 2016
1 parent bbbd341 commit f786c45
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use strict;
use warnings;

use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
use Bio::EnsEMBL::Variation::Utils::QCUtils qw( count_rows count_for_statement);
use Bio::EnsEMBL::Variation::Utils::QCUtils qw( count_rows count_for_statement get_evidence_attribs);

my $DEBUG = 0;

Expand Down Expand Up @@ -89,6 +89,7 @@ sub run {
my $complimented_desc = $self->check_complimented_desc();
my $bad_position = $self->check_bad_position();

my $attribs_loaded = $self->check_attribs();

print $report "Post-import preQC check
Expand All @@ -110,6 +111,9 @@ VariationFeature without seqregion: $varfeat_no_seqreg
VariationFeature where end+1<start: $bad_position
\n";

print $report "\nERROR: missing evidence attribs\n\n" if $attribs_loaded == 0;


print $report "ERROR: $complimented_desc complimented descriptions found - to be fixed manually\n\n" if $complimented_desc >0;

if($var_no_ss_allele > 0 ||
Expand All @@ -120,7 +124,8 @@ VariationFeature where end+1<start: $bad_position
$geno_no_sample >0 ||
$varfeat_no_pos >0 ||
$varfeat_no_seqreg >0 ||
$geno_no_subsnp >0
$geno_no_subsnp >0 ||
$attribs_loaded == 0
){

print $report "Exiting - missing data to import\n";
Expand Down Expand Up @@ -348,4 +353,25 @@ sub check_bad_position{
return count_for_statement($var_dba , $data_ext_stat);
}

=head2 check_attribs

  Confirm that every evidence attrib this pipeline expects is available
  before any real work starts, so that a missing attrib is caught up
  front rather than leaving partially processed jobs behind.

  The list of expected evidence names is fixed in this method and is
  checked in full whatever the species - not future-proof, but it will
  catch some problems.

  Returns 1 if each expected name has a defined entry in the hash
  returned by get_evidence_attribs(), 0 otherwise.

=cut

sub check_attribs {
    my ($self) = @_;

    my $variation_dba = $self->get_species_adaptor('variation');
    my $evidence_ids  = get_evidence_attribs($variation_dba);

    ## evidence names which must all be present
    my @expected = qw(
        1000Genomes
        Cited
        ESP
        ExAC
        Frequency
        HapMap
        Multiple_observations
        1000Bull_Genomes
        WTSI_MGP
    );

    ## any name without a defined entry counts as missing
    my @missing = grep { !defined $evidence_ids->{$_} } @expected;

    return @missing ? 0 : 1;
}

1;
31 changes: 25 additions & 6 deletions modules/Bio/EnsEMBL/Variation/Utils/QCUtils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ sub summarise_evidence{


## summarise ss information
my $ss_variations = get_ss_variations($var_dbh, $first, $last);
my $ss_variations = get_ss_variations($var_dbh, $first, $last, $species);


## extract list of variants with pubmed citations
Expand All @@ -291,7 +291,8 @@ sub summarise_evidence{

## dbSNP ss submissions
push @{$evidence{$var}}, $evidence_ids->{Multiple_observations}
if defined $ss_variations->{$var}->{count} && $ss_variations->{$var}->{count} > 1;
if defined $ss_variations->{$var}->{count} && $ss_variations->{$var}->{count} > 1
&& $species !~/Homo|Human/i ;

push @{$evidence{$var}}, $evidence_ids->{Frequency}
if defined $ss_variations->{$var}->{'freq'};
Expand All @@ -311,7 +312,16 @@ sub summarise_evidence{
push @{$evidence{$var}}, $evidence_ids->{'1000Genomes'}
if (defined $kg_variations->{$var} || defined $ss_variations->{$var}->{'KG'}) ;

## pubmed citations
## additional cow evidence
push @{$evidence{$var}}, $evidence_ids->{'1000Bull_Genomes'}
if (defined $kg_variations->{$var} || defined $ss_variations->{$var}->{'1000_BULL_GENOMES'}) ;

## additional mouse evidence
push @{$evidence{$var}}, $evidence_ids->{'WTSI_MGP'}
if (defined $kg_variations->{$var} || defined $ss_variations->{$var}->{'SC_MOUSE_GENOMES'}) ;


## pubmed citations - multi species
push @{$evidence{$var}}, $evidence_ids->{Cited}
if defined $pubmed_variations->{$var};

Expand Down Expand Up @@ -363,7 +373,8 @@ sub get_ss_variations{
my $var_dbh = shift;
my $first = shift;
my $last = shift;

my $species = shift;

my %evidence;

my $obs_var_ext_sth = $var_dbh->prepare(qq[ select al.variation_id,
Expand All @@ -389,12 +400,20 @@ sub get_ss_variations{

$l->[2] = "N" unless defined $l->[2];

## human specific
$evidence{$l->[0]}{'KG'} = 1 if $l->[1] =~/1000GENOMES/;
$evidence{$l->[0]}{'freq'} = 1 if $l->[1] =~/1000GENOMES/;

#save submitter handle, population and ss id to try to discern independent submissions
push @{$save_by_var{$l->[0]}}, [ $l->[1], $l->[2], $l->[5] ];
## cow specific
$evidence{$l->[0]}{'1000_BULL_GENOMES'} = 1 if $l->[1] =~/1000_BULL_GENOMES/;
## mouse specific
$evidence{$l->[0]}{'SC_MOUSE_GENOMES'} = 1 if $l->[1] =~/SC_MOUSE_GENOMES/;

#save submitter handle, population and ss id to try to discern independent submissions
## no longer useful for Human 2016/01
unless ($species =~/Homo|Human/i){
push @{$save_by_var{$l->[0]}}, [ $l->[1], $l->[2], $l->[5] ];
}

## Save frequency evidence for variant by variant id - ensure at least 2 chromosomes assayed and variant poly
## only assign frequency status if population has more than 1 member.
Expand Down

0 comments on commit f786c45

Please sign in to comment.