Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/Ensembl/ensembl-variation
Browse files Browse the repository at this point in the history
…into registry_and_divisions_support_release_90
  • Loading branch information
Thomas Maurel committed Aug 21, 2017
2 parents 9935d31 + 95add60 commit 8955df5
Show file tree
Hide file tree
Showing 56 changed files with 2,633 additions and 1,399 deletions.
73 changes: 48 additions & 25 deletions modules/Bio/EnsEMBL/Variation/DBSQL/AlleleAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -329,60 +329,82 @@ sub _fetch_all_by_Variation_from_Genotypes {

# If we got a population argument, make sure that it is a Population object
assert_ref($population,'Bio::EnsEMBL::Variation::Population') if (defined($population));
# fetch all genotypes
my $genotypes = $variation->get_all_SampleGenotypes();
return [] unless scalar @$genotypes;
# copy sample ID to save time later
$_->{_sample_id} ||= $_->sample->dbID for @$genotypes;
# also attempt to get hash of missing data, may have been added if retrieving data from VCF
my $missing = {};
my %sample_ids = ();
foreach my $gt(@$genotypes) {
$gt->{_sample_id} ||= $gt->sample->dbID;
$sample_ids{$gt->{_sample_id}} = 1;
$missing = $gt->{_missing_alleles} if defined($gt->{_missing_alleles});
}

$sample_ids{$_} = 1 for keys %$missing;

# get populations for samples
my (@pop_list, %pop_hash);
my (@pop_list, $pop_hash);
if(defined($population)) {
@pop_list = ($population);
my $pop_id = $population->dbID;
$pop_hash{$pop_id}{$_->dbID} = 1 for @{$population->get_all_Samples};
$pop_hash->{$pop_id}->{$_->dbID} = 1 for @{$population->get_all_Samples};
}
else {
my $pa = $self->db->get_PopulationAdaptor();
%pop_hash = %{$pa->_get_sample_population_hash([map {$_->{_sample_id}} @$genotypes])};
return [] unless %pop_hash;
@pop_list = @{$pa->fetch_all_by_dbID_list([keys %pop_hash])};
$pop_hash = $pa->_get_sample_population_hash([keys %sample_ids]);
return [] unless scalar keys %$pop_hash;
@pop_list = @{$pa->fetch_all_by_dbID_list([keys %$pop_hash])};
}
return [] unless @pop_list and %pop_hash;
return [] unless @pop_list and scalar keys %$pop_hash;
# organise the genotypes by subsnp
my %by_ss = ();
push @{$by_ss{$_->subsnp || ''}}, $_ for @$genotypes;

my @objs;


my $missing_by_pop = ();

foreach my $pop(@pop_list) {
next unless $pop->_freqs_from_gts;
my $pop_id = $pop->dbID;
foreach my $ss(keys %by_ss) {
my (%counts, $total, @freqs);
map {$counts{$_}++}
map {@{$_->{genotype}}}
grep {$pop_hash{$pop_id}{$_->{_sample_id}}}
grep {$pop_hash->{$pop_id}->{$_->{_sample_id}}}
@{$by_ss{$ss}};
next unless %counts;
my @alleles = keys %counts;
$total += $_ for values %counts;

# add missing
foreach my $sample_id(keys %{$pop_hash->{$pop_id}}) {
if(my $missing_by_sample = $missing->{$sample_id}) {
foreach my $allele(keys %$missing_by_sample) {
$missing_by_pop->{$pop_id}->{$allele} = 1;
$total += $missing_by_sample->{$allele};
}
}
}

next unless $total;
@freqs = map {defined($counts{$_}) ? ($counts{$_} / $total) : 0} @alleles;
for my $i(0..$#alleles) {
push @objs, Bio::EnsEMBL::Variation::Allele->new_fast({
allele => $alleles[$i],
Expand All @@ -392,13 +414,14 @@ sub _fetch_all_by_Variation_from_Genotypes {
variation => $variation,
adaptor => $self,
subsnp => $ss eq '' ? undef : $ss,
_missing_alleles => $missing_by_pop->{$pop_id} || {},
});
weaken($objs[-1]->{'variation'});
}
}
}
return \@objs;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,7 @@ sub store{
foreach my $attrib_type( keys %{$pf->{attribs}} ){
my $attrib_type_id = $aa->attrib_id_for_type_code($attrib_type);
throw("No attrib type ID found for attrib_type ", $attrib_type) unless defined $attrib_type_id;
$pf->{attribs}->{$attrib_type} =~ s/\s+$//;
$pfa_sth->execute( $pf->{dbID}, $attrib_type_id, $pf->{attribs}->{$attrib_type} );
}
$pfa_sth->finish;
Expand Down
46 changes: 29 additions & 17 deletions modules/Bio/EnsEMBL/Variation/DBSQL/PopulationGenotypeAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -330,54 +330,66 @@ sub _fetch_all_by_Variation_from_Genotypes {
my $genotypes = $variation->get_all_SampleGenotypes();

return [] unless scalar @$genotypes;
# copy sample ID to save time later
$_->{_sample_id} ||= $_->sample->dbID for @$genotypes;
# also attempt to get hash of missing data, may have been added if retrieving data from VCF
my $missing = {};
my %sample_ids = ();
foreach my $gt(@$genotypes) {
$gt->{_sample_id} ||= $gt->sample->dbID;
$sample_ids{$gt->{_sample_id}} = 1;
$missing = $gt->{_missing_alleles} if defined($gt->{_missing_alleles});
}

$sample_ids{$_} = 1 for keys %$missing;

# get populations for samples
my (@pop_list, %pop_hash);
my (@pop_list, $pop_hash);

if(defined($population)) {
@pop_list = ($population);
my $pop_id = $population->dbID;
$pop_hash{$pop_id}{$_->dbID} = 1 for @{$population->get_all_Samples};
$pop_hash->{$pop_id}->{$_->dbID} = 1 for @{$population->get_all_Samples};
}
else {
my $pa = $self->db->get_PopulationAdaptor();
%pop_hash = %{$pa->_get_sample_population_hash([map {$_->{_sample_id}} @$genotypes])};
return [] unless %pop_hash;
@pop_list = @{$pa->fetch_all_by_dbID_list([keys %pop_hash])};
$pop_hash = $pa->_get_sample_population_hash([keys %sample_ids]);
return [] unless scalar keys %$pop_hash;
@pop_list = @{$pa->fetch_all_by_dbID_list([keys %$pop_hash])};
}

return [] unless @pop_list and %pop_hash;
return [] unless @pop_list and scalar keys %$pop_hash;

# organise the genotypes by subsnp
my %by_ss = ();
push @{$by_ss{$_->subsnp || ''}}, $_ for @$genotypes;

my @objs;

foreach my $pop(@pop_list) {
next unless $pop->_freqs_from_gts;
my $pop_id = $pop->dbID;
foreach my $ss(keys %by_ss) {
my (%counts, $total, @freqs);
map {$counts{$_->genotype_string(1)}++}
grep {$pop_hash{$pop_id}{$_->{_sample_id}}}
grep {$pop_hash->{$pop_id}->{$_->{_sample_id}}}
@{$by_ss{$ss}};
next unless %counts;
my @alleles = keys %counts;
$total += $_ for values %counts;

# add missing
$total++ for grep {$missing->{$_}} keys %{$pop_hash->{$pop_id}};
next unless $total;
@freqs = map {defined($counts{$_}) ? ($counts{$_} / $total) : 0} @alleles;
for my $i(0..$#alleles) {
push @objs, Bio::EnsEMBL::Variation::PopulationGenotype->new_fast({
genotype => [split /\|/, $alleles[$i]],
Expand Down
132 changes: 99 additions & 33 deletions modules/Bio/EnsEMBL/Variation/DBSQL/VariationFeatureAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1725,42 +1725,57 @@ sub fetch_by_hgvs_notation {
#Get the Transcript object to convert coordinates
my $transcript_adaptor = $user_transcript_adaptor || $self->db()->dnadb()->get_TranscriptAdaptor();
my $transcript = $transcript_adaptor->fetch_by_stable_id($reference);

my @transcripts;

#try and fetch via gene
if(!defined($transcript)) {
my $gene_adaptor = $transcript_adaptor->db->get_GeneAdaptor();
my ($gene) = grep {($_->external_name || '') eq $reference} @{$gene_adaptor->fetch_all_by_external_name($reference)};

if($gene) {
$transcript = $gene->canonical_transcript();

if(!defined($transcript)) {
foreach my $tr(@{$gene->get_all_Transcripts}) {
$transcript = $tr if grep {$_->database eq 'CCDS'} @{$tr->get_all_DBEntries};
}
}
}
push @transcripts, @{$self->_get_gene_transcripts($transcript_adaptor, $reference, $multiple_ok)};
}
else {
push @transcripts, $transcript;
}

throw ("Could not get a Transcript object for '$reference'") if !defined($transcript);
throw ("Could not get a Transcript object for '$reference'") unless @transcripts;

my $is_exonic;
($start, $end, $strand, $is_exonic) = _parse_hgvs_transcript_position($description, $transcript) ;
my @results;
my %errors;

$slice = $slice_adaptor->fetch_by_region($transcript->coord_system_name(),$transcript->seq_region_name());
($ref_allele, $alt_allele) = get_hgvs_alleles( $hgvs);
foreach my $transcript(@transcripts) {
my ($is_exonic, $result);

my $result = $self->_hgvs_from_components($hgvs, $reference, $type, $description, $slice, $ref_allele, $alt_allele, $start, $end, $strand);
return $multiple_ok ? [$result] : $result;
eval {
($start, $end, $strand, $is_exonic) = _parse_hgvs_transcript_position($description, $transcript) ;

$slice = $slice_adaptor->fetch_by_region($transcript->coord_system_name(),$transcript->seq_region_name());
($ref_allele, $alt_allele) = get_hgvs_alleles($hgvs);

$result = $self->_hgvs_from_components(
$hgvs, $reference, $type, $description,
$slice, $ref_allele, $alt_allele, $start, $end, $strand
)
};

if($@) {
$errors{$@} = 1;
}
else {
push @results, $result;
}
}

throw(join("\n", keys %errors)) unless @results;

return $multiple_ok ? \@results : $results[0];
}

elsif($type =~ m/g|m/i) {
($start, $end) = _parse_hgvs_genomic_position($description) ;
($start, $end) = _parse_hgvs_genomic_position($description) ;

## grab reference allele; second call after "||" allows for LRG regions to be fetched
$slice = $slice_adaptor->fetch_by_region('chromosome', $reference ) || $slice_adaptor->fetch_by_region(undef, $reference);
$strand =1; ## strand should be genome strand for HGVS genomic notation
($ref_allele, $alt_allele) = get_hgvs_alleles( $hgvs);
($ref_allele, $alt_allele) = get_hgvs_alleles($hgvs);

my $result = $self->_hgvs_from_components($hgvs, $reference, $type, $description, $slice, $ref_allele, $alt_allele, $start, $end, $strand);
return $multiple_ok ? [$result] : $result;
Expand All @@ -1772,24 +1787,55 @@ sub fetch_by_hgvs_notation {
my $transcript_adaptor = $user_transcript_adaptor || $self->db()->dnadb()->get_TranscriptAdaptor();
my $transcript = $transcript_adaptor->fetch_by_translation_stable_id($reference);

my @transcripts;

#try and fetch via gene
if(!defined($transcript)) {
my $gene_adaptor = $transcript_adaptor->db->get_GeneAdaptor();
my ($gene) = @{$gene_adaptor->fetch_all_by_external_name($reference)};

$transcript = $self->_pick_likely_transcript($gene->get_all_Transcripts) if $gene;

print STDERR "Picked transcript ".$transcript->stable_id."\n" if $DEBUG==1;
push @transcripts, @{$self->_get_gene_transcripts($transcript_adaptor, $reference, $multiple_ok)};
}
else {
push @transcripts, $transcript;
}

throw ("Could not get a Transcript object for '$reference'") if !defined($transcript);

my $possible_prot = _parse_hgvs_protein_position($description, $reference, $transcript);
throw("Could not uniquely determine nucleotide change from $hgvs") if scalar @$possible_prot > 1 && !$multiple_ok;
throw ("Could not get a Transcript object for '$reference'") unless @transcripts;

my @results;
my %errors;

foreach my $transcript(@transcripts) {

my $possible_prot;
eval {
$possible_prot = _parse_hgvs_protein_position($description, $reference, $transcript);
throw("Could not uniquely determine nucleotide change from $hgvs") if scalar @$possible_prot > 1 && !$multiple_ok;
$slice = $slice_adaptor->fetch_by_region($transcript->coord_system_name(), $transcript->seq_region_name());
};
if($@) {
$errors{$@} = 1;
next;
}

foreach my $poss(@$possible_prot) {

$slice = $slice_adaptor->fetch_by_region($transcript->coord_system_name(),$transcript->seq_region_name());
my $result;

eval {
$result = $self->_hgvs_from_components(
$hgvs, $reference, $type, $description, $slice,
@$poss
)
};

if($@) {
$errors{$@} = 1;
}
else {
push @results, $result;
}
}
}

my @results = map {$self->_hgvs_from_components($hgvs, $reference, $type, $description, $slice, @$_)} @$possible_prot;
throw(join("\n", keys %errors)) unless @results;

return $multiple_ok ? \@results : $results[0];
}
Expand All @@ -1799,6 +1845,26 @@ sub fetch_by_hgvs_notation {
}
}

sub _get_gene_transcripts {
my ($self, $transcript_adaptor, $reference, $multiple_ok) = @_;

my $gene_adaptor = $transcript_adaptor->db->get_GeneAdaptor();
my ($gene) = grep {($_->external_name || '') eq $reference} @{$gene_adaptor->fetch_all_by_external_name($reference)};

my @transcripts;

if($gene) {
if($multiple_ok) {
push @transcripts, @{$gene->get_all_Transcripts};
}
else {
push @transcripts, $self->_pick_likely_transcript($gene->get_all_Transcripts);
print STDERR "Picked transcript ".$transcripts[0]->stable_id."\n" if $DEBUG==1;
}
}

return \@transcripts;
}


=head2 fetch_all_possible_by_hgvs_notation
Expand Down
Loading

0 comments on commit 8955df5

Please sign in to comment.