Permalink
Browse files

big refactor of gbsyn database code into a cross-database system, Bio…

…::DB::Synteny::Store, similar in structure to Bio::DB::SeqFeature::Store
  • Loading branch information...
1 parent 7f18438 commit ab058720e0ae9ea086db70ae39c396864f09f06d @rbuels rbuels committed Aug 15, 2011

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -1,210 +1,55 @@
package Bio::DB::Synteny::Store;
use strict;
-use DBI;
-use Bio::DB::GFF::Util::Binning qw(bin bin_bot bin_top);
+use Carp;
+
+use base 'Bio::Root::Root';
+
use Bio::DB::GFF::Util::Rearrange qw(rearrange);
use Bio::DB::Synteny::Block;
-use constant MINBIN => 1000;
-use constant MAXBIN => 1_000_000_000;
-use constant EPSILON => 1e-7; # set to zero if you trust mysql's floating point comparisons
-use constant POSRANGE => 200;
-
sub new {
- my $class = shift;
- my $dsn = shift;
- my $dbh;
-
- if (ref($dsn) && $dsn->isa('DBI::db')) {
- $dbh = $dsn;
- } else {
- $dbh = DBI->connect($dsn,@_) or die "$dsn: Can't open; ",DBI->errstr;
- }
-
- my $self = {dbh=>$dbh};
- $self->{nomap} = 1 unless _has_map($dbh);
- return bless $self, $class;
-}
-
-sub dbh { shift->{dbh} }
-
-sub nomap { shift->{nomap} }
-
-# a method to get the nearest residue position match
-# (for truncating hits and gridlines). Return the nearest mapped
-# source residue and the corresponding target residue.
-sub get_nearest_position_match {
- my $self = shift;
- my ($hit,$src,$pos,$range) = @_;
-
- my @hits = ref $hit && $hit->parts > 1 ? @{$hit->parts} : ($hit);
- $range ||= POSRANGE;
-
- for my $h (@hits) {
- my $min = $pos - int($range/2);
- my $max = $pos + int($range/2);
- my $sth = $self->position_handle;
- my $hname = ref $h ? $h->name : $h;
- $hname =~ s/r|\.\d+//g;
- $sth->execute($hname,$src,$min,$max) or die $self->dbh->errstr;
- my %match;
- while (my @row = $sth->fetchrow_array) {
- $match{abs($row[0] - $pos)} = \@row;
- }
-
- my ($nearest) = map {$match{$_}} sort {$a <=> $b} keys %match;
- $nearest ||= [undef,undef];
- return @$nearest if ref $nearest && defined $nearest->[0];
- }
-}
-
-# a method to get a range of exact grid coordinates
-# for synteny data with sparse gridlines that are
-# not suitable for rounding off to the nearest multiple of 10
-sub grid_coords_by_range {
- my $self = shift;
- my ($hit,$src) = @_;
-
- my @hits = ref $hit && $hit->parts > 1 ? @{$hit->parts} : ($hit);
- my @pairs;
- my $sth = $self->position_handle;
-
- for my $h (@hits) {
- my $hname = ref $h ? $h->name : $h;
- $hname =~ s/r|\.\d+//g;
- $sth->execute($hname,$src,$hit->start,$hit->end);
- my $pairs = $sth->fetchall_arrayref;
- push @pairs, @$pairs;
- }
-
- return @pairs;
-}
-
+ my $class = shift;
+ my ($adaptor,$debug,$create,$args);
+ if (@_ == 1) {
+ $args = {DSN => shift}
+ } else {
+ ($adaptor,$debug,$create) =
+ rearrange(['ADAPTOR',
+ 'DEBUG',
+ 'CREATE',
+ ],@_);
+ }
+ $adaptor ||= 'DBI::mysql';
+ $args->{WRITE}++ if $create;
+ $args->{CREATE}++ if $create;
-# Check to see of grid-lines are possible. Some data sources
-# may lack the grid coordinate data (not that there is anything
-# wrong with that).
-sub _has_map {
- my $dbh = shift;
- my $sth = $dbh->prepare('SELECT count(*) FROM map');
- $sth->execute;
- my ($count) = $sth->fetchrow_array;
- return $count;
+ my $driver_class = "Bio::DB::Synteny::Store::$adaptor";
+ eval "require $driver_class" or croak $@;
+ my $obj = $driver_class->new_instance( @_ );
+ $obj->debug($debug) if defined $debug;
+ $obj->init($args);
+ $obj->post_init($args);
+ $obj;
}
-
-sub position_handle {
+sub debug {
my $self = shift;
-
- unless (defined $self->{position_query}) {
- my $query = <<END;
-select pos1,pos2 from map
-WHERE hit_name = ?
-AND src1 = ?
-AND pos1 >= ?
-AND pos1 <= ?
-END
-;
- $self->{position_query} = $self->dbh->prepare_cached($query) or die $self->dbh->errstr;
-
- }
-
- return $self->{position_query};
+ my $d = $self->{debug};
+ $self->{debug} = shift if @_;
+ $d;
}
-sub get_synteny_by_range {
- my $self = shift;
- my ($src, # a symbolic data source, like "worm"
- $ref, # reference for search range - contig or chromosome name
- $start, # start of search range
- $end, # end of search range
- $tgt # optional data source target, like "yeast"
- ) = rearrange([qw(SRC REF START END TGT)],@_);
- my ($query,@args) = $self->make_range_query($src,$ref,$start,$end,$tgt);
- my $sth = $self->dbh->prepare_cached($query) or die $self->dbh->errstr;
- $sth->execute(@args) or die $sth->errstr;
- my %HITS;
- while (my($hit,
- $src,$ref1,$start1,$end1,$strand1,$seq1,
- $tgt,$ref2,$start2,$end2,$strand2,$seq2) = $sth->fetchrow_array) {
- $HITS{$hit} ||= Bio::DB::Synteny::Block->new($hit);
- $HITS{$hit}->add_part([$src,$ref1,$start1,$end1,$strand1,$seq1],
- [$tgt,$ref2,$start2,$end2,$strand2,$seq2]
- );
- }
- return values %HITS;
+sub init {
}
-
-sub make_range_query {
- my $self = shift;
- my ($src,$ref,$start,$end,$tgt) = @_;
- my $query = <<'';
- SELECT
- hit_name
- , src1, ref1, start1, end1, strand1, seq1
- , src2, ref2, start2, end2, strand2, seq2
- FROM alignments
-
- my @where;
- my @args;
-
- if (defined $src) {
- push @where,'src1=?';
- push @args,$src;
- }
-
- if (defined $ref) {
- push @where,'ref1=?';
- push @args,$ref;
- }
-
- if (defined $start and defined $end) {
- my ($range_part,@range_args) = $self->bin_query($start,$end);
- push @where,$range_part;
- push @args,@range_args;
- }
-
- if (defined $tgt) {
- push @where,'src2=?';
- push @args,$tgt;
- }
-
- if (@where) {
- $query .= "\n WHERE ".join(' AND ',@where);
- }
-
- return ($query,@args);
+sub post_init {
}
-# stolen from Bio::DB::GFF::Adaptor::dbi
-sub bin_query {
- my $self = shift;
- my ($start,$end) = @_;
- my ($query,@args);
-
- $start = 0 unless defined($start);
- $end = MAXBIN unless defined($end);
-
- my @bins;
- my $minbin = MINBIN;
- my $maxbin = MAXBIN;
- my $tier = $maxbin;
- while ($tier >= $minbin) {
- my ($tier_start,$tier_stop) = (bin_bot($tier,$start)-EPSILON(),bin_top($tier,$end)+EPSILON());
- if ($tier_start == $tier_stop) {
- push @bins,'bin=?';
- push @args,$tier_start;
- } else {
- push @bins,'bin between ? and ?';
- push @args,($tier_start,$tier_stop);
- }
- $tier /= 10;
- }
-
- my $bin_part = join("\n\t OR ",@bins);
- $query = "($bin_part) AND end1>=? AND start1<=?";
- return ($query,@args,$start,$end);
+sub invert {
+ my ( $self, $strand1, $strand2 ) = @_;
+ $$strand1 = $$strand1 eq '+' ? '-' : '+';
+ $$strand2 = $$strand2 eq '+' ? '-' : '+';
}
+
1;
Oops, something went wrong.

0 comments on commit ab05872

Please sign in to comment.