Skip to content

Commit

Permalink
ENSCORESW-3132: Limited implementation of Intervals with start > end …
Browse files Browse the repository at this point in the history
…(i.e. spanning the origin of a circular chromosome)
  • Loading branch information
ens-bwalts committed Jun 17, 2019
1 parent faba0de commit bc387e6
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 22 deletions.
102 changes: 83 additions & 19 deletions modules/Bio/EnsEMBL/Utils/Interval.pm
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Bio::EnsEMBL::Utils::Interval
=head1 SYNOPSIS
# let's get an interval spanning 9e5 bp and associate it with some data
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' });
my $i1 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' });
# and another one which overlaps with the previous,
# but with scalar associated data
Expand All @@ -50,12 +50,18 @@ Bio::EnsEMBL::Utils::Interval
print "I1 and I2 do not overlap\n";
}
# If an interval is defined with a start > end, then it is assumed
# to be spanning the origin on a circular chromosome
my $i3 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
warn "Interval spans the origin" if $i3->spans_origin;
etc.
=head1 DESCRIPTION
A class representing an interval defined on a genomic region. Instances of this
class can store arbitrarily defined data.
class can store arbitrarily defined data. If created with start > end, then it
is assumed that this interval is on a circular chromosome spanning the origin.
=head1 METHODS
Expand All @@ -78,9 +84,12 @@ use Bio::EnsEMBL::Utils::Exception qw(throw);
Arg [3] : (optional) $data
The data associated with the interval, can be anything
Example : my $i = Bio::EnsEMBL::Utils::Interval->new(1e2, 2e2, { 'key' => 'value' });
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
$i->spans_origin # returns 0
$i2->spans_origin # returns 1
Description : Constructor. Creates a new instance
Returntype : Bio::EnsEMBL::Utils::Interval
Exceptions : none
Exceptions : Throws an exception if start or end is not defined.
Caller : general
=cut
Expand All @@ -92,9 +101,17 @@ sub new {
my ($start, $end, $data) = @_;
throw 'Must specify interval boundaries [start, end]'
unless defined $start and defined $end;
throw 'start must be <= end' if $start > $end;

my $spans_origin = 0;
if ($start > $end) {
$spans_origin = 1;
}

my $self = bless({ start => $start, end => $end, data => $data }, $class);
my $self = bless({ start => $start,
end => $end,
data => $data ,
spans_origin => $spans_origin},
$class);
return $self;
}

Expand Down Expand Up @@ -146,6 +163,23 @@ sub data {
return $self->{data};
}

=head2 spans_origin
Arg [] : none
Description : Returns whether this interval was created spanning the origin
(more particularly: if the interval was instantiated with start > end)
Returntype : boolean
Exceptions : none
Caller : general
=cut

sub spans_origin {
  # Read-only accessor: hands back whatever is stored under the
  # 'spans_origin' key of the object hash (the constructor sets it to 1
  # when start > end and to 0 otherwise).
  my ($self) = @_;

  return $self->{spans_origin};
}

=head2 is_empty
Arg [] : none
Expand All @@ -159,7 +193,11 @@ sub data {
sub is_empty {
  my $self = shift;

  # The comparison direction depends on whether the interval was flagged
  # as spanning the origin. An unconditional "start >= end" test placed
  # ahead of this branch would classify every origin-spanning interval
  # (start > end by construction) as empty, so the flag must be consulted
  # first.
  if ($self->spans_origin) {
    # Origin-spanning: start is expected to be greater than end, so the
    # interval only counts as empty when that ordering no longer holds.
    return ($self->end >= $self->start);
  } else {
    # Ordinary interval: empty when start is not strictly below end.
    return ($self->start >= $self->end);
  }
}

=head2 is_point
Expand Down Expand Up @@ -194,7 +232,11 @@ sub contains {
return 0 if $self->is_empty or not defined $point;
throw 'point must be a number' unless looks_like_number($point);

return ($point >= $self->start and $point <= $self->end);
if ($self->spans_origin) {
return ($point >= $self->start or $point <= $self->end);
} else {
return ($point >= $self->start and $point <= $self->end);
}
}

=head2 intersects
Expand All @@ -210,16 +252,26 @@ sub contains {
sub intersects {
  my ($self, $interval) = @_;
  assert_ref($interval, 'Bio::EnsEMBL::Utils::Interval');

  # The plain "start <= other end and other start <= end" overlap test is
  # only valid when neither interval spans the origin, so it must not run
  # before the spans_origin checks below.
  if ($self->spans_origin and $interval->spans_origin) {
    # Two origin-spanning intervals are always reported as intersecting.
    return 1;
  } elsif ($self->spans_origin or $interval->spans_origin) {
    # Exactly one interval spans the origin. The test is symmetric, so it
    # does not matter which of the two is the spanning one: they overlap
    # if one reaches the other's start from the right, or the other's end
    # from the left.
    return ($interval->end >= $self->start or $interval->start <= $self->end);
  } else {
    # Neither interval spans the origin: standard closed-interval overlap.
    return ($self->start <= $interval->end and $interval->start <= $self->end);
  }
}

=head2 is_right_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the right of a point.
Description : Checks if this current interval is entirely to the right of a point
or Interval.
More formally, the method will return true, if for every point x from
the current interval the inequality x > point holds.
the current interval the inequality x > point holds, where point
is either a single scalar, or point is the end of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean
Exceptions : Throws if this Interval, or an Interval passed in, spans the origin
Caller : general
Expand All @@ -232,18 +284,26 @@ sub is_right_of {
return 0 unless defined $other;

if ( looks_like_number($other) ) {
return $self->start > $other;
return $self->spans_origin ?
throw "is_right_of not defined for an interval that spans the origin" :
$self->start > $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_right_of not defined for an interval that spans the origin";
} else {
return $self->start > $other->end;
}

return $self->start > $other->end;
}

=head2 is_left_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the left of a point.
Description : Checks if this current interval is entirely to the left of a point
or Interval.
More formally, the method will return true, if for every point x from
the current interval the inequality x < point holds.
the current interval the inequality x < point holds, where point
is either a single scalar, or point is the start of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean
Exceptions : Throws if this Interval, or an Interval passed in, spans the origin
Caller : general
Expand All @@ -256,10 +316,14 @@ sub is_left_of {
return 0 unless defined $other;

if ( looks_like_number($other) ) {
return $self->end < $other;
return $self->spans_origin ?
throw "is_left_of not defined for an interval that spans the origin" :
$self->end < $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_left_of not defined for an interval that spans the origin";
} else {
return $self->end < $other->start;
}

return $self->end < $other->start;
}

1;
Expand Down
6 changes: 6 additions & 0 deletions modules/Bio/EnsEMBL/Utils/Tree/Interval/Immutable.pm
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ be added to or removed from the tree during its life cycle.
Implementation heavily inspired by https://github.com/tylerkahn/intervaltree-python
This implementation does not support Intervals having a start > end - i.e.
intervals spanning the origin of a circular chromosome.
=head1 METHODS
=cut
Expand Down Expand Up @@ -297,6 +300,9 @@ sub _divide_intervals {
my ($s_center, $s_left, $s_right) = ([], [], []);

foreach my $interval (@{$intervals}) {
if ($interval->spans_origin) {
throw "Cannot build a tree containing an interval that spans the origin";
}
if ($interval->end < $x_center) {
push @{$s_left}, $interval;
} elsif ($interval->start > $x_center) {
Expand Down
6 changes: 5 additions & 1 deletion modules/Bio/EnsEMBL/Utils/Tree/Interval/Mutable/PP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use Carp;

use Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node;
use Bio::EnsEMBL::Utils::Interval;
use Bio::EnsEMBL::Utils::Exception qw(throw);

=head2 new
Expand Down Expand Up @@ -108,14 +109,17 @@ sub size {
Example : $tree->insert(Bio::EnsEMBL::Utils::Interval->new(10, 20, 'data'));
Description : Insert an interval in the tree
Returntype : scalar (1), upon success
Exceptions : none
Exceptions : thrown if Interval spans origin (has start > end)
Caller : general
=cut

sub insert {
my ($self, $i) = @_;

if ($i->spans_origin) {
throw "Cannot insert an interval that spans the origin into a mutable tree";
}
# base case: empty tree, assign new node to root
unless (defined $self->root) {
$self->root(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self, $i));
Expand Down
71 changes: 69 additions & 2 deletions modules/t/interval_tree_immutable.t
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ use_ok 'Bio::EnsEMBL::Utils::Interval';

throws_ok { Bio::EnsEMBL::Utils::Interval->new() } qr/specify.+?boundaries/, 'Throws with no arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(1) } qr/specify.+?boundaries/, 'Throws with an undefined argument';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(10, 1) } qr/start.+?end/, 'Throws with invalid arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(100, 10) } qr/start.+?end/, 'Throws with invalid arguments';

# degenerate (point) case
my $i = Bio::EnsEMBL::Utils::Interval->new(10, 10);
Expand All @@ -36,45 +34,114 @@ ok($i->is_point, 'interval is point');

# a normal interval, start < end
$i = Bio::EnsEMBL::Utils::Interval->new(100, 200);
# an interval spanning the origin, start > end
my $i_span = Bio::EnsEMBL::Utils::Interval->new(200,100);

isa_ok($i, 'Bio::EnsEMBL::Utils::Interval');
isa_ok($i_span, 'Bio::EnsEMBL::Utils::Interval');

is($i->spans_origin, 0, 'spans_origin false for non-spanning interval');
is($i_span->spans_origin, 1, 'spans_origin true for spanning interval');

is($i->start, 100, 'start position');
is($i->end, 200, 'end position');
is($i_span->start, 200, 'spanning start position');
is($i_span->end, 100, 'spanning end position');

ok(!$i->is_empty, 'interval not empty');
ok(!$i->is_point, 'interval\'s not a point');
ok(!$i_span->is_empty, 'interval not empty');
ok(!$i_span->is_point, 'interval\'s not a point');

ok($i->contains(100) && $i->contains(200) && $i->contains(150), 'interval contains points');
ok(!$i->contains(99) && !$i->contains(201), 'interval does not contain points');
ok($i_span->contains(100) && $i_span->contains(200) && $i_span->contains(250), 'spanning interval contains points');
ok(!$i_span->contains(101) && !$i_span->contains(199), 'spanning interval does not contain points');

# check is_right_of/is_left_of with point/interval
ok(!$i->is_right_of && !$i->is_left_of, 'interval is not left/right of nothing');
ok(!$i_span->is_right_of && !$i_span->is_left_of, 'spanning interval is not left/right of nothing');

ok($i->is_right_of(99), 'interval right of point');
ok(!$i->is_right_of(100) && !$i->is_right_of(150) && !$i->is_right_of(201), 'interval not right of point');
ok($i->is_left_of(201), 'interval left of point');
ok(!$i->is_left_of(99) && !$i->is_left_of(150) && !$i->is_left_of(200), 'interval not left of point');
throws_ok { $i_span->is_right_of(150) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval and a point';
throws_ok { $i_span->is_left_of(150) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval and a point';

my $j = Bio::EnsEMBL::Utils::Interval->new(50, 99);
my $k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
my $l = Bio::EnsEMBL::Utils::Interval->new(201, 250);
my $m = Bio::EnsEMBL::Utils::Interval->new(101, 199);
my $n_span = Bio::EnsEMBL::Utils::Interval->new(201,100);

# non-spanning with non-spanning query
ok($i->is_right_of($j), 'interval right of another');
ok(!$i->is_right_of($k) && !$i->is_right_of($l), 'interval not right of others');
ok($i->is_left_of($l), 'interval left of another');
ok(!$i->is_left_of($j) && !$i->is_left_of($k), 'interval not left of others');

# non-spanning with spanning query
throws_ok { $i->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';

# spanning with non-spanning query
throws_ok { $i_span->is_right_of($m) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($m) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';

# spanning with spanning query
throws_ok { $i_span->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';


# check interval data
$j = Bio::EnsEMBL::Utils::Interval->new(100, 200, [100, 200]);
is_deeply($j->data, [100, 200], 'interval data');

# check intersection with other intervals
$k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
ok($i->intersects($k), 'intervals intersect');
ok($i_span->intersects($k), 'spanning interval and interval intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(150, 250);
ok($i->intersects($k), 'intervals intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(50, 99);
ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(201, 250);
ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(101,199);
ok(!$i_span->intersects($k), 'spanning interval and interval do not intersect');
ok($i_span->intersects($n_span), 'two spanning intervals intersect');
ok($i->intersects($n_span), 'interval and spanning interval intersect');
my $o_span = Bio::EnsEMBL::Utils::Interval->new(201,99);
ok(!$i->intersects($o_span), 'interval and spanning interval do not intersect');

use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable::Node';

use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable';

my $intervals_with_span = [ Bio::EnsEMBL::Utils::Interval->new(20, 30),
Bio::EnsEMBL::Utils::Interval->new(30, 20)];

throws_ok { my $impossible_tree = Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($intervals_with_span) }
qr/Cannot build a tree containing an interval that spans the origin/,
'exception when building an interval tree with an interval that spans the origin';

my $intervals = [ Bio::EnsEMBL::Utils::Interval->new(121626874, 122092717),
Bio::EnsEMBL::Utils::Interval->new(121637917, 121658918),
Bio::EnsEMBL::Utils::Interval->new(122096077, 124088369) ];
Expand Down
5 changes: 5 additions & 0 deletions modules/t/interval_tree_mutable.t
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ is(scalar @{$search_result}, 2, 'Number of search results');
is($search_result->[0]->data, 'data1', 'Search result');
is($search_result->[1]->data, 'data2', 'Search result');

$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
throws_ok { $tree->insert(make_interval(200, 100, 'spanning_interval')) }
qr/Cannot insert an interval that spans the origin into a mutable tree/,
'exception when trying to insert an interval that spans the origin';

$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
map { $tree->insert($_) } @{$intervals};
is($tree->size(), scalar @{$intervals}, 'Tree size');
Expand Down

0 comments on commit bc387e6

Please sign in to comment.