Skip to content

Commit

Permalink
feat: added union_no_overlap from aw-research
Browse files Browse the repository at this point in the history
  • Loading branch information
ErikBjare committed Jun 8, 2021
1 parent 601ac7d commit d6f783d
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 0 deletions.
2 changes: 2 additions & 0 deletions aw_transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .simplify import simplify_string
from .flood import flood
from .classify import categorize, tag, Rule
from .union_no_overlap import union_no_overlap

__all__ = [
"flood",
Expand All @@ -24,6 +25,7 @@
"period_union",
"filter_period_intersect",
"union",
"union_no_overlap",
"concat",
"sum_durations",
"sort_by_timestamp",
Expand Down
122 changes: 122 additions & 0 deletions aw_transform/union_no_overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""
Originally from aw-research
"""

from copy import deepcopy
from typing import List, Tuple, Optional
from datetime import datetime, timedelta, timezone

from timeslot import Timeslot

from aw_core import Event


def _split_event(e: Event, dt: datetime) -> Tuple[Event, Optional[Event]]:
    """Split the event ``e`` in two at the point in time ``dt``.

    The split only happens when ``dt`` lies strictly inside the event, i.e.
    after its start and before its end. In that case two deep copies of ``e``
    are returned: the first keeps the original start and ends at ``dt``, the
    second starts at ``dt`` and ends where the original event ended.

    In every other case the original event object itself (not a copy) is
    returned together with ``None``.
    """
    start = e.timestamp
    end = start + e.duration

    # Guard clause: a split point on or outside the boundaries splits nothing.
    if not (start < dt < end):
        return (e, None)

    head = deepcopy(e)
    head.duration = dt - start

    tail = deepcopy(e)
    tail.timestamp = dt
    tail.duration = end - dt

    return (head, tail)


def test_split_event():
    """Check that ``_split_event`` halves an event and leaves it whole at its edges."""
    # Build the timestamp timezone-aware directly: calling ``astimezone`` on a
    # naive datetime first interprets it in the host's local timezone, which
    # would make the fixture differ from machine to machine.
    now = datetime(2018, 1, 1, 0, 0, tzinfo=timezone.utc)
    td1h = timedelta(hours=1)
    e = Event(timestamp=now, duration=2 * td1h, data={})

    # Splitting in the middle yields two back-to-back one-hour events.
    e1, e2 = _split_event(e, now + td1h)
    assert e2 is not None
    assert e1.timestamp == now
    assert e1.duration == td1h
    assert e2.timestamp == now + td1h
    assert e2.duration == td1h

    # A split point on either boundary is not strictly inside the event, so
    # the original event comes back untouched and there is no second part.
    for boundary in (now, now + 2 * td1h):
        whole, rest = _split_event(e, boundary)
        assert whole is e
        assert rest is None


def union_no_overlap(events1: List[Event], events2: List[Event]) -> List[Event]:
    """Merge two event lists and remove overlap, giving ``events1`` precedence.

    Wherever an event from ``events2`` overlaps an event from ``events1``, the
    overlapping part of the ``events2`` event is cut away; only the parts
    before and/or after the ``events1`` event are kept.

    Example:
      events1  | xxx    xx     xxx     |
      events2  |  ----     ------   -- |
      result   | xxx--  xx ----xxx  -- |

    Both inputs are deep-copied first, so neither the lists nor the events
    passed in are modified.

    NOTE(review): the merge walks both lists front to back, so it presumably
    expects each list to already be sorted by timestamp -- confirm with callers.

    Args:
        events1: Events that win wherever the two lists overlap.
        events2: Events that are trimmed (or dropped) where they overlap
            ``events1``.

    Returns:
        A new list holding the events of ``events1`` plus the non-overlapping
        remainders of ``events2``.
    """
    events1 = deepcopy(events1)
    events2 = deepcopy(events2)

    # I looked a lot at aw_transform.union when I wrote this
    events_union: List[Event] = []
    e1_i = 0
    e2_i = 0
    while e1_i < len(events1) and e2_i < len(events2):
        e1 = events1[e1_i]
        e2 = events2[e2_i]
        e1_end = e1.timestamp + e1.duration
        e2_end = e2.timestamp + e2.duration
        e1_p = Timeslot(e1.timestamp, e1_end)
        e2_p = Timeslot(e2.timestamp, e2_end)

        if e1_p.intersects(e2_p):
            if e1.timestamp <= e2.timestamp:
                events_union.append(e1)
                e1_i += 1

                # If e2 continues after e1, we need to split up the event so
                # we only get the part that comes after
                _, e2_rest = _split_event(e2, e1_end)
                if e2_rest is not None:
                    events2[e2_i] = e2_rest
                elif e2_end <= e1_end:
                    # e2 is fully covered by e1: drop it.
                    e2_i += 1
                # Otherwise e1 ends no later than e2 starts (e.g. a
                # zero-length e1 sitting at e2's start that the overlap check
                # still reports as intersecting). Nothing of e2 is covered, so
                # it is kept whole for the next iteration instead of being
                # dropped.
            else:
                # e2 starts first: emit the part before e1 and push the rest
                # back so the next iteration trims it against e1.
                e2_head, e2_rest = _split_event(e2, e1.timestamp)
                events_union.append(e2_head)
                e2_i += 1
                if e2_rest is not None:
                    events2.insert(e2_i, e2_rest)
        else:
            # No overlap: emit whichever event starts first.
            if e1.timestamp <= e2.timestamp:
                events_union.append(e1)
                e1_i += 1
            else:
                events_union.append(e2)
                e2_i += 1

    # At most one of the lists still has events left; append them unchanged.
    events_union += events1[e1_i:]
    events_union += events2[e2_i:]
    return events_union


def test_union_no_overlap():
    """Check ``union_no_overlap`` on staggered and fully-covering event lists.

    Every result must have the expected total duration, be ordered by
    timestamp, and contain no two events that overlap each other.
    """
    # Timezone-aware, like the timestamp used in test_split_event.
    now = datetime(2018, 1, 1, 0, 0, tzinfo=timezone.utc)
    td1h = timedelta(hours=1)

    def check(events, expected_total):
        total = sum((e.duration for e in events), timedelta(0))
        assert total == expected_total

        # Compare against the sorted order; a bare ``assert sorted(...)``
        # would only verify that the list is non-empty.
        timestamps = [e.timestamp for e in events]
        assert timestamps == sorted(timestamps)

        # Consecutive events may touch but must never overlap.
        for earlier, later in zip(events, events[1:]):
            assert earlier.timestamp + earlier.duration <= later.timestamp

    # Two lists of one-hour events, the second shifted by half an hour so that
    # every event overlaps its counterpart by 30 minutes.
    events1 = [
        Event(timestamp=now + 2 * i * td1h, duration=td1h, data={"test": 1})
        for i in range(3)
    ]
    events2 = [
        Event(timestamp=now + (2 * i + 0.5) * td1h, duration=td1h, data={"test": 2})
        for i in range(3)
    ]

    check(union_no_overlap(events1, events2), timedelta(hours=4, minutes=30))

    # Same lists with the precedence swapped.
    check(union_no_overlap(events2, events1), timedelta(hours=4, minutes=30))

    # One long event in the second list spanning all events of the first:
    # it must be cut into the gaps between them.
    events1 = [
        Event(timestamp=now + (2 * i) * td1h, duration=td1h, data={"test": 1})
        for i in range(3)
    ]
    events2 = [Event(timestamp=now, duration=5 * td1h, data={"test": 2})]
    check(union_no_overlap(events1, events2), timedelta(hours=5, minutes=0))

0 comments on commit d6f783d

Please sign in to comment.