In [1]:
# Ensure project root (OpenCEP) is on sys.path for imports like `from base.Pattern import Pattern`
import sys, os, pathlib

# Directory this notebook runs from: the file's own folder when __file__ is
# defined, otherwise the current working directory.
if "__file__" in globals():
    nb_dir = pathlib.Path(__file__).parent
else:
    nb_dir = pathlib.Path.cwd()

# The project root is the parent of that directory; put it at the front of
# sys.path exactly once so the project-local imports below resolve.
project_root = str(nb_dir.joinpath("..").resolve())
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [2]:
from datetime import timedelta
from CEP import CEP
from base.Pattern import Pattern
from base.PatternStructure import (
    SeqOperator,
    PrimitiveEventStructure,
    KleeneClosureOperator,
)
from condition.CompositeCondition import AndCondition
from condition.Condition import Variable, SimpleCondition
from condition.KCCondition import KCIndexCondition
from stream.FileStream import FileOutputStream
from stream.DataFrameStream import CitiBikeDataFrameInputStream
from plugin.citibike.CitiBike2 import (
    CitiBikeTripEventTypeClassifier,
    CitiBikeDataFormatter,
)
import test
from tree.PatternMatchStorage import TreeStorageParameters
from parallel.ParallelExecutionParameters import (
    DataParallelExecutionParametersHirzelAlgorithm,
)
from parallel.ParallelExecutionPlatforms import ParallelExecutionPlatforms
from plugin.citibike.RessourceConsumption import RessourceConsumption

# Single shared RessourceConsumption instance; every run cell in this notebook
# launches its evaluation through monitor.run(cep.run, ...).
# NOTE(review): presumably it measures the wrapped call (the number printed
# after each run) -- confirm in plugin.citibike.RessourceConsumption.
monitor = RessourceConsumption()


In [3]:
def _to_station_id(raw_id):
    """Coerce a raw station id field to ``int`` by way of ``float``."""
    return int(float(raw_id))


# Event structure: a Kleene closure over "CitiBikeTrip" events named "a"
# (max_size=5), followed in sequence by one "CitiBikeTrip" event named "b".
_hot_paths_structure = SeqOperator(
    KleeneClosureOperator(
        PrimitiveEventStructure("CitiBikeTrip", "a"),
        max_size=5,
    ),
    PrimitiveEventStructure("CitiBikeTrip", "b"),
)

# Conditions inside the closure "a", both declared with offset=-1.
# NOTE(review): presumably offset=-1 pairs each closure element with its
# predecessor -- confirm against KCIndexCondition.
_a_same_bike = KCIndexCondition(
    names={"a"},
    getattr_func=lambda trip: trip["bikeid"],
    relation_op=lambda left, right: left == right,
    offset=-1,
)
_a_stations_chain = KCIndexCondition(
    names={"a"},
    # Extracts a (start station, end station) pair per element.
    getattr_func=lambda trip: (
        _to_station_id(trip["startstationid"]),
        _to_station_id(trip["endstationid"]),
    ),
    # Start station of the first argument must equal the end station of the second.
    relation_op=lambda left, right: left[0] == right[1],
    offset=-1,
)

# Conditions linking the last element of "a" to "b": same bike id, and "b"
# starts at the station where the last "a" trip ended.
_b_same_bike_as_last_a = SimpleCondition(
    Variable("a", lambda trips: trips[-1]["bikeid"]),
    Variable("b", lambda trip: trip["bikeid"]),
    relation_op=lambda last_a_bike, b_bike: last_a_bike == b_bike,
)
_b_starts_where_last_a_ended = SimpleCondition(
    Variable("a", lambda trips: _to_station_id(trips[-1]["endstationid"])),
    Variable("b", lambda trip: _to_station_id(trip["startstationid"])),
    relation_op=lambda last_a_end, b_start: last_a_end == b_start,
)

# "b" must end at the station whose id renders as "111111".
_b_ends_at_target = SimpleCondition(
    Variable("b", lambda trip: _to_station_id(trip["endstationid"])),
    relation_op=lambda station: str(station) in {"111111"},
)

# Pattern = structure + conjunction of all five conditions + a 31-minute
# timedelta as the third positional argument.
citibikeHotPathsPattern = Pattern(
    _hot_paths_structure,
    AndCondition(
        _a_same_bike,
        _a_stations_chain,
        _b_same_bike_as_last_a,
        _b_starts_where_last_a_ended,
        _b_ends_at_target,
    ),
    timedelta(minutes=31),
)

In [4]:
# Trial baseline run on the small fabricated dataset: CEP is built from the
# pattern alone, with no parallel_execution_params or storage_params.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-small.csv"
output_file = "output_citibike_baseline_small_try.txt"

events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")
cep = CEP([citibikeHotPathsPattern])

# Arguments forwarded after cep.run: input stream, match output stream, formatter.
run_args = (
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)
monitor.run(cep.run, *run_args)

Creating evaluation manager...
 - Parallel execution: None
 - Storage: None
 - Using ParallelExecutionModes.SEQUENTIAL execution mode
!!! Using default evaluation mechanism parameters...
!!! Creating tree-based evaluation mechanism...
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random,latency_bound=None)
InternalNode creating storage: sort=False, sorting_key=None
UnsortedPatternMatchStorage created with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random,latency_bound=None)
InternalNode creating storage: sort=False, sorting_key=None
Unso

44.117049

# Baseline - small dataset

In [4]:
# Baseline on the small fabricated dataset: default CEP configuration (only
# the pattern list is passed to CEP).
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-small.csv"
output_file = "output_citibike_baseline_small.txt"

events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")
cep = CEP([citibikeHotPathsPattern])

# Matches are written under ../test/demo/Matches/testing with the name above.
matches_out = FileOutputStream("../test/demo/Matches/testing", output_file)
formatter = CitiBikeDataFormatter()
monitor.run(cep.run, events, matches_out, formatter)

Creating evaluation manager...
 - Parallel execution: None
 - Storage: None
 - Using ParallelExecutionModes.SEQUENTIAL execution mode
!!! Using default evaluation mechanism parameters...
!!! Creating tree-based evaluation mechanism...
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random,latency_bound=None)
InternalNode creating storage: sort=False, sorting_key=None
UnsortedPatternMatchStorage created with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random,latency_bound=None)
InternalNode creating storage: sort=False, sorting_key=None
Unso

44.733967

# Both (data parallelism + load shedding) - small dataset

In [5]:
# Small fabricated dataset, run with data-parallel execution and load
# shedding enabled together.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-small.csv"
output_file = "output_citibike_both_small.txt"

# Storage configuration: load-shedding knobs first, then the storage options.
load_shedding_params = TreeStorageParameters(
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=30,
    load_shedding_drop_rate=0.3,
    latency_bound=0.08,
    sort_storage=True,
    clean_up_interval=10,
)

# Hirzel data-parallel parameters on the threading platform, 12 units (threads).
# NOTE(review): key="bikeid" is presumably the partitioning attribute -- confirm.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=12,
    platform=ParallelExecutionPlatforms.THREADING,
)

cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)

events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

# Arguments forwarded after cep.run: input stream, match output stream, formatter.
run_args = (
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)
monitor.run(cep.run, *run_args)

Creating evaluation manager...
 - Parallel execution: <parallel.ParallelExecutionParameters.DataParallelExecutionParametersHirzelAlgorithm object at 0x118e678f0>
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
 - Using ParallelExecutionModes.DATA_PARALLELISM execution mode
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
LeafNode.create_storage_unit called with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, en

0.022949

# Both half dataset

In [4]:
# Half-size fabricated dataset, run with data-parallel execution and load
# shedding enabled together.
# NOTE(review): the output recorded for this cell contains
# "Exception in thread ... (_run)" tracebacks; the truncated log does not show
# the exception type -- investigate before trusting the measured number.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-original-half.csv"
output_file = "output_citibike_both_half2.txt"

# Storage configuration: load-shedding knobs first, then the storage options.
load_shedding_params = TreeStorageParameters(
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=30,
    load_shedding_drop_rate=0.3,
    latency_bound=0.08,
    sort_storage=True,
    clean_up_interval=10,
)

# Hirzel data-parallel parameters on the threading platform, 12 units (threads).
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=12,
    platform=ParallelExecutionPlatforms.THREADING,
)

cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)

events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

# Matches are written under ../test/demo/Matches/testing with the name above.
matches_out = FileOutputStream("../test/demo/Matches/testing", output_file)
formatter = CitiBikeDataFormatter()
monitor.run(cep.run, events, matches_out, formatter)

Creating evaluation manager...
 - Parallel execution: <parallel.ParallelExecutionParameters.DataParallelExecutionParametersHirzelAlgorithm object at 0x11bc0f560>
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
 - Using ParallelExecutionModes.DATA_PARALLELISM execution mode
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
LeafNode.create_storage_unit called with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, en

Exception in thread Thread-6 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

we are updating event count 248000
computed currlat 1.113262
Found match: {'tripduration': '104', 'starttime': 2019-01-08 12:53:33.200000, 'stoptime': 2019-01-08 12:55:17.645000, 'startstationid': '326', 'endstationid': '236', 'bikeid': '34654', 'eventid': 246900}
{'tripduration': '356', 'starttime': 2019-01-08 12:55:20.357000, 'stoptime': 2019-01-08 13:01:17.326000, 'startstationid': '236', 'endstationid': '111111', 'bikeid': '34654', 'eventid': 246965}


event count 248104 1417.863883333
computed currlat 1.113262
Found match: {'tripduration': '73', 'starttime': 2019-01-08 13:11:45.406000, 'stoptime': 2019-01-08 13:12:58.484000, 'startstationid': '259', 'endstationid': '427', 'bikeid': '33615', 'eventid': 247450}
{'tripduration': '1290', 'starttime': 2019-01-08 13:13:02.735000, 'stoptime': 2019-01-08 13:34:33.205000, 'startstationid': '427', 'endstationid': '111111', 'bikeid': '33615', 'eventid': 247476}


event count 248691 1417.960836041
we are updating event count 249000
we are upd

Exception in thread Thread-11 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

we are updating event count 331000
we are updating event count 332000
we are updating event count 333000
computed currlat 1.113262
Found match: {'tripduration': '109', 'starttime': 2019-01-10 16:20:46.677000, 'stoptime': 2019-01-10 16:22:36.621000, 'startstationid': '128', 'endstationid': '3467', 'bikeid': '16381', 'eventid': 339700}
{'tripduration': '1176', 'starttime': 2019-01-10 16:22:37.199000, 'stoptime': 2019-01-10 16:42:13.589000, 'startstationid': '3467', 'endstationid': '111111', 'bikeid': '16381', 'eventid': 339758}


event count 333130 1431.298632958
computed currlat 1.113262
Found match: {'tripduration': '136', 'starttime': 2019-01-10 15:53:22.812000, 'stoptime': 2019-01-10 15:55:39.106000, 'startstationid': '311', 'endstationid': '307', 'bikeid': '27846', 'eventid': 338750}
{'tripduration': '472', 'starttime': 2019-01-10 15:55:40.358000, 'stoptime': 2019-01-10 16:03:32.716000, 'startstationid': '307', 'endstationid': '111111', 'bikeid': '27846', 'eventid': 338832}


event 

Exception in thread Thread-4 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

computed currlat 1.113262
Found match: {'tripduration': '114', 'starttime': 2019-01-12 14:00:09.005000, 'stoptime': 2019-01-12 14:02:03.234000, 'startstationid': '3368', 'endstationid': '3373', 'bikeid': '35130', 'eventid': 399550}
{'tripduration': '1294', 'starttime': 2019-01-12 14:02:04.919000, 'stoptime': 2019-01-12 14:23:39.017000, 'startstationid': '3373', 'endstationid': '111111', 'bikeid': '35130', 'eventid': 399611}


event count 383598 1439.212909666
we are updating event count 384000
we are updating event count 385000
we are updating event count 386000
we are updating event count 387000
computed currlat 1.113262
Found match: {'tripduration': '234', 'starttime': 2019-01-12 21:44:31.732000, 'stoptime': 2019-01-12 21:48:26.317000, 'startstationid': '3151', 'endstationid': '3154', 'bikeid': '28761', 'eventid': 411850}
{'tripduration': '323', 'starttime': 2019-01-12 21:48:39.349000, 'stoptime': 2019-01-12 21:54:02.743000, 'startstationid': '3154', 'endstationid': '111111', 'bikeid

Exception in thread Thread-7 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

computed currlat 1.113262
Found match: {'tripduration': '121', 'starttime': 2019-01-13 08:48:03.457000, 'stoptime': 2019-01-13 08:50:05.138000, 'startstationid': '3307', 'endstationid': '3293', 'bikeid': '31641', 'eventid': 414950}
{'tripduration': '259', 'starttime': 2019-01-13 08:50:08.613000, 'stoptime': 2019-01-13 08:54:28.077000, 'startstationid': '3293', 'endstationid': '111111', 'bikeid': '31641', 'eventid': 414983}


event count 390551 1440.318696833
computed currlat 1.113262
Found match: {'tripduration': '349', 'starttime': 2019-01-12 20:52:59.285000, 'stoptime': 2019-01-12 20:58:48.365000, 'startstationid': '3256', 'endstationid': '426', 'bikeid': '30073', 'eventid': 411200}
{'tripduration': '592', 'starttime': 2019-01-12 20:59:15.485000, 'stoptime': 2019-01-12 21:09:07.946000, 'startstationid': '426', 'endstationid': '111111', 'bikeid': '30073', 'eventid': 411272}


event count 390870 1440.374256125
we are updating event count 391000
computed currlat 1.113262
Found match: {'

87.413837

# Both full dataset - max() latency

In [5]:
# Full fabricated dataset, run with data-parallel execution and load shedding
# enabled together.
# NOTE(review): the output recorded for this cell contains
# "Exception in thread ... (_run)" tracebacks; the truncated log does not show
# the exception type -- investigate before trusting the measured number.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-original.csv"
output_file = "output_citibike_both_FULL_DATASET.txt"

# Storage configuration: load-shedding knobs first, then the storage options.
load_shedding_params = TreeStorageParameters(
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=30,
    load_shedding_drop_rate=0.3,
    latency_bound=0.08,
    sort_storage=True,
    clean_up_interval=10,
)

# Hirzel data-parallel parameters on the threading platform, 12 units (threads).
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=12,
    platform=ParallelExecutionPlatforms.THREADING,
)

cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)

events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

# Arguments forwarded after cep.run: input stream, match output stream, formatter.
run_args = (
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)
monitor.run(cep.run, *run_args)

Creating evaluation manager...
 - Parallel execution: <parallel.ParallelExecutionParameters.DataParallelExecutionParametersHirzelAlgorithm object at 0x11bc61460>
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
 - Using ParallelExecutionModes.DATA_PARALLELISM execution mode
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
LeafNode.create_storage_unit called with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, en

Exception in thread Thread-18 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

computed currlat 1.113262
Found match: {'tripduration': '104', 'starttime': 2019-01-08 12:53:33.200000, 'stoptime': 2019-01-08 12:55:17.645000, 'startstationid': '326', 'endstationid': '236', 'bikeid': '34654', 'eventid': 246900}
{'tripduration': '356', 'starttime': 2019-01-08 12:55:20.357000, 'stoptime': 2019-01-08 13:01:17.326000, 'startstationid': '236', 'endstationid': '111111', 'bikeid': '34654', 'eventid': 246965}


event count 697414 1809.349850541
we are updating event count 698000
computed currlat 1.113262
Found match: {'tripduration': '73', 'starttime': 2019-01-08 13:11:45.406000, 'stoptime': 2019-01-08 13:12:58.484000, 'startstationid': '259', 'endstationid': '427', 'bikeid': '33615', 'eventid': 247450}
{'tripduration': '1290', 'starttime': 2019-01-08 13:13:02.735000, 'stoptime': 2019-01-08 13:34:33.205000, 'startstationid': '427', 'endstationid': '111111', 'bikeid': '33615', 'eventid': 247476}


event count 698408 1809.526579208
we are updating event count 699000
we are upd

Exception in thread Thread-23 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

we are updating event count 781000
we are updating event count 782000
computed currlat 1.113262
Found match: {'tripduration': '136', 'starttime': 2019-01-10 15:53:22.812000, 'stoptime': 2019-01-10 15:55:39.106000, 'startstationid': '311', 'endstationid': '307', 'bikeid': '27846', 'eventid': 338750}
{'tripduration': '472', 'starttime': 2019-01-10 15:55:40.358000, 'stoptime': 2019-01-10 16:03:32.716000, 'startstationid': '307', 'endstationid': '111111', 'bikeid': '27846', 'eventid': 338832}


event count 782326 1823.821227541
computed currlat 1.113262
Found match: {'tripduration': '109', 'starttime': 2019-01-10 16:20:46.677000, 'stoptime': 2019-01-10 16:22:36.621000, 'startstationid': '128', 'endstationid': '3467', 'bikeid': '16381', 'eventid': 339700}
{'tripduration': '1176', 'starttime': 2019-01-10 16:22:37.199000, 'stoptime': 2019-01-10 16:42:13.589000, 'startstationid': '3467', 'endstationid': '111111', 'bikeid': '16381', 'eventid': 339758}


event count 782471 1823.845838166
compute

Exception in thread Thread-16 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

we are updating event count 833000
we are updating event count 834000
we are updating event count 835000
we are updating event count 836000
we are updating event count 837000
computed currlat 1.113262
Found match: {'tripduration': '234', 'starttime': 2019-01-12 21:44:31.732000, 'stoptime': 2019-01-12 21:48:26.317000, 'startstationid': '3151', 'endstationid': '3154', 'bikeid': '28761', 'eventid': 411850}
{'tripduration': '323', 'starttime': 2019-01-12 21:48:39.349000, 'stoptime': 2019-01-12 21:54:02.743000, 'startstationid': '3154', 'endstationid': '111111', 'bikeid': '28761', 'eventid': 411893}


event count 837439 1833.342381333
we are updating event count 838000
computed currlat 1.113262
Found match: {'tripduration': '387', 'starttime': 2019-01-13 01:56:56.121000, 'stoptime': 2019-01-13 02:03:23.925000, 'startstationid': '3105', 'endstationid': '445', 'bikeid': '17769', 'eventid': 413710}
{'tripduration': '1328', 'starttime': 2019-01-13 02:03:33.996000, 'stoptime': 2019-01-13 02:25:4

Exception in thread Thread-19 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

computed currlat 1.113262
Found match: {'tripduration': '121', 'starttime': 2019-01-13 08:48:03.457000, 'stoptime': 2019-01-13 08:50:05.138000, 'startstationid': '3307', 'endstationid': '3293', 'bikeid': '31641', 'eventid': 414950}
{'tripduration': '259', 'starttime': 2019-01-13 08:50:08.613000, 'stoptime': 2019-01-13 08:54:28.077000, 'startstationid': '3293', 'endstationid': '111111', 'bikeid': '31641', 'eventid': 414983}


event count 840055 1833.81060275
computed currlat 1.113262
Found match: {'tripduration': '349', 'starttime': 2019-01-12 20:52:59.285000, 'stoptime': 2019-01-12 20:58:48.365000, 'startstationid': '3256', 'endstationid': '426', 'bikeid': '30073', 'eventid': 411200}
{'tripduration': '592', 'starttime': 2019-01-12 20:59:15.485000, 'stoptime': 2019-01-12 21:09:07.946000, 'startstationid': '426', 'endstationid': '111111', 'bikeid': '30073', 'eventid': 411272}


event count 840160 1833.835003875
computed currlat 1.113262
Found match: {'tripduration': '463', 'starttime': 2

Exception in thread Thread-20 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

computed currlat 1.113262
Found match: {'tripduration': '199', 'starttime': 2019-01-15 12:23:27.973000, 'stoptime': 2019-01-15 12:26:47.035000, 'startstationid': '3016', 'endstationid': '293', 'bikeid': '30841', 'eventid': 486486}
{'tripduration': '267', 'starttime': 2019-01-15 12:26:47.723000, 'stoptime': 2019-01-15 12:31:14.741000, 'startstationid': '293', 'endstationid': '111111', 'bikeid': '30841', 'eventid': 486557}


event count 890608 1842.581043458
we are updating event count 891000
computed currlat 1.113262
Found match: {'tripduration': '156', 'starttime': 2019-01-15 15:13:35.565000, 'stoptime': 2019-01-15 15:16:12.089000, 'startstationid': '442', 'endstationid': '334', 'bikeid': '33329', 'eventid': 491150}
{'tripduration': '696', 'starttime': 2019-01-15 15:16:14.147000, 'stoptime': 2019-01-15 15:27:50.485000, 'startstationid': '334', 'endstationid': '111111', 'bikeid': '33329', 'eventid': 491214}


event count 891682 1842.770796708
we are updating event count 892000
we are up

Exception in thread Thread-24 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

computed currlat 0.687088
Found match: {'tripduration': '642', 'starttime': 2019-01-27 07:56:55.499000, 'stoptime': 2019-01-27 08:07:37.937000, 'startstationid': '396', 'endstationid': '524', 'bikeid': '34341', 'eventid': 828473}
{'tripduration': '769', 'starttime': 2019-01-27 08:08:06.708000, 'stoptime': 2019-01-27 08:20:55.953000, 'startstationid': '524', 'endstationid': '111111', 'bikeid': '34341', 'eventid': 828548}


event count 1083238 1880.055734833
we are updating event count 1084000
we are updating event count 1085000
computed currlat 0.687088
Found match: {'tripduration': '196', 'starttime': 2019-01-26 20:25:25.131000, 'stoptime': 2019-01-26 20:28:41.485000, 'startstationid': '3016', 'endstationid': '3109', 'bikeid': '24797', 'eventid': 824600}
{'tripduration': '1010', 'starttime': 2019-01-26 20:28:43.609000, 'stoptime': 2019-01-26 20:45:34.539000, 'startstationid': '3109', 'endstationid': '111111', 'bikeid': '24797', 'eventid': 824665}


event count 1085112 1880.393320583
co

Exception in thread Thread-21 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

computed currlat 0.687088
Found match: {'tripduration': '169', 'starttime': 2019-01-28 12:44:57.703000, 'stoptime': 2019-01-28 12:47:47.344000, 'startstationid': '3454', 'endstationid': '3081', 'bikeid': '35387', 'eventid': 871600}
{'tripduration': '702', 'starttime': 2019-01-28 12:47:48.190000, 'stoptime': 2019-01-28 12:59:30.981000, 'startstationid': '3081', 'endstationid': '111111', 'bikeid': '35387', 'eventid': 871661}


event count 1117919 1886.306971708
we are updating event count 1118000
we are updating event count 1119000
computed currlat 0.687088
Found match: {'tripduration': '70', 'starttime': 2019-01-28 21:32:19.283000, 'stoptime': 2019-01-28 21:33:29.990000, 'startstationid': '3576', 'endstationid': '3574', 'bikeid': '32077', 'eventid': 891150}
{'tripduration': '1696', 'starttime': 2019-01-28 21:33:31.329000, 'stoptime': 2019-01-28 22:01:47.890000, 'startstationid': '3574', 'endstationid': '111111', 'bikeid': '32077', 'eventid': 891180}


event count 1119068 1886.509465166


157.927018

# Parallelization & load shedding: full dataset, new latency, and without the weird `if`

In [4]:
# Experiment: data-parallel evaluation combined with load shedding on the
# "fabricated-original" event file.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-original.csv"
# The file name deliberately contains no "?": that character is reserved on
# Windows and is a glob wildcard in POSIX shells, so the previous
# "...-fixed?.txt" name was not portable.
output_file = "output_citibike_both_FULL_DATASET-fixed.txt"

# Storage settings with load shedding switched on and a latency bound set.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    enable_load_shedding=True,
    load_shedding_threshold=30,
    load_shedding_drop_rate=0.3,
    load_shedding_strategy="oldest",
    clean_up_interval=10,
    latency_bound=0.08,
)
# Data-parallel execution (Hirzel algorithm) on the threading platform,
# keyed on the `bikeid` attribute.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    platform=ParallelExecutionPlatforms.THREADING,
    units_number=12,  # how many threads
    key="bikeid",
)
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)

# Event stream read from `input_file`, using `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(
    input_file,
    timestamp_column="starttime",
)

# Run the evaluation through `monitor` (the RessourceConsumption instance
# created in the imports cell); matches go to `output_file`.
monitor.run(
    cep.run,
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)

Creating evaluation manager...
 - Parallel execution: <parallel.ParallelExecutionParameters.DataParallelExecutionParametersHirzelAlgorithm object at 0x1169b2bd0>
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
 - Using ParallelExecutionModes.DATA_PARALLELISM execution mode
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=30, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.08)
LeafNode.create_storage_unit called with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, en

Exception in thread Thread-6 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

we are updating event count 248000
computed currlat 1.306634
Found match: {'tripduration': '104', 'starttime': 2019-01-08 12:53:33.200000, 'stoptime': 2019-01-08 12:55:17.645000, 'startstationid': '326', 'endstationid': '236', 'bikeid': '34654', 'eventid': 246900}
{'tripduration': '356', 'starttime': 2019-01-08 12:55:20.357000, 'stoptime': 2019-01-08 13:01:17.326000, 'startstationid': '236', 'endstationid': '111111', 'bikeid': '34654', 'eventid': 246965}


event count 248189 2310.707337541
computed currlat 1.306634
Found match: {'tripduration': '73', 'starttime': 2019-01-08 13:11:45.406000, 'stoptime': 2019-01-08 13:12:58.484000, 'startstationid': '259', 'endstationid': '427', 'bikeid': '33615', 'eventid': 247450}
{'tripduration': '1290', 'starttime': 2019-01-08 13:13:02.735000, 'stoptime': 2019-01-08 13:34:33.205000, 'startstationid': '427', 'endstationid': '111111', 'bikeid': '33615', 'eventid': 247476}


event count 248745 2310.798910041
we are updating event count 249000
we are upd

Exception in thread Thread-11 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

we are updating event count 331000
we are updating event count 332000
we are updating event count 333000
computed currlat 1.306634
Found match: {'tripduration': '109', 'starttime': 2019-01-10 16:20:46.677000, 'stoptime': 2019-01-10 16:22:36.621000, 'startstationid': '128', 'endstationid': '3467', 'bikeid': '16381', 'eventid': 339700}
{'tripduration': '1176', 'starttime': 2019-01-10 16:22:37.199000, 'stoptime': 2019-01-10 16:42:13.589000, 'startstationid': '3467', 'endstationid': '111111', 'bikeid': '16381', 'eventid': 339758}


event count 333066 2323.627353958
computed currlat 1.306634
Found match: {'tripduration': '136', 'starttime': 2019-01-10 15:53:22.812000, 'stoptime': 2019-01-10 15:55:39.106000, 'startstationid': '311', 'endstationid': '307', 'bikeid': '27846', 'eventid': 338750}
{'tripduration': '472', 'starttime': 2019-01-10 15:55:40.358000, 'stoptime': 2019-01-10 16:03:32.716000, 'startstationid': '307', 'endstationid': '111111', 'bikeid': '27846', 'eventid': 338832}


event 

Exception in thread Thread-4 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

we are updating event count 385000
we are updating event count 386000
we are updating event count 387000
computed currlat 1.306634
Found match: {'tripduration': '234', 'starttime': 2019-01-12 21:44:31.732000, 'stoptime': 2019-01-12 21:48:26.317000, 'startstationid': '3151', 'endstationid': '3154', 'bikeid': '28761', 'eventid': 411850}
{'tripduration': '323', 'starttime': 2019-01-12 21:48:39.349000, 'stoptime': 2019-01-12 21:54:02.743000, 'startstationid': '3154', 'endstationid': '111111', 'bikeid': '28761', 'eventid': 411893}


event count 387897 2332.219908125
we are updating event count 388000
we are updating event count 389000
computed currlat 1.306634
Found match: {'tripduration': '387', 'starttime': 2019-01-13 01:56:56.121000, 'stoptime': 2019-01-13 02:03:23.925000, 'startstationid': '3105', 'endstationid': '445', 'bikeid': '17769', 'eventid': 413710}
{'tripduration': '1328', 'starttime': 2019-01-13 02:03:33.996000, 'stoptime': 2019-01-13 02:25:42.059000, 'startstationid': '445', 

Exception in thread Thread-7 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

computed currlat 1.306634
Found match: {'tripduration': '121', 'starttime': 2019-01-13 08:48:03.457000, 'stoptime': 2019-01-13 08:50:05.138000, 'startstationid': '3307', 'endstationid': '3293', 'bikeid': '31641', 'eventid': 414950}
{'tripduration': '259', 'starttime': 2019-01-13 08:50:08.613000, 'stoptime': 2019-01-13 08:54:28.077000, 'startstationid': '3293', 'endstationid': '111111', 'bikeid': '31641', 'eventid': 414983}


event count 390516 2332.617946
computed currlat 1.306634
Found match: {'tripduration': '349', 'starttime': 2019-01-12 20:52:59.285000, 'stoptime': 2019-01-12 20:58:48.365000, 'startstationid': '3256', 'endstationid': '426', 'bikeid': '30073', 'eventid': 411200}
{'tripduration': '592', 'starttime': 2019-01-12 20:59:15.485000, 'stoptime': 2019-01-12 21:09:07.946000, 'startstationid': '426', 'endstationid': '111111', 'bikeid': '30073', 'eventid': 411272}


event count 390802 2332.667593416
we are updating event count 391000
computed currlat 1.306634
Found match: {'tri

Exception in thread Thread-8 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

computed currlat 1.306634
Found match: {'tripduration': '102', 'starttime': 2019-01-15 11:52:25.421000, 'stoptime': 2019-01-15 11:54:07.452000, 'startstationid': '421', 'endstationid': '366', 'bikeid': '34777', 'eventid': 485700}
{'tripduration': '907', 'starttime': 2019-01-15 11:54:08.950000, 'stoptime': 2019-01-15 12:09:15.985000, 'startstationid': '366', 'endstationid': '111111', 'bikeid': '34777', 'eventid': 485748}


event count 440693 2340.340456541
we are updating event count 441000
computed currlat 1.306634
Found match: {'tripduration': '199', 'starttime': 2019-01-15 12:23:27.973000, 'stoptime': 2019-01-15 12:26:47.035000, 'startstationid': '3016', 'endstationid': '293', 'bikeid': '30841', 'eventid': 486486}
{'tripduration': '267', 'starttime': 2019-01-15 12:26:47.723000, 'stoptime': 2019-01-15 12:31:14.741000, 'startstationid': '293', 'endstationid': '111111', 'bikeid': '30841', 'eventid': 486557}


event count 441264 2340.43226425
we are updating event count 442000
computed c

Exception in thread Thread-12 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation

we are updating event count 635000
we are updating event count 636000
we are updating event count 637000
computed currlat 1.306634
Found match: {'tripduration': '500', 'starttime': 2019-01-27 06:37:30.921000, 'stoptime': 2019-01-27 06:45:51.005000, 'startstationid': '350', 'endstationid': '3701', 'bikeid': '33149', 'eventid': 828100}
{'tripduration': '299', 'starttime': 2019-01-27 06:45:55.295000, 'stoptime': 2019-01-27 06:50:54.854000, 'startstationid': '3701', 'endstationid': '111111', 'bikeid': '33149', 'eventid': 828121}


event count 637734 2372.123539333
we are updating event count 638000
computed currlat 1.306634
Found match: {'tripduration': '314', 'starttime': 2019-01-26 22:14:23.433000, 'stoptime': 2019-01-26 22:19:37.927000, 'startstationid': '3362', 'endstationid': '3086', 'bikeid': '25297', 'eventid': 826009}
{'tripduration': '80', 'starttime': 2019-01-26 22:19:49.604000, 'stoptime': 2019-01-26 22:21:09.940000, 'startstationid': '3086', 'endstationid': '111111', 'bikeid': 

Exception in thread Thread-9 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP recover/OpenCEP/tree/evaluation/

we are updating event count 668000
computed currlat 1.306634
Found match: {'tripduration': '169', 'starttime': 2019-01-28 12:44:57.703000, 'stoptime': 2019-01-28 12:47:47.344000, 'startstationid': '3454', 'endstationid': '3081', 'bikeid': '35387', 'eventid': 871600}
{'tripduration': '702', 'starttime': 2019-01-28 12:47:48.190000, 'stoptime': 2019-01-28 12:59:30.981000, 'startstationid': '3081', 'endstationid': '111111', 'bikeid': '35387', 'eventid': 871661}


event count 668881 2377.198196583
we are updating event count 669000
computed currlat 1.306634
Found match: {'tripduration': '70', 'starttime': 2019-01-28 21:32:19.283000, 'stoptime': 2019-01-28 21:33:29.990000, 'startstationid': '3576', 'endstationid': '3574', 'bikeid': '32077', 'eventid': 891150}
{'tripduration': '1696', 'starttime': 2019-01-28 21:33:31.329000, 'stoptime': 2019-01-28 22:01:47.890000, 'startstationid': '3574', 'endstationid': '111111', 'bikeid': '32077', 'eventid': 891180}


event count 669754 2377.343666208
we a

144.107126

In [None]:
# Parallelization only - big dataset

In [None]:
# Parallel-only setup: no storage parameters are handed to CEP in this cell.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=12,  # how many threads
    platform=ParallelExecutionPlatforms.THREADING,
)
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
)

In [None]:
# Input event file and the name of the file that will receive the matches.
output_file = "output_citibike_only_parallel.txt"
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated.csv"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")


In [None]:
# cep.run(
#     events,
#     FileOutputStream("../test/demo/Matches/testing", output_file),
#     CitiBikeDataFormatter(),
# )
# Note: the (now commented-out) run above ran for 513 min.

# Parallelization only - small dataset

In [None]:
# Input event file ("-small" variant) and the name of the matches file.
output_file = "output_citibike_only_parallel-small.txt"
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-small.csv"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

In [None]:
# Evaluate the pattern with the most recently built `cep` over `events`.
# Matches are written to `output_file` under ../test/demo/Matches/testing.
cep.run(
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)

# Parallelization & Load shedding

In [None]:
# Storage settings with load shedding enabled; no latency_bound is passed here.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    clean_up_interval=10,
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=15,
    load_shedding_drop_rate=0.3,
)
# Data-parallel execution on the threading platform, keyed on `bikeid`.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=8,  # how many threads
    platform=ParallelExecutionPlatforms.THREADING,
)
# Engine combining both parameter sets.
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)



In [None]:
# Input event file and the name of the file that will receive the matches.
output_file = "output_citibike_both.txt"
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated.csv"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

In [None]:
# Evaluate the pattern over `events`; matches are written to `output_file`
# under ../test/demo/Matches/testing.
# Note: this run ran for an hour.
cep.run(
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)

# Load shedding 2

In [None]:
# Storage settings with load shedding enabled; no latency_bound is passed here.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    clean_up_interval=10,
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=15,
    load_shedding_drop_rate=0.3,
)
# Data-parallel execution on the threading platform, keyed on `bikeid`.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=8,  # how many threads
    platform=ParallelExecutionPlatforms.THREADING,
)
# Fresh engine instance for the second load-shedding run.
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)



In [None]:
# Input event file and the name of the file that will receive the matches.
output_file = "output_citibike_both-2.txt"
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated.csv"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

In [None]:
# Evaluate the pattern with the most recently built `cep` over `events`.
# Matches are written to `output_file` under ../test/demo/Matches/testing.
cep.run(
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)

# Parallelization & load shedding: after merge

In [None]:
# Storage settings with load shedding enabled and a latency bound of 0.01.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    clean_up_interval=10,
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=15,
    load_shedding_drop_rate=0.3,
    latency_bound=0.01,
)
# Data-parallel execution on the threading platform, keyed on `bikeid`.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=8,  # how many threads
    platform=ParallelExecutionPlatforms.THREADING,
)
# Engine combining both parameter sets.
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)



In [None]:
# Input event file ("-small" variant) and the name of the matches file.
output_file = "output_citibike_both-merge.txt"
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated-small.csv"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")

In [None]:
# Run the evaluation through `monitor` (the RessourceConsumption instance
# created in the imports cell), passing `cep.run` and its arguments.
# NOTE(review): this run writes to ../test/demo/Matches, while the other runs
# in this notebook write to ../test/demo/Matches/testing. Confirm this is intended.
monitor.run(
    cep.run,
    events,
    FileOutputStream("../test/demo/Matches", output_file),
    CitiBikeDataFormatter(),
)

# Parallelization & load shedding: big dataset

In [4]:
# Storage settings with load shedding enabled and a latency bound of 0.1.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    clean_up_interval=10,
    enable_load_shedding=True,
    load_shedding_strategy="oldest",
    load_shedding_threshold=50,
    load_shedding_drop_rate=0.3,
    latency_bound=0.1,
)
# Data-parallel execution on the threading platform, keyed on `bikeid`.
dp_params = DataParallelExecutionParametersHirzelAlgorithm(
    key="bikeid",
    units_number=12,  # how many threads
    platform=ParallelExecutionPlatforms.THREADING,
)
# Engine combining both parameter sets; built before the input stream,
# exactly as in the original cell.
cep = CEP(
    [citibikeHotPathsPattern],
    parallel_execution_params=dp_params,
    storage_params=load_shedding_params,
)

# Input event file and the name of the file that will receive the matches.
input_file = "../test/EventFiles/201901-citibike-tripdata-1-fabricated.csv"
output_file = "output_citibike_both-merge-big-0.1latency.txt"

# Event stream over `input_file`, with `starttime` as the timestamp column.
events = CitiBikeDataFrameInputStream(input_file, timestamp_column="starttime")



Creating evaluation manager...
 - Parallel execution: <parallel.ParallelExecutionParameters.DataParallelExecutionParametersHirzelAlgorithm object at 0x119ac7b90>
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=50, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.1)
 - Using ParallelExecutionModes.DATA_PARALLELISM execution mode
!!! Using default evaluation mechanism parameters...
!!! Creating tree-based evaluation mechanism...
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=50, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest,latency_bound=0.1)
InternalNode creating storage: sort=True, sorting_key=None
UnsortedPatternMatchStorage 

In [None]:
# Run the evaluation through `monitor` (the RessourceConsumption instance
# created in the imports cell), passing `cep.run` and its arguments.
# Matches are written to `output_file` under ../test/demo/Matches/testing.
monitor.run(
    cep.run,
    events,
    FileOutputStream("../test/demo/Matches/testing", output_file),
    CitiBikeDataFormatter(),
)

Starting CEP evaluation...
Using optimized DataFrame input stream processing
we are updating event count 0
UnsortedPatternMatchStorage.add() called! Total matches: 0
computed currlat None
Found match: {'tripduration': '1408', 'starttime': 2019-01-01 00:31:26.061000, 'stoptime': 2019-01-01 00:54:54.412000, 'startstationid': '327', 'endstationid': '2021', 'bikeid': '15839', 'eventid': 48}
{'tripduration': '138', 'starttime': 2019-01-01 00:54:59.190000, 'stoptime': 2019-01-01 00:57:17.802000, 'startstationid': '2021', 'endstationid': '111111', 'bikeid': '15839', 'eventid': 107}


UnsortedPatternMatchStorage.add() called! Total matches: 0
computed currlat 0.875365
comp latency not none
inside latency computation
Found match: {'tripduration': '700', 'starttime': 2019-01-01 00:37:51.478000, 'stoptime': 2019-01-01 00:49:31.666000, 'startstationid': '490', 'endstationid': '340', 'bikeid': '30705', 'eventid': 60}
{'tripduration': '2531', 'starttime': 2019-01-01 00:49:33.962000, 'stoptime': 2019

Exception in thread Thread-6 (_run):
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP/OpenCEP/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP/OpenCEP/parallel/data_parallel/DataParallelExecutionAlgorithm.py", line 119, in _run
    evaluation_manager.eval(events, matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP/OpenCEP/parallel/manager/SequentialEvaluationManager.py", line 32, in eval
    self.__eval_mechanism.eval(event_stream, pattern_matches, data_formatter)
  File "/Users/cricoche/Desktop/aalto_master/SSDM/OpenCEP/OpenCEP/tree/evaluation/TreeBasedEvaluationMechanism.py"

we are updating event count 248000
UnsortedPatternMatchStorage.add() called! Total matches: 1
computed currlat 0.4508430999999912
comp latency not none
inside latency computation
Found match: {'tripduration': '104', 'starttime': 2019-01-08 12:53:33.200000, 'stoptime': 2019-01-08 12:55:17.645000, 'startstationid': '326', 'endstationid': '236', 'bikeid': '34654', 'eventid': 246900}
{'tripduration': '356', 'starttime': 2019-01-08 12:55:20.357000, 'stoptime': 2019-01-08 13:01:17.326000, 'startstationid': '236', 'endstationid': '111111', 'bikeid': '34654', 'eventid': 246965}


UnsortedPatternMatchStorage.add() called! Total matches: 3
computed currlat 0.4122501999999824
comp latency not none
inside latency computation
Found match: {'tripduration': '73', 'starttime': 2019-01-08 13:11:45.406000, 'stoptime': 2019-01-08 13:12:58.484000, 'startstationid': '259', 'endstationid': '427', 'bikeid': '33615', 'eventid': 247450}
{'tripduration': '1290', 'starttime': 2019-01-08 13:13:02.735000, 'stoptim