In [1]:
# Ensure project root (OpenCEP) is on sys.path for imports like `from base.Pattern import Pattern`
import sys, os, pathlib

# Work out where this code lives: the containing folder when `__file__` is
# defined (script execution), otherwise the current working directory
# (typical for a notebook kernel, which has no `__file__`).
if "__file__" in globals():
    nb_dir = pathlib.Path(__file__).parent
else:
    nb_dir = pathlib.Path.cwd()

# The project root is taken to be one directory above; put it at the front of
# sys.path unless it is already listed, so re-running the cell adds nothing.
project_root = str(nb_dir.joinpath("..").resolve())
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [2]:
import time
from datetime import timedelta
from CEP import CEP
from base.Pattern import Pattern
from base.PatternStructure import (
    SeqOperator,
    PrimitiveEventStructure,
    KleeneClosureOperator,
)
from condition.CompositeCondition import AndCondition
from condition.BaseRelationCondition import SmallerThanCondition
from condition.Condition import Variable, SimpleCondition
from condition.KCCondition import KCIndexCondition
from stream.FileStream import FileInputStream, FileOutputStream
from plugin.citibike.CitiBike import (
    CitiBikeTripEventTypeClassifier,
    CitiBikeDataFormatter,
)
from tree.PatternMatchStorage import TreeStorageParameters

In [3]:
# CitiBike "hot paths" pattern, assembled from named pieces:
#   structure  - a Kleene-closure run of "CitiBikeTrip" events named `a`,
#                followed by one "CitiBikeTrip" event named `b`
#   conditions - all of the predicates below, combined with AndCondition
#   window     - one hour
hot_paths_structure = SeqOperator(
    KleeneClosureOperator(PrimitiveEventStructure("CitiBikeTrip", "a")),
    PrimitiveEventStructure("CitiBikeTrip", "b"),
)

# Predicates over the events inside the `a` closure.
# NOTE(review): offset=-1 presumably pairs each `a` event with the one just
# before it - confirm against KCIndexCondition.
same_bike_within_a = KCIndexCondition(
    names={"a"},
    getattr_func=lambda trip: trip["bike_id"],
    relation_op=lambda left, right: left == right,
    offset=-1,
)
chained_stations_within_a = KCIndexCondition(
    names={"a"},
    # Each trip is reduced to a (start station id, end station id) tuple.
    getattr_func=lambda trip: (
        int(float(trip["startstationid"])),
        int(float(trip["endstationid"])),
    ),
    # The first tuple's start id must equal the second tuple's end id.
    relation_op=lambda left, right: left[0] == right[1],
    offset=-1,
)

# Predicates tying the last element of `a` to `b`.
same_bike_a_to_b = SimpleCondition(
    Variable("a", lambda trips: trips[-1]["bike_id"]),
    Variable("b", lambda trip: trip["bike_id"]),
    relation_op=lambda last_a, only_b: last_a == only_b,
)
b_starts_where_a_ends = SimpleCondition(
    Variable("a", lambda trips: int(float(trips[-1]["endstationid"]))),
    Variable("b", lambda trip: int(float(trip["startstationid"]))),
    relation_op=lambda last_a, only_b: last_a == only_b,
)

# `b` must end at one of two specific station ids.
b_ends_at_target_station = SimpleCondition(
    Variable("b", lambda trip: int(float(trip["endstationid"]))),
    relation_op=lambda station: str(station) in {"484", "3630"},
)

citibikeHotPathsPattern = Pattern(
    hot_paths_structure,
    AndCondition(
        same_bike_within_a,
        chained_stations_within_a,
        same_bike_a_to_b,
        b_starts_where_a_ends,
        b_ends_at_target_station,
    ),
    timedelta(hours=1),
)

In [None]:
# Storage configuration for the evaluation tree, with load shedding enabled.
# NOTE(review): the precise meaning and units of the threshold, drop rate,
# strategy and clean-up interval are defined by TreeStorageParameters, not
# here - confirm there before tuning these values.
load_shedding_params = TreeStorageParameters(
    sort_storage=True,
    enable_load_shedding=True,
    load_shedding_threshold=15,
    load_shedding_drop_rate=0.3,
    load_shedding_strategy="oldest",
    clean_up_interval=10,
)

# Engine instance evaluating the hot-paths pattern with the storage settings above.
cep_with_load_shedding = CEP(
    patterns=[citibikeHotPathsPattern],
    storage_params=load_shedding_params,
)

# Input events; the path is relative to the current working directory.
events = FileInputStream("../test/EventFiles/citibike-withbikeid-med.txt")

# Time the run with a monotonic, high-resolution clock: unlike time.time(),
# time.perf_counter() cannot jump when the system clock is adjusted, so the
# difference below is always a valid elapsed duration in seconds.
start_time = time.perf_counter()

cep_with_load_shedding.run(
    events,
    FileOutputStream("../test/demo/Matches", "output_citibike_load_shedding.txt"),
    CitiBikeDataFormatter(),
)

# Elapsed seconds for the run() call above.
execution_time = time.perf_counter() - start_time

Creating evaluation manager...
 - Parallel execution: None
 - Storage: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=15, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest)
 - Using ParallelExecutionModes.SEQUENTIAL execution mode
!!! Using default evaluation mechanism parameters...
!!! Creating tree-based evaluation mechanism...
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=True, load_shedding_threshold=15, load_shedding_drop_rate=0.3, load_shedding_strategy=oldest)
InternalNode creating storage: sort=True, sorting_key=None
UnsortedPatternMatchStorage created with storage_params: TreeStorageParameters(sort_storage=True, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_times