In [8]:
# Ensure project root (OpenCEP) is on sys.path for imports like `from base.Pattern import Pattern`
import sys, os, pathlib

# Work out the directory this code runs from: the file's own folder when
# `__file__` exists (script execution), otherwise the current working
# directory (the usual case inside a notebook kernel).
if "__file__" in globals():
    nb_dir = pathlib.Path(__file__).parent
else:
    nb_dir = pathlib.Path.cwd()

# The project root is taken to be the parent of that directory. It is put at
# the front of sys.path once, so re-running the cell does not add duplicates.
project_root = str(nb_dir.joinpath("..").resolve())
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [9]:
from datetime import timedelta
from CEP import CEP
from base.Pattern import Pattern
from base.PatternStructure import (
    SeqOperator,
    PrimitiveEventStructure,
    KleeneClosureOperator,
)
from condition.CompositeCondition import AndCondition
from condition.Condition import Variable, SimpleCondition
from condition.KCCondition import KCIndexCondition
from stream.FileStream import FileOutputStream
from stream.DataFrameStream import CitiBikeDataFrameInputStream
from plugin.citibike.CitiBike2 import (
    CitiBikeTripEventTypeClassifier,
    CitiBikeDataFormatter,
)
import test

In [10]:
# NOTE(review): this cell does not define anything. The "consecutive rides"
# pattern below is wrapped in a triple-quoted string, so it is only a string
# expression that the notebook echoes back as the cell output.
# The quoted code describes a trip `a` followed by a trip `b` where a's end
# station equals b's start station (a[1] == b[0]), with timedelta(minutes=5).
# Either turn it back into live code or drop the cell — TODO confirm intent.
"""citibikeConsecutiveRidesPattern = Pattern(
    SeqOperator(
        PrimitiveEventStructure("CitiBikeTrip", "a"),
        PrimitiveEventStructure("CitiBikeTrip", "b"),
    ),
    AndCondition(
        SimpleCondition(
            Variable("a", lambda x: (x["startstationid"], x["endstationid"])),
            Variable("b", lambda x: (x["startstationid"], x["endstationid"])),
            relation_op=lambda a, b: a[1] == b[0],
        )
    ),
    timedelta(minutes=5),
)"""

'citibikeConsecutiveRidesPattern = Pattern(\n    SeqOperator(\n        PrimitiveEventStructure("CitiBikeTrip", "a"),\n        PrimitiveEventStructure("CitiBikeTrip", "b"),\n    ),\n    AndCondition(\n        SimpleCondition(\n            Variable("a", lambda x: (x["startstationid"], x["endstationid"])),\n            Variable("b", lambda x: (x["startstationid"], x["endstationid"])),\n            relation_op=lambda a, b: a[1] == b[0],\n        )\n    ),\n    timedelta(minutes=5),\n)'

In [11]:
# Station id at which a matched path has to end (trip `b`'s end station).
TARGET_END_STATION_ID = 484


def _station_id(raw_id):
    """Normalise a station id such as ``484.0`` or ``"484.0"`` to the int ``484``."""
    return int(float(raw_id))


# Hot-paths pattern: a Kleene-closure run of CitiBikeTrip events `a` followed
# by one closing CitiBikeTrip event `b`, all required to hold together
# (AndCondition), with timedelta(minutes=1) as Pattern's third argument
# (presumably the time window — confirm against Pattern).
citibikeHotPathsPattern = Pattern(
    SeqOperator(
        KleeneClosureOperator(PrimitiveEventStructure("CitiBikeTrip", "a")),
        PrimitiveEventStructure("CitiBikeTrip", "b"),
    ),
    AndCondition(
        # Every trip in the `a` run is compared with its neighbour at
        # offset -1 (presumably the preceding trip — confirm against
        # KCIndexCondition): both must use the same bike.
        KCIndexCondition(
            names={"a"},
            getattr_func=lambda x: x["bikeid"],
            relation_op=lambda a1, a2: a1 == a2,
            offset=-1,
        ),
        # Same neighbour comparison on (start, end) station ids: the first
        # argument's start station must equal the second argument's end
        # station, i.e. the trips chain station-to-station.
        KCIndexCondition(
            names={"a"},
            getattr_func=lambda x: (
                _station_id(x["startstationid"]),
                _station_id(x["endstationid"]),
            ),
            relation_op=lambda a1, a2: a1[0] == a2[1],
            offset=-1,
        ),
        # The closing trip `b` uses the same bike as the last trip of the run.
        SimpleCondition(
            Variable("a", lambda x: x[-1]["bikeid"]),
            Variable("b", lambda x: x["bikeid"]),
            relation_op=lambda a, b: a == b,
        ),
        # The closing trip starts where the last trip of the run ended.
        SimpleCondition(
            Variable("a", lambda x: _station_id(x[-1]["endstationid"])),
            Variable("b", lambda x: _station_id(x["startstationid"])),
            relation_op=lambda a, b: a == b,
        ),
        # The closing trip must end at the target station. The extracted value
        # is already an int, so compare numerically: the former test
        # `str(end_id) in {"484.0", 484.0}` turned 484 into "484", which is in
        # neither form, so the condition could never hold.
        SimpleCondition(
            Variable("b", lambda x: _station_id(x["endstationid"])),
            relation_op=lambda end_id: end_id == TARGET_END_STATION_ID,
        ),
    ),
    timedelta(minutes=1),
)

In [12]:
def create_sample_preprocessor():
    """Build a CitiBike preprocessor restricted to a fixed sample of bike ids.

    Returns:
        Whatever ``CitiBikeDataFrameInputStream.create_citibike_preprocessor``
        returns when called with ``bike_ids_filter={5206, 5215, 5220}``.
    """
    sample_bike_ids = {5206, 5215, 5220}  # Only include these bike IDs
    return CitiBikeDataFrameInputStream.create_citibike_preprocessor(
        bike_ids_filter=sample_bike_ids
    )


# Load the small CitiBike trip sample as the input stream, passing
# "starttime" as the timestamp column. The sample preprocessor is left
# switched off here.
trips_csv_path = "../test/EventFiles/201901-citibike-tripdata-1-small.csv"
events = CitiBikeDataFrameInputStream(
    trips_csv_path,
    timestamp_column="starttime",
    # preprocessor=create_sample_preprocessor()
)

# Print first 5 rows of the dataframe
print(events.dataframe[0:5])

   tripduration               starttime                stoptime  \
0           320 2019-01-01 00:01:47.401 2019-01-01 00:07:07.581   
1           320 2019-01-01 00:01:47.401 2019-01-01 00:07:07.581   
2           316 2019-01-01 00:04:43.736 2019-01-01 00:10:00.608   
3           316 2019-01-01 00:04:43.736 2019-01-01 00:10:00.608   
4           591 2019-01-01 00:06:03.997 2019-01-01 00:15:55.438   

   startstationid  endstationid  bikeid  
0          3160.0         123.0   15839  
1           123.0         519.0   15839  
2           519.0         484.0   32723  
3          5119.0         484.0   32723  
4          3171.0         484.0   27451  


In [13]:
# Build the CEP engine around the single hot-paths pattern defined above.
hot_path_patterns = [citibikeHotPathsPattern]
cep = CEP(hot_path_patterns)

Creating evaluation manager...
 - Parallel execution: None
 - Storage: None
 - Using ParallelExecutionModes.SEQUENTIAL execution mode
!!! Using default evaluation mechanism parameters...
!!! Creating tree-based evaluation mechanism...
Tree calling create_storage_unit with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random)
InternalNode creating storage: sort=False, sorting_key=None
UnsortedPatternMatchStorage created with storage_params: TreeStorageParameters(sort_storage=False, attributes_priorities={}, clean_up_interval=10, prioritize_sorting_by_timestamp=True, enable_load_shedding=False, load_shedding_threshold=1000, load_shedding_drop_rate=0.1, load_shedding_strategy=random)
InternalNode creating storage: sort=False, sorting_key=None
UnsortedPatternMatchStorage created with s

In [14]:
# Run the loaded events through the engine. Matches are handed to a
# FileOutputStream built from "../test/demo/Matches" and "output_citibike.txt"
# (presumably directory and file name — confirm against FileOutputStream).
matches_output = FileOutputStream("../test/demo/Matches", "output_citibike.txt")
trip_formatter = CitiBikeDataFormatter()

# Kept as the cell's last expression so the value returned by run() is shown.
cep.run(events, matches_output, trip_formatter)

Starting CEP evaluation...
Using optimized DataFrame input stream processing
Processing event in TreeBasedEvaluationMechanism: {'tripduration': 320, 'starttime': Timestamp('2019-01-01 00:01:47.401000'), 'stoptime': Timestamp('2019-01-01 00:07:07.581000'), 'startstationid': 3160.0, 'endstationid': 123.0, 'bikeid': 15839}
SortedPatternMatchStorage.add() called Key: 2019-01-01 00:01:47.401000, length of partial matches: 0
Current events in pattern match: [{'tripduration': 320, 'starttime': 2019-01-01 00:01:47.401000, 'stoptime': 2019-01-01 00:07:07.581000, 'startstationid': 3160.0, 'endstationid': 123.0, 'bikeid': 15839}]
UnsortedPatternMatchStorage.add() called! Total matches: 0
Processing event in TreeBasedEvaluationMechanism: {'tripduration': 320, 'starttime': Timestamp('2019-01-01 00:01:47.401000'), 'stoptime': Timestamp('2019-01-01 00:07:07.581000'), 'startstationid': 123.0, 'endstationid': 519.0, 'bikeid': 15839}
SortedPatternMatchStorage.add() called Key: 2019-01-01 00:01:47.401000

0.020589