# Work around for working with many update files without caching (~ more than few hours)

Without using the cache, the next cell will crash because it makes too many requests with pybgpkit.  
This is because `BGPKITStream.__iter__` instantiate many `bgpkit.Parser` objects and BGPKIT is not lazy at instantiation.

In [1]:
import datetime
from pybgpkitstream import BGPStreamConfig, BGPKITStream

config = BGPStreamConfig(
    start_time=datetime.datetime(2010, 9, 1, 0, 0),
    end_time=datetime.datetime(2010, 9, 1, 23, 55, 0),
    collectors=["route-views.sydney", "route-views.wide"],
    data_types=["updates"],
)

stream = BGPKITStream.from_config(config)
for elem in stream:
    pass


thread '<unnamed>' panicked at src/lib.rs:129:57:
called `Result::unwrap()` on an `Err` value: ParserErrorWithBytes { error: OneIoError(RemoteIoError(reqwest::Error { kind: Request, url: "http://archive.routeviews.org/route-views.sydney/bgpdata/2010.09/UPDATES/updates.20100901.1015.bz2", source: TimedOut })), bytes: None }
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace


PanicException: called `Result::unwrap()` on an `Err` value: ParserErrorWithBytes { error: OneIoError(RemoteIoError(reqwest::Error { kind: Request, url: "http://archive.routeviews.org/route-views.sydney/bgpdata/2010.09/UPDATES/updates.20100901.1015.bz2", source: TimedOut })), bytes: None }

A workaround is to use the cache or to break down your stream to smaller ones and simply chain them

In [3]:
from itertools import chain, pairwise


def sample_every(
    start: datetime.datetime, end: datetime.datetime, interval: datetime.timedelta
) -> list[datetime.datetime]:
    current = start
    samples = []
    while current < end:
        samples.append(current)
        current += interval
    samples.append(end)
    return samples


def decompose_bgpstreamconfig(
    config: BGPStreamConfig, interval=datetime.timedelta(hours=2)
) -> list[BGPStreamConfig]:
    intermediate_steps = sample_every(config.start_time, config.end_time, interval)
    configs = []
    for start, end in pairwise(intermediate_steps):
        subconfig = BGPStreamConfig(
            start_time=start,
            end_time=end
            - datetime.timedelta(
                microseconds=0.1
            ),  # prevent overlapping (bgpstream includes borders)
            collectors=config.collectors,
            data_types=config.data_types,
            cache_dir=config.cache_dir,
            filters=config.filters,
        )
        configs.append(subconfig)
    return configs


config = BGPStreamConfig(
    start_time=datetime.datetime(2010, 9, 1, 0, 0),
    end_time=datetime.datetime(2010, 9, 1, 23, 55, 0),
    collectors=["route-views.sydney", "route-views.wide"],
    data_types=["updates"],
)

subconfigs = decompose_bgpstreamconfig(config, interval=datetime.timedelta(hours=2))
stream = chain.from_iterable(
    BGPKITStream.from_config(subconfig) for subconfig in subconfigs
)

n_elems = 0
for elem in stream:
    n_elems += 1
print(f"Parsed {n_elems} BGP elements")

Parsed 1271312 BGP elements
