-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from ContinuumIO/new_intake
updates for intake 0.2
- Loading branch information
Showing
6 changed files
with
32 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ requirements: | |
- jinja2 | ||
run: | ||
- attrs | ||
- intake | ||
- intake>=0.2 | ||
- python | ||
|
||
test: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,6 @@ | ||
from intake.source import base | ||
|
||
from ._version import get_versions | ||
|
||
__version__ = get_versions()['version'] | ||
del get_versions | ||
|
||
|
||
class Plugin(base.Plugin): | ||
"""Cisco Netflow packets to sequence of Python dicts reader""" | ||
|
||
def __init__(self): | ||
super(Plugin, self).__init__(name='netflow', version='0.1', container='python', partition_access=False) | ||
|
||
def open(self, urlpath, **kwargs): | ||
""" | ||
Parameters: | ||
urlpath : str | ||
Location of the data files; can include protocol and glob characters. | ||
""" | ||
from .source import NetflowSource | ||
base_kwargs, source_kwargs = self.separate_base_kwargs(kwargs) | ||
return NetflowSource(urlpath=urlpath, metadata=base_kwargs['metadata']) | ||
from .source import NetflowSource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,53 @@ | ||
from dask.bytes import open_files | ||
|
||
from intake.source import base | ||
|
||
from .v9 import RecordStream | ||
from . import __version__ | ||
|
||
|
||
class NetflowSource(base.DataSource): | ||
name = 'netflow' | ||
version = __version__ | ||
container = 'python' | ||
partition_access = True | ||
|
||
def __init__(self, urlpath, metadata=None): | ||
"""Source to load Cisco Netflow packets as sequence of Python dicts. | ||
Parameters: | ||
urlpath : str | ||
Location of the data files; can include protocol and glob characters. | ||
Location of the data files; can include protocol and glob | ||
characters. | ||
""" | ||
self._urlpath = urlpath | ||
self._streams = open_files(urlpath, mode='rb') | ||
|
||
super(NetflowSource, self).__init__(container='python', metadata=metadata) | ||
super(NetflowSource, self).__init__(metadata=metadata) | ||
|
||
def _get_schema(self): | ||
self._streams = open_files(self._urlpath, mode='rb') | ||
self.npartitions = len(self._streams) | ||
return base.Schema(datashape=None, | ||
dtype=None, | ||
shape=None, | ||
npartitions=len(self._streams), | ||
extra_metadata={}) | ||
|
||
def _get_partition(self, i): | ||
with self._streams[i] as f: | ||
return list(RecordStream(f)) | ||
return read_stream(self._streams[i]) | ||
|
||
def _close(self): | ||
for stream in self._streams: | ||
stream.close() | ||
def read(self): | ||
return self.to_dask().compute() | ||
|
||
def to_dask(self): | ||
import dask.delayed | ||
import dask.bag as db | ||
dpart = dask.delayed(read_stream) | ||
parts = [dpart(stream) for stream in self._streams] | ||
return db.from_delayed(parts) | ||
|
||
def _close(self): | ||
self._streams = None | ||
|
||
|
||
def read_stream(stream): | ||
from .v9 import RecordStream | ||
with stream as f: | ||
return list(RecordStream(f)) |