# HyperStream Tutorial 2: Reading CSV


In [1]:
import sys
sys.path.append("../") # Add parent dir in the Path

from hyperstream import HyperStream

## Starting a Hyperstream instance

First of all, we will create a HyperStream instance. This instance will connect to the MongoDB server that is specified in the configuration file and it will raise an error if the MongoDB server is not running.

In [2]:
from hyperstream import HyperStream

# Create the HyperStream instance with loglevel=0 and display its summary
# line (the recorded output shows the version and the MongoDB URI).
hs = HyperStream(loglevel=0)
print(hs)

HyperStream version 0.2.6, connected to mongodb://localhost:27017/hyperstream


In [3]:
from datetime import datetime

from hyperstream import HyperStream
from hyperstream import TimeInterval

from hyperstream.utils import UTC

# Build a fresh HyperStream instance for this cell (default log level).
hs = HyperStream()

# NOTE(review): the output recorded for this cell is
# "AttributeError: 'HyperStream' object has no attribute 'plugins'", i.e. the
# installed HyperStream did not expose `plugins` -- confirm the plugin
# configuration / library version before relying on this cell.
reader = hs.plugins.data_importers.tools.csv_reader('plugins/data_importers/data/sea_ice.csv')

# Interval to import; both end points are made UTC-aware.
ti = TimeInterval(datetime(1990, 1, 1).replace(tzinfo=UTC), datetime(2011, 4, 1).replace(tzinfo=UTC))

# Execute the CSV reader into a stream on the memory channel and print it.
sea_ice = hs.channel_manager.memory.get_or_create_stream("sea_ice")
reader.execute(sources=[], sink=sea_ice, interval=ti)
for key, value in sea_ice.window().items():
    print('[%s]: %s' % (key, value))

# Run the list_sum tool over sea_ice, writing to a stream on the mongo channel.
sea_ice_sums = hs.channel_manager.mongo.get_or_create_stream('sea_ice_sums')
comp = hs.tools.list_sum().execute(sources=[sea_ice], sink=sea_ice_sums,
                                   interval=ti)

for key, value in sea_ice_sums.window().items():
    print('[%s]: %s' % (key, value))

# The last summed value must equal the sum of the last source value.
assert sea_ice_sums.window().last().value == sum(sea_ice.window().last().value)

# Every summed value must match the per-item sum of the source stream.
# `assertItemsEqual` is a unittest.TestCase method, not a builtin, so calling
# it bare raised NameError; compare the two sequences order-insensitively.
expected = [sum(values) for values in sea_ice.window().values()]
assert sorted(sea_ice_sums.window().values()) == sorted(expected)


AttributeError: 'HyperStream' object has no attribute 'plugins'

## Selecting the tool to read csv

HyperStream comes with a set of predefined tools in hyperstream.tools. These tools can be used to define the nodes of a factor graph that will produce values or compute certain functions given the specified input nodes. For this tutorial, we will focus on the **clock** tool. This tool produces time ticks from the specified start and stride times.

In [None]:
from hyperstream import StreamId

# Channel through which the tools are looked up by stream id.
T = hs.channel_manager.tools

clock = StreamId(name="clock")

# The value of the latest item in the clock stream is callable; calling it
# with stride=2.0 yields the tool object used in the following cells.
clock_tool_factory = T[clock].window().last().value
clock_tool = clock_tool_factory(stride=2.0)

## Specifying the memory channel

We need to specify where we want to store the resulting stream of data that will be generated. It is possible to store it in a MongoDB database instead of in memory by selecting **hs.channel_manager.mongo**. In this tutorial we use the memory channel: we get the memory channel from the channel manager and then create the stream on it.

In [None]:
# Memory channel of the channel manager; the result stream is created on it.
M = hs.channel_manager.memory

# Fetch the "ticker" stream, creating it if it does not exist yet.
ticker_id = StreamId(name="ticker")
ticker = M.get_or_create_stream(stream_id=ticker_id)

## Querying the tool

Now we only need to create the time interval that we want to query. We do this by specifying its beginning and end.

In [None]:
from datetime import datetime, timedelta

from pytz import UTC

from hyperstream import TimeInterval

# Query window: the ten seconds leading up to the current UTC time.
now = datetime.utcnow().replace(tzinfo=UTC)
# Subtracting a timedelta from an aware datetime keeps its tzinfo, so
# `before` is UTC-aware without a further replace().
before = now - timedelta(seconds=10)

ti = TimeInterval(before, now)

## Executing the tool

Now that we have defined the tool to use, where we want to store the results, and the time interval, we can execute the tool.

In [None]:
# Run the clock tool over `ti`, writing into `ticker`; it is given no source
# streams and no alignment stream.
clock_tool.execute(
    sources=[],
    sink=ticker,
    interval=ti,
    alignment_stream=None
)

## Printing the results

The resulting stream is stored in the ticker. We can now get a list of tuples, each containing a timestamp and its corresponding clock value.

In [None]:
# Print every (timestamp, value) pair currently held in the ticker stream.
ticker_items = ticker.window().items()
for timestamp, value in ticker_items:
    print('[%s]: %s' % (timestamp, value))