# cuDF Cheat Sheets sample code

(c) 2020 NVIDIA, BlazingSQL

Distributed under Apache License 2.0

# Imports

In [None]:
import streamz
import cudf
import json

from streamz.dataframe import DataFrame

# cuStreamz Functions

#### streamz.accumulate()

In [25]:
# Fold every incoming value into a running state with addition and print
# each intermediate state.
source = streamz.Stream()
running_state = source.accumulate(lambda total, item: total + item)
running_state.sink(print)

Output()

In [29]:
# Push the value 1 into the accumulating stream twice.
for value in (1, 1):
    source.emit(value)

7
8


#### streamz.collect()

In [46]:
# source1 feeds the collector; each event on source2 is echoed with print
# and then triggers collector.flush.
source1, source2 = streamz.Stream(), streamz.Stream()
collector = streamz.collect(source1)
collector.sink(print)
echoed = source2.map(lambda message: print(message))
echoed.sink(collector.flush)

Output()

In [50]:
# Two messages go to the collected stream, then one to the trigger stream.
for stream, message in (
    (source1, 'RAPIDS rocks!!!'),
    (source1, 'It is easy to use!!!'),
    (source2, 'Tell me something good.'),  # flushes collector
):
    stream.emit(message)

Tell me something good.
('RAPIDS rocks!!!', 'It is easy to use!!!')


In [51]:
# Same pattern again: two collected messages followed by a trigger message.
for stream, message in (
    (source1, 'If you like RAPIDS you should also'),
    (source1, 'try BlazingSQL and Dask'),
    (source2, 'Anything else?'),
):
    stream.emit(message)

Anything else?
('If you like RAPIDS you should also', 'try BlazingSQL and Dask')


#### streamz.combine_latest()

In [62]:
# Combine two independent streams with combine_latest and print every
# tuple the combined stream produces.
source1, source2 = streamz.Stream(), streamz.Stream()
source3 = streamz.combine_latest(source1, source2)
source3.sink(print)

Output()

In [65]:
# Alternate emissions between the two input streams, in this exact order.
for stream, text in (
    (source1, 'RAPIDS '),
    (source2, 'rocks!!'),
    (source2, 'is the game!!'),
    (source1, 'BlazingSQL '),
):
    stream.emit(text)

('RAPIDS ', 3)
('RAPIDS ', 'rocks!!')
('RAPIDS ', 'is the game!!')
('BlazingSQL ', 'is the game!!')


#### streamz.delay()

In [69]:
# Keep `source` bound to the upstream Stream so that values emitted on it
# pass through the delay node before reaching the sink. Binding `source` to
# the delay node itself would make emit() hand values directly to the sink,
# bypassing the 2-second delay this cell is meant to demonstrate.
source = streamz.Stream()
source.delay(2).sink(print)

Output()

In [70]:
# Push a single value into the stream; the attached sink prints it.
source.emit(1)

1


#### streamz.filter()

In [72]:
# Only messages containing the substring 'RAPIDS' are passed on to print.
source = streamz.Stream()
mentions_rapids = source.filter(lambda text: 'RAPIDS' in text)
mentions_rapids.sink(print)

Output()

In [73]:
# One message that satisfies the filter predicate and one that does not.
for message in ('RAPIDS rocks!', 'Time to stop.'):
    source.emit(message)

RAPIDS rocks!


#### streamz.flatten()

In [74]:
# Attach a flatten node to the stream and print what it produces.
source = streamz.Stream()
flattened = source.flatten()
flattened.sink(print)

Output()

In [75]:
# Emit two lists; the recorded output shows their elements printed one by one.
for batch in ([1, 2, 3], [11, 2, 123, 4]):
    source.emit(batch)

1
2
3
11
2
123
4


#### streamz.map()

In [76]:
# Double every message before printing it.
source = streamz.Stream()
doubled = source.map(lambda value: value * 2)
doubled.sink(print)

Output()

In [77]:
# Feed the integers 0 through 3 into the stream.
for value in range(4):
    source.emit(value)

0
2
4
6


#### streamz.partition()

In [92]:
# NOTE(review): `sleep` is imported here but not used in this cell.
from time import sleep

# Group incoming values with partition(2) and print each group.
source = streamz.Stream()
pairs = source.partition(2)
pairs.sink(print)

Output()

In [93]:
# Feed the integers 0 through 5 into the partitioned stream.
for value in range(6):
    source.emit(value)

(0, 1)
(2, 3)
(4, 5)


#### streamz.rate_limit()

#### streamz.sink()

In [98]:
# Use a plain list as the destination: every emitted value is handed to
# L.append by the sink.
L = list()
source = streamz.Stream()
source.sink(L.append)

Output()

In [99]:
# Emit 1 and then 2 into the stream whose sink appends to L.
for value in (1, 2):
    source.emit(value)

In [100]:
# Display the list that the sink has been appending to.
L

[1, 2]

#### streamz.sliding_window()

#### streamz.timed_window()

#### streamz.union()

In [101]:
# Merge two streams with union and print whatever the merged stream carries.
source1, source2 = streamz.Stream(), streamz.Stream()
source3 = streamz.union(source1, source2)
source3.sink(print)

Output()

In [102]:
# One message on each input stream, source1 first.
for stream, text in ((source1, 'RAPIDS'), (source2, ' rocks!!')):
    stream.emit(text)

RAPIDS
 rocks!!


#### streamz.unique()

In [103]:
# Attach a unique node to the stream and print what it lets through.
source = streamz.Stream()
distinct = source.unique()
distinct.sink(print)

Output()

In [104]:
# The value 1 is emitted twice; the recorded output shows it printed once.
for value in (1, 2, 1):
    source.emit(value)

1
2


#### streamz.pluck()

In [106]:
# Pick positions 0 and 3 out of every emitted list and print the result.
source = streamz.Stream()
picked = source.pluck([0, 3])
picked.sink(print)

rows = [[1, 2, 3, 4], [4, 5, 6, 7], [8, 9, 10, 11]]
for row in rows:
    source.emit(row)

(1, 4)
(4, 7)
(8, 11)


In [108]:
# Pick the 'name' entry out of every emitted dict and print it.
source = streamz.Stream()
names = source.pluck('name')
names.sink(print)

records = [
    {'name': 'RAPIDS', 'x': 666},
    {'name': 'Dask', 'x': 456},
    {'name': 'BlazingSQL', 'x': 333},
]
for record in records:
    source.emit(record)

RAPIDS
Dask
BlazingSQL


#### streamz.zip()

In [109]:
# Zip two streams together and print each tuple the zipped stream produces.
source1, source2 = streamz.Stream(), streamz.Stream()
source3 = streamz.zip(source1, source2)
source3.sink(print)

Output()

In [110]:
# One message per input stream; the recorded output shows them as one tuple.
for stream, text in ((source1, 'RAPIDS'), (source2, ' rocks!!')):
    stream.emit(text)

('RAPIDS', ' rocks!!')


#### streamz.zip_latest()

In [112]:
# Join two streams with zip_latest and print each tuple it produces.
source1, source2 = streamz.Stream(), streamz.Stream()
source3 = streamz.zip_latest(source1, source2)
source3.sink(print)

Output()

In [113]:
# One message per input stream, source1 first.
for stream, text in ((source1, 'RAPIDS'), (source2, ' rocks!!')):
    stream.emit(text)

('RAPIDS', ' rocks!!')


#### streamz.filenames()

In [117]:
# Create a filenames source for the path 'kafka'.
# NOTE(review): presumably this emits names of files found under that path;
# no sink is attached in this cell — confirm intended usage against streamz docs.
source = streamz.Stream.filenames('kafka')

In [119]:
# Start the filenames source created in the previous cell.
source.start()