# cuDF Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

# Imports

In [1]:
import streamz
import cudf
import json

from streamz.dataframe import DataFrame

# cuStreamz Stream

#### streamz.Stream()

In [2]:
# Create a named stream; both upstream arguments are passed explicitly as None.
source1 = streamz.Stream(
    stream_name='stream1',
    upstream=None,
    upstreams=None,
)

#### streamz.Stream.connect()

In [3]:
# Create a second named stream.
source2 = streamz.Stream(stream_name='stream2')

# Connect source1 to source2; afterwards source1 is listed in source2.upstreams.
source1.connect(source2)

In [4]:
# Show the name of the first upstream of source2.
source2.upstreams[0].name

'stream1'

#### streamz.Stream.disconnect()

In [5]:
# Disconnect source2 from source1.
source1.disconnect(source2)

In [6]:
# Inspect source2's upstreams after the disconnect.
source2.upstreams

[[None]]

#### streamz.Stream.destroy()

In [9]:
# Create a third stream, connect source1 to it, and display its upstreams.
source3 = streamz.Stream(stream_name='stream3')
source1.connect(source3)
source3.upstreams

[<stream1; Stream>]

In [10]:
# Destroy source3; the next cell shows its upstreams afterwards.
source3.destroy()

In [11]:
# Inspect source3's upstreams after destroy().
source3.upstreams

[]

#### streamz.Stream.emit()

In [5]:
def print_message(msg):
    """Split ``msg`` on single spaces and return the resulting list.

    Despite its name, this function does not print anything itself; it only
    returns the pieces of the message.
    """
    pieces = msg.split(' ')
    return pieces
    
# Pipeline: every message emitted on source1 is split by print_message,
# and the resulting list is handed to print().
source_df = (
    source1
    .map(print_message)
    .sink(print)
)

In [7]:
# Push two messages into source1; nodes attached downstream of source1 receive them.
source1.emit('RAPIDS is super fast!!!')
source1.emit('Blazing Notebooks is the easiest way to start GPU Data Science!!!')

['RAPIDS', 'is', 'super', 'fast!!!']
['Blazing', 'Notebooks', 'is', 'the', 'easiest', 'way', 'to', 'start', 'GPU', 'Data', 'Science!!!']


#### streamz.Stream.frequencies()

In [10]:
# Attach a frequencies() node to source1 and print each result it produces.
source1.frequencies().sink(print)

Output()

In [12]:
# Emit the same sentence six times, followed by one different sentence.
for _repeat in range(6):
    source1.emit('RAPIDS is super fast!!!')
source1.emit('Blazing Notebooks is the easiest way to start GPU Data Science!!!')

{'RAPIDS is super fast!!!': 7,
 'Blazing Notebooks is the easiest way to start GPU Data Science!!!': 2}

{'RAPIDS is super fast!!!': 7, 'Blazing Notebooks is the easiest way to start GPU Data Science!!!': 2}


In [17]:
import toolz

#### streamz.Stream.register_api()

In [119]:
@streamz.Stream.register_api()
class Blazing(streamz.Stream):
    """Custom stream node that adds a running word-count operation.

    The class is registered on ``streamz.Stream``, so it can be created
    fluently from an existing stream: ``streamz.Stream().Blazing()``.

    Parameters
    ----------
    upstream : streamz.Stream, optional
        Single parent stream. When the node is created through the registered
        API (``parent.Blazing()``) the parent is supplied here as the first
        positional argument.
    upstreams : iterable of streamz.Stream, optional
        Several parent streams.
    stream_name : str, optional
        Name given to this node.
    """

    def __init__(
        self,
        upstream=None,
        upstreams=None,
        stream_name=None,
    ):
        # Parameter order mirrors ``streamz.Stream.__init__`` and the values
        # are forwarded by keyword, so a name can never be mistaken for an
        # upstream (or vice versa) because of positional ordering.
        super().__init__(
            upstream=upstream,
            upstreams=upstreams,
            stream_name=stream_name,
        )

    def word_frequencies(self, **kwargs):
        """Return a stream of cumulative word counts.

        Each incoming message is split on single spaces; the emitted value is
        a dict mapping every piece seen so far to the number of times it has
        occurred. Extra keyword arguments are passed on to ``scan``.
        """
        def update_frequencies(last, x):
            # Copy once so the previous state is left untouched, then update
            # the copy in place instead of rebuilding the dict for every word.
            counts = dict(last)
            for word in x.split(' '):
                counts[word] = counts.get(word, 0) + 1
            return counts

        return self.scan(update_frequencies, start={}, **kwargs)
        
# Create a Blazing node from a fresh Stream via the registered API.
source3 = streamz.Stream().Blazing()
# Print each cumulative word-count dict as it is produced.
source3.word_frequencies().sink(print)

Output()

In [116]:
# Emit the same sentence six times, followed by one different sentence.
for _repeat in range(6):
    source3.emit('RAPIDS is super fast!!!')
source3.emit('Blazing Notebooks is the easiest way to start GPU Data Science!!!')

{'RAPIDS': 109, 'is': 127, 'super': 109, 'fast!!!': 109, 'Blazing': 18, 'Notebooks': 18, 'the': 18, 'easiest': 18, 'way': 18, 'to': 18, 'start': 18, 'GPU': 18, 'Data': 18, 'Science!!!': 18}
{'RAPIDS': 110, 'is': 128, 'super': 110, 'fast!!!': 110, 'Blazing': 18, 'Notebooks': 18, 'the': 18, 'easiest': 18, 'way': 18, 'to': 18, 'start': 18, 'GPU': 18, 'Data': 18, 'Science!!!': 18}
{'RAPIDS': 111, 'is': 129, 'super': 111, 'fast!!!': 111, 'Blazing': 18, 'Notebooks': 18, 'the': 18, 'easiest': 18, 'way': 18, 'to': 18, 'start': 18, 'GPU': 18, 'Data': 18, 'Science!!!': 18}
{'RAPIDS': 112, 'is': 130, 'super': 112, 'fast!!!': 112, 'Blazing': 18, 'Notebooks': 18, 'the': 18, 'easiest': 18, 'way': 18, 'to': 18, 'start': 18, 'GPU': 18, 'Data': 18, 'Science!!!': 18}
{'RAPIDS': 113, 'is': 131, 'super': 113, 'fast!!!': 113, 'Blazing': 18, 'Notebooks': 18, 'the': 18, 'easiest': 18, 'way': 18, 'to': 18, 'start': 18, 'GPU': 18, 'Data': 18, 'Science!!!': 18}
{'RAPIDS': 114, 'is': 132, 'super': 114, 'fast!!!'

#### streamz.Stream.sink_to_list()

In [140]:
# Create the node that the values in the following cells are emitted into.
source4 = streamz.Source()

def word_append(word):
    """Return ``word`` prefixed with the text 'RAPIDS is '."""
    sentence = f'RAPIDS is {word}'
    return sentence
# Map every value through word_append and collect the results in the list L.
L = source4.map(word_append).sink_to_list()

In [143]:
# Emit two values into source4; the mapped results are appended to L.
source4.emit('super fast!!!')
source4.emit('beyond fast!!!')

In [144]:
# Display the list populated by sink_to_list().
L

['RAPIDS is super fast!!!',
 'RAPIDS is super fast!!!',
 'RAPIDS is beyond fast!!!']

#### streamz.Stream.update()

In [146]:
# Feed a value to source4 by calling update() directly.
source4.update('super fast!!!')

In [148]:
# Display L again after the update() call.
L

['RAPIDS is super fast!!!',
 'RAPIDS is super fast!!!',
 'RAPIDS is beyond fast!!!',
 'RAPIDS is super fast!!!',
 'RAPIDS is super fast!!!']