# SmokeAvro 
Example of using Python, Avro and EventStreams to transport blob data. 

For convenience and compactness this is a single application containing two Streams; in practice
it would be two applications, each with one Stream.

## needs
Other than the streamsx components, you'll need to install sseclient.
```
  pip install sseclient
 ```

In [None]:
%matplotlib inline
from IPython.core.debugger import set_trace
from IPython.display import display, clear_output
import time
import os
import io
import sys
import json
import logging

import ipywidgets as widgets

from sseclient import SSEClient as EventSource
import streamsx
from streamsx.topology.topology import *
import streamsx.rest as rest
from streamsx.topology import context
from streamsx.eventstreams.schema import Schema
import streamsx.eventstreams as eventstreams
import streamsx.avro as avro

## eventstreams credentials...
import credential

# Put ../scripts at the front of the module search path (only once, so re-running
# this cell does not add duplicates) before importing jupyter_streams.
# NOTE(review): jupyter_streams is presumably a local helper living in ../scripts — confirm.
if '../scripts' not in sys.path:
    sys.path.insert(0, '../scripts')
import jupyter_streams
#import cvsupport
#import streams_operations
#import streams_render


## NOTE(review): the `credential` module imported above presumably holds the
## credentials for the cloud service and the Kafka/Event Streams service — confirm.
## (`logging` is already imported earlier in this cell; the re-import is harmless.)
import logging
## Print the versions of the streamsx packages in use, for troubleshooting.
print("topology", streamsx.topology.topology.__version__)
print("eventstreams", eventstreams.__version__)
print("avro", avro.__version__)

In [None]:
# Fetch the instance handle and configuration for the 'Streaming3Turbine' service
# through the jupyter_streams helper. Later cells use `cfg` when submitting the
# topology and `instance` when listing jobs.
instance,cfg = jupyter_streams.get_instance(None, service_name='Streaming3Turbine')

In [None]:
def get_events():
    """Yield recent-change events from the Wikimedia SSE feed.

    Iterates over the Wikimedia ``recentchange`` server-sent-event stream and
    yields the decoded JSON payload of each event. Events with an empty
    payload are skipped; payloads that are not valid JSON are reported with
    ``print`` and skipped, so one bad event does not end the stream.

    Notes::
    Institutions keep their own copy of Wikipedia so that the main site is
    not completely overwhelmed. To keep those copies up to date, Wikipedia
    publishes the changes made to the site on this SSE feed.

    Yields:
        The value produced by ``json.loads`` for each event payload
        (presumably one dict per change -- verify against the feed schema).
    """
    for change in EventSource('https://stream.wikimedia.org/v2/stream/recentchange'):
        # Nothing to decode when the event carries no payload.
        if not change.data:
            continue
        try:
            obj = json.loads(change.data)
        except json.JSONDecodeError as err:
            # json.decoder.JSONDecodeError is this very class, so a second
            # handler for it could never run; one handler is sufficient.
            print("JSON l1 error:", err, "Invalid JSON:", change.data)
        else:
            yield obj


## AvroSndRcv application 
Example of using avro and Message hub to transmit data between Streams. 

- Serialization/deserialization is done with Avro.
- Messages are transmitted in a binary format.
- The schema is known to both the receiver and the transmitter.
- Uses the SPL Functor operator to convert the blob schema, working around this error: "Blob schema is not currently supported for Python."

In [None]:

import streamsx.avro as avro
from streamsx.spl.toolkit import add_toolkit
import streamsx.spl.op as op
import streamsx.avro as avro
from streamsx.eventstreams.schema import Schema
from streamsx.eventstreams.schema import Schema as MsgSchema
import streamsx.eventstreams.schema

## Avro record schema (a JSON string) describing the wiki events sent via Avro.
## Adjacent string literals are concatenated by the parser into one schema string.
avro_schema = (
    '{"type" : "record","name" :  "wiki_schema", "fields": ['
    '{"name":"time_stamp","type": "int"},'
    '{"name":"id","type": "string"},'
    '{"name":"msg_type","type": "string"},'
    '{"name":"wiki","type": "string"},'
    '{"name":"user","type": "string"}, '
    '{"name":"title","type": "string"}]}'
)


def avro_sndrcv(jobName=None):
    """Compose the Avro send/receive topology.

    Both halves of the example live in one topology:

    * Sender: reads wiki change events from ``get_events``, keeps only
      non-bot events, pares each event down to the fields named in
      ``avro_schema``, serializes it with Avro and publishes the blob to
      the ``TopicForward`` topic as an Event Streams ``BinaryMessage``.
    * Receiver: subscribes to ``TopicForward``, copies the message blob into
      a single-attribute tuple, deserializes it back to JSON with Avro and
      filters out the ``frwiki`` and ``commonswiki`` events.

    Args:
        jobName: Name passed to ``Topology``; defaults to ``None``.

    Returns:
        dict: ``{"topo": topo}`` where ``topo`` is the composed topology.
    """
    topo = Topology(name=jobName)
    topo.add_pip_package('SSEClient===0.0.22')
    avro_tk = avro.download_toolkit()
    es_tk = eventstreams.download_toolkit()
    add_toolkit(topo, avro_tk)
    add_toolkit(topo, es_tk)

    ## Get wiki data - this could be the start of application I
    wiki_events = topo.source(get_events, name="wikiEvents")
    ## Filter out bots
    all_human = wiki_events.filter(lambda x: x['bot'] is False, name='humanFilter')
    ## Pare down the number of fields to those named in avro_schema
    pared_human = all_human.map(
        lambda x: {
            'time_stamp': x['timestamp'],
            'id': x['meta']['id'],
            'msg_type': x['type'],
            'wiki': x['wiki'],
            'user': x['user'],
            'title': x['title'],
        },
        name="paredAll")

    avro_blob = avro.json_to_avro(pared_human.as_json(), avro_schema)

    # Map to schema of eventstreams BinaryMessage
    #  BinaryMessage : fields 'key', 'message'
    avro_transmit = op.Map('spl.relational::Functor', avro_blob,
                           schema=streamsx.eventstreams.schema.Schema.BinaryMessage)
    avro_transmit.message = avro_transmit.output(avro_transmit.attribute('binary'))
    avro_transmit.key = avro_transmit.output('"key - necessary but inconsequential?"')

    eventstreams.publish(avro_transmit.stream, topic='TopicForward',
                         credentials=json.loads(credential.magsEventStream))

    # Receive wiki data - this could be the start of application II
    from_kafka = eventstreams.subscribe(topo, topic='TopicForward',
                                        schema=streamsx.eventstreams.schema.Schema.BinaryMessage,
                                        credentials=json.loads(credential.magsEventStream))

    ## Map from BinaryMessage's message -> arvoMessage (a tuple holding only the blob)
    avro_map = op.Map('spl.relational::Functor', from_kafka, schema='tuple<blob arvoMessage>')
    avro_map.arvoMessage = avro_map.output(avro_map.attribute('message'))
    avro_map.stream.print(tag="mapped", name="maped")

    ## arvoMessage will be de-serialized according to avro_schema
    avro_received = avro.avro_to_json(avro_map.stream, avro_schema)
    avro_received.print(tag="avro_received", name="avro_received")

    ## Separate the 'commons' and 'french' wikis out of the received event stream.
    ## The wiki identifier is carried in the 'wiki' field of avro_schema; the
    ## records have no 'name' field, so filtering on x['name'] would raise KeyError.
    france = avro_received.filter(lambda x: x['wiki'] == 'frwiki', name='franceFilter')
    france.print(name="france")

    commons = avro_received.filter(lambda x: x['wiki'] == 'commonswiki', name='commonsFilter')
    commons.print(name="commons")

    return {"topo": topo}

# Compose the topology under the job name "AvroSndRcv" and hand it to the
# jupyter_streams helper together with the `cfg` obtained earlier.
# NOTE(review): commonSubmit presumably builds and submits the job to the
# 'Streaming3Turbine' service — confirm in jupyter_streams.
topo = avro_sndrcv("AvroSndRcv")["topo"]
jupyter_streams.commonSubmit(cfg, "Streaming3Turbine", topo, credential=credential)  


In [None]:
# List the jobs on the instance.
# NOTE(review): cancel=True presumably also cancels the listed jobs — confirm in jupyter_streams.
jupyter_streams.list_jobs(instance, cancel=True)