In [2]:
# Steps to run:
#   1. Start the required services first with: docker-compose up

In [4]:
# dependencies
from ksql import KSQLAPI
import uuid
import random
import logging
import pandas as pd
from confluent_kafka import Producer
import json
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /Users/pax-
[nltk_data]     data/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [5]:
# Kafka producer settings and the sample sentences that get published.

simple_messages = [
    "I love this pony",
    "This restaurant is great",
    "The weather is bad today",
    "I will go to the beach this weekend",
    "She likes to swim",
    "Apple is a great company",
]

# Broker address handed to the producer as 'bootstrap.servers'.
bootstrap_servers = "127.0.0.1:9092"
# Topic the sample sentences are written to.
topic = "test"
# NOTE(review): not referenced by any visible cell — confirm before removing.
msg_count = 5

In [6]:
def delivery_report(err, msg):
    """Delivery callback invoked once per produced message.

    Triggered by poll() or flush(). Prints the failure reason when `err`
    is set; otherwise prints the topic the message was delivered to.
    """
    if err is None:
        print('Message delivered to {}'.format(msg.topic()))
        return
    print('Message delivery failed: {}'.format(err))

def confluent_kafka_producer():
    """Publish every entry of `simple_messages` to `topic`.

    Each sentence is wrapped as the JSON object {"data": <sentence>} and keyed
    with a freshly generated UUID string. Delivery results are reported through
    `delivery_report`. Reads the module-level `bootstrap_servers`, `topic` and
    `simple_messages`; returns nothing and prints a summary line at the end.
    """
    producer = Producer({'bootstrap.servers': bootstrap_servers})

    for sentence in simple_messages:
        payload = json.dumps({'data': sentence})
        producer.produce(
            topic,
            key=str(uuid.uuid4()),
            value=payload,
            on_delivery=delivery_report,
        )
        # Give the client a chance to fire delivery callbacks for earlier sends.
        producer.poll(0)

    # Wait for outstanding messages so their delivery callbacks run before the summary.
    producer.flush()
    summary = "we've sent {count} messages to {brokers}"
    print(summary.format(count=len(simple_messages), brokers=bootstrap_servers))


In [7]:
# Publish the sample sentences; one delivery line is printed per message,
# followed by a summary line.
confluent_kafka_producer()

Message delivered to test
Message delivered to test
Message delivered to test
Message delivered to test
Message delivered to test
Message delivered to test
we've sent 6 messages to 127.0.0.1:9092


In [10]:
# DEBUG level makes the generated KSQL statements and the underlying HTTP
# requests show up in the cell outputs below.
logging.basicConfig(level=logging.DEBUG)
# KSQL REST client for the local server (timeout presumably in seconds — TODO confirm).
client = KSQLAPI(url='http://localhost:8088', timeout=60)

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:8088
DEBUG:urllib3.connectionpool:http://localhost:8088 "GET /info HTTP/1.1" 200 None


In [11]:
# Register a KSQL table named test_data over the 'test' topic, exposing the
# JSON field `data` as a varchar column.
# NOTE(review): the producer keys each record with a random UUID, not with the
# `data` value — confirm that declaring key='data' matches what KSQL expects.
client.create_table(table_name='test_data',
                   columns_type=['data varchar'],
                   topic='test',
                   value_format='JSON',
                   key='data')

DEBUG:root:KSQL generated: CREATE table test_data (data varchar) WITH (kafka_topic='test', value_format='JSON', key='data');
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:8088
DEBUG:urllib3.connectionpool:http://localhost:8088 "POST /ksql HTTP/1.1" 200 None


True

In [50]:
# Sanity check: list the tables known to the KSQL server (TEST_DATA should be among them).
client.ksql('show tables')

DEBUG:root:KSQL generated: show tables
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:8088
DEBUG:urllib3.connectionpool:http://localhost:8088 "POST /ksql HTTP/1.1" 200 None


[{'@type': 'tables',
  'statementText': 'show tables;',
  'tables': [{'format': 'JSON',
    'isWindowed': False,
    'name': 'TEST_DATA',
    'topic': 'test',
    'type': 'TABLE'}]}]

In [51]:
# Request up to 5 rows from the table.
# NOTE(review): the DEBUG log for this query only appears under the later
# parse_results cell, which suggests the request is issued lazily when `res`
# is consumed — confirm against the ksql client documentation.
res = client.query('select * from test_data limit 5')

In [52]:
def parse_results(res):
    """Decode a stream of concatenated JSON documents into a list.

    Args:
        res: iterable of string chunks which, joined together, form zero or
            more JSON documents placed back to back. Documents may be separated
            by whitespace, newlines and/or commas, and a single document may be
            split across several chunks.

    Returns:
        list: the decoded documents, in the order they appear.

    Raises:
        json.JSONDecodeError: if the joined text contains malformed JSON.
    """
    text = ''.join(res)
    decoder = json.JSONDecoder()
    separators = ' \t\r\n,'

    records = []
    pos, end = 0, len(text)
    while True:
        # raw_decode does not skip leading whitespace, so step over separators ourselves.
        while pos < end and text[pos] in separators:
            pos += 1
        if pos >= end:
            break
        # Decode exactly one document starting at `pos`. Unlike a textual
        # '}{' -> '},{' replacement, this never touches the contents of string values.
        record, pos = decoder.raw_decode(text, pos)
        records.append(record)
    return records

In [53]:
# Consume the query result and decode it. Despite the name, `res_dict` is a
# list with one decoded JSON object per entry.
res_dict = parse_results(res)

DEBUG:root:KSQL generated: select * from test_data limit 5
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:8088
DEBUG:urllib3.connectionpool:http://localhost:8088 "POST /query HTTP/1.1" 200 None


In [67]:
def apply_sent(res, text_column=2):
    """Score the text of every query row with VADER.

    Args:
        res: iterable of decoded query records. Each record is indexed with
            ['row']; the value is either None (the terminal record that closes
            a query) or a mapping whose 'columns' list holds the row values.
        text_column: index into row['columns'] of the text to score. Defaults
            to 2 (presumably the `data` column after KSQL's implicit leading
            columns — TODO confirm).

    Returns:
        list: one polarity_scores() dict per scored row, in input order.
        Records without a row and rows whose value is not a string are
        skipped, with a message printed for each.
    """
    # Build the analyzer once; it does not depend on the row being scored.
    analyzer = SentimentIntensityAnalyzer()

    sent_res = []
    for r in res:
        row = r['row']
        if row is None:
            # Explicit check rather than a blanket `except TypeError`, so
            # unrelated type errors are no longer reported as "limit reached".
            print('limit reached')
            continue
        text = row['columns'][text_column]
        if not isinstance(text, str):
            # e.g. a null column value; there is nothing meaningful to score.
            print('skipping non-text value: {!r}'.format(text))
            continue
        sent_res.append(analyzer.polarity_scores(text))
    return sent_res

In [68]:
# Score each returned row; the record whose 'row' is null makes apply_sent
# print "limit reached".
send_res = apply_sent(res_dict)

limit reached


In [69]:
# Final sentiment scores: one dict of compound/neg/neu/pos values per scored sentence.
send_res

[{'compound': 0.6369, 'neg': 0.0, 'neu': 0.323, 'pos': 0.677},
 {'compound': 0.6249, 'neg': 0.0, 'neu': 0.423, 'pos': 0.577},
 {'compound': -0.5423, 'neg': 0.467, 'neu': 0.533, 'pos': 0.0},
 {'compound': 0.0, 'neg': 0.0, 'neu': 1.0, 'pos': 0.0},
 {'compound': 0.4215, 'neg': 0.0, 'neu': 0.517, 'pos': 0.483}]