In [2]:
# Importing the modules
from kafka import KafkaConsumer, KafkaProducer
# Tweepy
from tweepy import API, OAuthHandler, Stream, OAuth2AppHandler
# Confluent 
from confluent_kafka import Producer as cProducer
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
# # Schema Registry
# from schema_registry.client import AsyncSchemaRegistryClient, schema
# Others
import json
import os
import configparser
import datetime
import time
import typing
import uuid

In [3]:
# Kafka producers that streaming data can be sent to
def _to_json_bytes(message):
    """Serialize ``message`` to JSON text and encode it as ASCII bytes."""
    return json.dumps(message).encode('ascii')


# KafkaProducer whose values are run through the JSON serializer above
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=_to_json_bytes,
)

# Destination topic, plus broker / schema-registry endpoints for the Avro producer
kafka_topic = "kafka_tweets_stream"
producer_config = {
    "bootstrap.servers": 'localhost:9092',
    "schema.registry.url": 'http://localhost:8081',
}

# The key schema is declared inline; the value schema is read from test.avsc
key_schema = avro.loads("""
    {"type": "string"}
    """)
value_schema = avro.load('test.avsc')

# Avro producer that uses the two schemas above as its defaults
avroProducer = AvroProducer(
    producer_config,
    default_key_schema=key_schema,
    default_value_schema=value_schema,
)

In [4]:
# Reading the secrets file for credentials
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means secrets.ini was not loaded.
# Fail here with a clear message instead of a KeyError on the lookups below.
if not config.read('secrets.ini'):
    raise FileNotFoundError(
        "secrets.ini could not be read from {}".format(os.getcwd()))

# Twitter credentials come from the [twitter] section
consumer_access_key = config["twitter"]["consumer_access_key"]
consumer_secret = config["twitter"]["consumer_secret"]
twitter_access_token = config["twitter"]["access_token"]
twitter_access_token_secret = config["twitter"]["access_token_secret"]

# confluent
# Every option in the [confluent_local] section is handed to the producer as its configuration
confluent_default = dict(config['confluent_local'])
confluent_producer = cProducer(confluent_default)

In [5]:
def delivery_callback(err, msg):
    """Report the outcome of a produce request on stdout.

    Args:
        err: The delivery error, or a falsy value when delivery succeeded.
        msg: The message in question; must expose ``topic()``, ``key()`` and
            ``value()``.

    On failure a single error line is printed. On success the topic is printed
    together with the key and value, each padded to at least 12 characters.
    """
    if err:
        print('ERROR: Message failed delivery: {}'.format(err))
        return

    def _as_text(payload):
        # A message produced without a key (or value) yields None here; calling
        # .decode() on it would raise AttributeError inside the callback.
        if payload is None:
            return ''
        if isinstance(payload, bytes):
            return payload.decode('utf-8')
        return str(payload)

    print("Produced event to topic {topic}: key = {key:12} value = {value:12}".format(
        topic=msg.topic(), key=_as_text(msg.key()), value=_as_text(msg.value())))


In [6]:
# Streaming of Tweets and sending to Kafka in real time
class MyStreamListener(Stream):
    """Twitter stream that forwards each raw payload it receives to Kafka.

    Payloads arriving in ``on_data`` are produced to ``kafka_topic`` through the
    module-level ``confluent_producer``; delivery results are reported by
    ``delivery_callback``.
    """

    def on_connect(self):
        """Announce that the stream connection is up."""
        print("Connection established for stream")

    def on_status(self, status):
        """Print the text of a status."""
        # NOTE(review): on_data below does not delegate to the parent class, so
        # presumably tweepy never dispatches to this hook or to on_limit - confirm.
        print(status.text)

    def on_data(self, data):
        """Validate a raw stream payload and forward it to Kafka.

        Args:
            data: The raw payload as received from the stream; it must be
                valid JSON.

        Returns:
            True, always.

        Raises:
            json.JSONDecodeError: If ``data`` is not valid JSON; nothing is
                produced in that case.
        """
        # Parse only to validate. The raw payload, not the parsed object, is
        # what gets forwarded.
        json.loads(data)
        # delivery_callback formats msg.key(), so every record is given a key
        # (a random UUID, so keys carry no grouping meaning).
        confluent_producer.produce(
            kafka_topic,
            data,
            key=str(uuid.uuid4()),
            callback=delivery_callback,
        )
        # Delivery callbacks are only served from poll()/flush(). Without this
        # call they never run and undelivered reports pile up in the producer's
        # queue. A timeout of 0 keeps the call non-blocking.
        confluent_producer.poll(0)
        return True

    def on_limit(self, status):
        """Print a rate-limit notice, pause for 60 seconds, and return True."""
        print("Twitter API Rate Limit: Resuming soon...")
        time.sleep(60)
        return True

    def on_error(self, status):
        """Print the reported status."""
        # NOTE(review): verify that the installed tweepy version still invokes
        # a hook with this name.
        print(status)

        
if __name__ == '__main__':
    # Terms passed to the stream filter as its ``track`` list
    words_to_analyse = ['Akuffo Addo', 'Bawumia']

    # Credentials are passed positionally in this order: consumer key,
    # consumer secret, access token, access token secret.
    stream = MyStreamListener(
        consumer_access_key,
        consumer_secret,
        twitter_access_token,
        twitter_access_token_secret,
    )

    # NOTE(review): threaded=True presumably runs the stream on a background
    # thread so this cell returns immediately - confirm against the tweepy docs.
    stream.filter(threaded=True, track=words_to_analyse)

Connection established for stream
