## Twitter Streaming App
This notebook runs a Stream Listener that captures tweets matching a specific keyword list (the keywords may be hashtags).<br>
<b>Each tweet is analysed for sentiment and emotion in real time, <br>
   and the tweet, its analysis and its geo-coordinates are saved in a Cloudant database for further processing, display, etc.</b>



In [None]:
# Environment setup: install the client libraries imported by the cells below
# (tweepy, cloudant, watson-developer-cloud).
import pip
# NOTE(review): this expression is not the last line of the cell, so the
# notebook will not display the pip version.
pip.__version__
# tweepy and cloudant are installed with --user through `python3 -m pip`.
!python3 -m pip install --user tweepy
!python3 -m pip install --user cloudant
# NOTE(review): this line calls the bare `pip` executable rather than
# `python3 -m pip`, and no versions are pinned - confirm both target the
# kernel's environment.
!pip install --upgrade watson-developer-cloud

The following cell contains the code to connect to Twitter.
It requires Twitter API keys.


def <b>connect_to_twitter():</b><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;import tweepy<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;consumerKey = 'xxxxxxxxxxxxxxxxxxx'<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;consumerSecret = 'xxxxxxxxxxxxxxxx'<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;accessToken = 'xxxxxxxxxxxxxxxxxxxx'<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;accessTokenSecret = 'xxxxxxxxxxxxxxxxxxxxxx'<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;auth = tweepy.OAuthHandler(consumerKey, consumerSecret)<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;auth.set_access_token(accessToken, accessTokenSecret)<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)  <br>


In [55]:
# The code was removed by Watson Studio for sharing.

In [3]:
#from twitterconnect import connect_to_twitter
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

The following cell contains the credentials to access Watson NLU API

#Watson NLU credentials<br>
credentials_NLU = {<br>
  'url':'https://gateway.watsonplatform.net/natural-language-understanding/api',<br>
  'username':'xxxxxxxxxxxxxxxxxxxx',<br>
  'password':'xxxxxxxxxxxxxxxxxxxxxxxx'<br>
}<br>


In [4]:
# The code was removed by Watson Studio for sharing.

In [5]:
import json
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EmotionOptions

# Watson NLU client, used further down to score each tweet's sentiment and emotion.
# credentials_NLU is expected to come from an earlier cell whose code is hidden
# in this shared copy; only its 'username' and 'password' entries are read here.
NLU_service = NaturalLanguageUnderstandingV1(
    version='2018-03-16',
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    # url='https://gateway.watsonplatform.net/natural-language-understanding/api',
    username=credentials_NLU['username'],
    password=credentials_NLU['password'])


In [6]:
# The code was removed by Watson Studio for sharing.

In [7]:
#connect via Spark driver

# Build (or reuse) the Spark session, handing the Cloudant host, username and
# password from credentials_cloudant to it as configuration entries.
# The host is whatever follows the '@' in the 'custom_url' entry.
spark = (
    SparkSession.builder
    .appName("Cloudant Storage for Tweets")
    .config("cloudant.host", credentials_cloudant['custom_url'].split('@')[1])
    .config("cloudant.username", credentials_cloudant['username'])
    .config("cloudant.password", credentials_cloudant['password'])
    .config("jsonstore.rdd.partitions", 1)
    .getOrCreate()
)

#df = spark.read.load("shake_classification", "org.apache.bahir.cloudant")


In [56]:
#Cloudant database management

def create_database(db_name):
    """Ask the global Cloudant ``client`` for the database ``db_name``.

    The database is requested with ``throw_on_exists=False``.

    Returns:
        The database handle when ``exists()`` reports True (a confirmation
        line is printed in that case), otherwise ``None``.
    """
    handle = client.create_database(db_name, throw_on_exists=False)
    if not handle.exists():
        return None
    print("{} database successfully created".format(db_name))
    return handle
    
def delete_database(db_name):
    """Delete ``db_name`` through the global Cloudant ``client`` and print the outcome.

    A ``CloudantException`` raised by the deletion is caught and reported on
    stdout; nothing is returned in either case.
    """
    try:
        client.delete_database(db_name)
    except CloudantException:
        print("There was a problem deleting {}.\n".format(db_name))
        return
    # Only reached when the deletion did not raise.
    print("{} successfully deleted.\n".format(db_name))


In [148]:
#Tweets Streaming Classes
import datetime


def set_db_name(hashtag_list, country_list, place_bbox):
    """Derive the tweets database name from the stream's filter settings.

    The name is built from up to three '_'-separated parts, followed by the
    fixed suffix ``_tweets``:

    * the hashtags joined with '_', spaces removed, ',' turned into '&', lower-cased;
    * the countries joined with '_', spaces removed, lower-cased
      (skipped when ``country_list`` equals ``[]``);
    * ``str(place_bbox)`` with ', ' turned into '_' and square brackets removed
      (skipped when that leaves an empty string).
    """
    name_parts = ['_'.join(hashtag_list).replace(' ', '').replace(',', '&').lower()]

    if country_list != []:
        name_parts.append('_'.join(country_list).replace(' ', '').lower())

    bbox_part = str(place_bbox)
    for old, new in ((', ', '_'), ('[', ''), (']', '')):
        bbox_part = bbox_part.replace(old, new)
    if bbox_part != '':
        name_parts.append(bbox_part)

    return '_'.join(name_parts) + "_tweets"


def create_stream_record(hashtag_list, country_list, place_bbox):
    """Describe a new stream as a JSON-ready dict.

    Returns:
        A ``(db_name, json_doc)`` tuple. ``db_name`` comes from
        ``set_db_name``; ``json_doc`` records that name, the current local
        date and time (the two halves of ``datetime.now().isoformat()``
        around the 'T'), and the three filter arguments unchanged.
    """
    db_name = set_db_name(hashtag_list, country_list, place_bbox)
    stamp = datetime.datetime.now().isoformat().split('T')
    json_doc = {
        "stream_name": db_name,
        "dateCreated": stamp[0],
        "timeCreated": stamp[1],
        "hashtag_list": hashtag_list,
        "country_list": country_list,
        "place_bbox": place_bbox,
    }
    return db_name, json_doc


def get_center_coordinates(b_box):
    """Return a GeoJSON-style Point at the mean of a bounding box's first four vertices.

    Args:
        b_box: object with a ``coordinates`` attribute; ``coordinates[0][0]``
            to ``coordinates[0][3]`` are read, each as an ``[x, y]`` pair.

    Returns:
        ``{"type": "Point", "coordinates": [mean_x, mean_y]}``.
    """
    corners = b_box.coordinates[0]
    xs = [corners[i][0] for i in range(4)]
    ys = [corners[i][1] for i in range(4)]
    return {"type": 'Point', "coordinates": [sum(xs) / 4, sum(ys) / 4]}


def create_JSON_record(status):
    """Build a flat JSON document for a tweet, enriched with Watson NLU scores.

    Args:
        status: tweet object; ``coordinates``, ``place``, ``text`` and
            ``created_at`` are read.

    Returns:
        dict with keys ``dateCreated``, ``timeCreated``, ``country``,
        ``coords``, ``sentiment``, ``emotion`` and ``text``. ``coords`` is an
        ``[x, y]`` list, or ``None`` when the tweet has neither coordinates
        nor a place. ``sentiment`` and ``emotion`` are empty dicts when the
        NLU call fails.
    """
    coords = None
    country = ""
    if status.coordinates is not None:
        coords = status.coordinates['coordinates']
    if status.place is not None:
        # get_center_coordinates reads `.coordinates` itself, so it must be
        # given the bounding-box object, not its coordinate list. Its Point
        # dict is unwrapped so that `coords` is an [x, y] list in both branches.
        # NOTE(review): as before, a place overrides exact tweet coordinates.
        coords = get_center_coordinates(status.place.bounding_box)['coordinates']
        country = status.place.country_code
    try:
        response = NLU_service.analyze(
            text=status.text,
            features=Features(sentiment=SentimentOptions(), emotion=EmotionOptions())).get_result()
        tweet_emotion = response['emotion']['document']['emotion']
        tweet_sentiment = response['sentiment']['document']
    except Exception:
        # Best effort: a failed analysis must not lose the tweet. Catching
        # Exception (not a bare except) lets KeyboardInterrupt still stop the stream.
        tweet_emotion = {}
        tweet_sentiment = {}
    created = status.created_at.isoformat().split('T')
    json_doc = {
        "dateCreated": created[0],
        "timeCreated": created[1],
        "country": country,
        "coords": coords,
        "sentiment": tweet_sentiment,
        "emotion": tweet_emotion,
        "text": status.text}
    return json_doc


def is_in_localisation(status, country_list, place_bbox):
    """Tell whether a tweet passes the stream's location filters.

    Args:
        status: tweet object; only ``status.place`` (and its
            ``country_code``) is read.
        country_list: accepted country codes; empty means no country filter.
        place_bbox: bounding box filter; empty means no bounding-box filter.

    Returns:
        With a country filter: True only when the tweet has a place whose
        country code is in ``country_list``. A tweet without a place cannot
        be matched to a country and is rejected.
        Otherwise True (bounding-box matching is not implemented yet).
    """
    if country_list:
        place = status.place
        if place is None:
            return False
        return place.country_code in country_list
    if place_bbox:
        # TODO test for bounding box
        # Until that test exists every tweet is accepted. Nothing is read
        # from status.place here, so coordinate-only tweets no longer crash.
        return True
    return True
    


def create_GeoJSON_record(status):
    """Build a GeoJSON-style Feature document for a tweet, enriched with Watson NLU scores.

    Args:
        status: tweet object; ``coordinates``, ``place``, ``text`` and
            ``created_at`` are read.

    Returns:
        dict with ``type`` set to ``'Feature'`` and keys ``geometry``,
        ``dateCreated``, ``timeCreated``, ``country``, ``sentiment``,
        ``emotion`` and ``text``. ``geometry`` is ``status.coordinates``
        as-is, replaced by the centre of the place's bounding box when the
        tweet has a place; it is ``None`` when the tweet has neither.
        ``sentiment`` and ``emotion`` are empty dicts when the NLU call fails.
    """
    geometry = None
    country = ""
    if status.coordinates is not None:
        geometry = status.coordinates
    if status.place is not None:
        # NOTE(review): a place overrides exact tweet coordinates.
        geometry = get_center_coordinates(status.place.bounding_box)
        country = status.place.country_code
    try:
        response = NLU_service.analyze(
            text=status.text,
            features=Features(sentiment=SentimentOptions(), emotion=EmotionOptions())).get_result()
        tweet_emotion = response['emotion']['document']['emotion']
        tweet_sentiment = response['sentiment']['document']
    except Exception:
        # Best effort: a failed analysis must not lose the tweet. Catching
        # Exception (not a bare except) lets KeyboardInterrupt still stop the stream.
        tweet_emotion = {}
        tweet_sentiment = {}
    created = status.created_at.isoformat().split('T')
    json_doc = {
        "type": 'Feature',
        "geometry": geometry,
        "dateCreated": created[0],
        "timeCreated": created[1],
        "country": country,
        "sentiment": tweet_sentiment,
        "emotion": tweet_emotion,
        "text": status.text}
    return json_doc


class Listener(StreamListener):
    """Stream listener that stores geo-tagged tweets, with their NLU scores, in a database.

    Args:
        tweets_database: object exposing ``create_document(dict)``; it
            receives one document per accepted tweet.
        country_list: country filter, passed to ``is_in_localisation``.
        place_bbox: bounding-box filter, passed to ``is_in_localisation``.
        verbose: when True, print a summary of every stored tweet.
    """

    def __init__(self, tweets_database, country_list, place_bbox, verbose=False):
        super(Listener, self).__init__()
        self.tweets_database = tweets_database
        self.country_list = country_list
        self.place_bbox = place_bbox
        self.verbose = verbose
        # Number of tweets stored so far; used to number the verbose output.
        self.counter = 0

    def on_status(self, status):
        """Store the tweet if it carries location data and passes the filters.

        Always returns True.
        """
        # Tweets with neither a place nor coordinates are skipped.
        if status.place is None and status.coordinates is None:
            return True
        if not is_in_localisation(status, self.country_list, self.place_bbox):
            return True

        self.counter += 1
        json_doc = create_GeoJSON_record(status)
        self.tweets_database.create_document(json_doc)

        # Build the log lines only when they will actually be printed.
        if self.verbose:
            tweet_details = "#{:d} date/time: {}/{} country: {} coords: {}". format(
                self.counter,
                json_doc["dateCreated"], json_doc["timeCreated"],
                json_doc["country"], json_doc["geometry"]["coordinates"])
            tweet_NLU = "sentiment: {} emotion: {}". format(json_doc["sentiment"], json_doc["emotion"])
            tweet_text = "text: {}".format(json_doc["text"])
            print(tweet_details+"\n"+tweet_NLU+"\n"+tweet_text)
        return True

    def on_error(self, status_code):
        """Return False on HTTP 420; any other status code returns None."""
        if status_code == 420:
            # Returning False from on_error is meant to disconnect the stream.
            # NOTE(review): 420 is presumably Twitter's rate-limit response - confirm.
            return False
    
        
        
class TwitterStreamer():
    """Class for streaming and processing live tweets.

    On construction it records the stream's settings in ``stream_db`` and
    obtains the tweets database through ``create_database``.
    """

    def __init__(self, stream_db, hashtag_list, country_list, place_bbox, verbose=False):
        db_name, stream_record = create_stream_record(hashtag_list, country_list, place_bbox)
        self.db_name = db_name
        # Store a description of this stream before the tweets database is requested.
        stream_db.create_document(stream_record)
        self.hashtag_list = hashtag_list
        self.country_list = country_list
        self.place_bbox = place_bbox
        self.verbose = verbose
        self.tweets_database = create_database(self.db_name)

    def stream_tweets(self):
        """Open the Twitter stream and feed matching tweets to a ``Listener``.

        Authentication comes from the notebook-level ``twapi`` object; the
        stream is filtered on ``self.hashtag_list`` as tracked keywords.
        """
        stream = Stream(
            twapi.auth,
            Listener(self.tweets_database, self.country_list, self.place_bbox, self.verbose),
        )
        # Only the keyword filter is applied here; country_list and place_bbox
        # are handed to the Listener.
        stream.filter(track=self.hashtag_list)

In [None]:
#Main streaming procedure       
    
# Notebook-level handle read by TwitterStreamer.stream_tweets (it uses twapi.auth).
twapi=connect_to_twitter()       
# Database that receives one descriptive record per stream started.
streams_database = create_database("twitter_streams_db")

# Stream settings: an empty country_list / place_bbox means no location filter.
country_list = []
hashtag_list = ['brexit']
place_bbox = []
        
# NOTE(review): presumably always true inside a notebook kernel - confirm the guard is needed.
if __name__ == '__main__':
    # Last argument True turns on verbose printing of every stored tweet.
    ts = TwitterStreamer(streams_database, hashtag_list, country_list, place_bbox, True)
    try:
        ts.stream_tweets()
    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop the stream.
        print("exited early")
    #client.disconnect()


twitter_streams_db database successfully created
brexit_tweets database successfully created
#1 date/time: 2018-10-19/14:56:52 country: GB coords: [-1.7874955, 52.447226]
sentiment: {'score': -0.699925, 'label': 'negative'} emotion: {'sadness': 0.195009, 'joy': 0.112054, 'anger': 0.408608, 'disgust': 0.219387, 'fear': 0.315313}
text: Ok...bye and don’t let the door hit you on the way out..
#2 date/time: 2018-10-19/14:57:12 country: IE coords: [-9.110049499999999, 52.858974]
sentiment: {'score': 0.0, 'label': 'neutral'} emotion: {'sadness': 0.165327, 'joy': 0.49331, 'anger': 0.091925, 'disgust': 0.048812, 'fear': 0.245837}
text: Wake up, France &amp; British admirers of Macron! #Macron is France's version of #TonyBlair; therefore, my French frien… https://t.co/scMId9J3Hp
#3 date/time: 2018-10-19/14:57:28 country: GB coords: [-1.5710735, 54.984875]
sentiment: {'score': 0.0, 'label': 'neutral'} emotion: {'sadness': 0.083082, 'joy': 0.460064, 'anger': 0.092251, 'disgust': 0.094356, 'fear':

In [144]:
# Wrap the database's all_docs endpoint in a Result (documents included) and
# print the first entry.
# NOTE(review): no visible cell binds `Result` or a notebook-level
# `tweets_database`; this cell relies on state defined elsewhere - confirm it
# survives Restart & Run All.
result_collection = Result(tweets_database.all_docs, include_docs=True)
print("Retrieved minimal document:\n{0}\n".format(result_collection[0]))


Retrieved minimal document:
[{'id': '0101029b49763406b302b25b3f1e3a1d', 'doc': {'_id': '0101029b49763406b302b25b3f1e3a1d', 'geometry': {'coordinates': [4.2716075, 51.9244575], 'type': 'Point'}, '_rev': '1-7ea1fc1e33756fbd1952f8e7db39b520', 'timeCreated': '04:01:56', 'dateCreated': '2018-10-19', 'text': 'Brexit\n\nHet levert vast\nEen ijzersterke\nBbc documentaire\nOp\n\nLoesje @ Rotterdam, Netherlands https://t.co/y0XwerzdNu', 'type': 'Feature', 'sentiment': {}, 'country': 'NL', 'emotion': {}}, 'value': {'rev': '1-7ea1fc1e33756fbd1952f8e7db39b520'}, 'key': '0101029b49763406b302b25b3f1e3a1d'}]



In [52]:
# Scratch inspection of an emotion-score dict.
# NOTE(review): in the visible cells `tweet_emotion` is only assigned inside
# functions, so this relies on a notebook-level variable defined elsewhere.
tweet_emotion

{'anger': 0.121029,
 'disgust': 0.083401,
 'fear': 0.065787,
 'joy': 0.304163,
 'sadness': 0.101967}

In [53]:
# Scratch inspection of a sentiment dict.
# NOTE(review): in the visible cells `tweet_sentiment` is only assigned inside
# functions, so this relies on a notebook-level variable defined elsewhere.
tweet_sentiment

{'label': 'positive', 'score': 0.922588}

In [31]:
from cloudant import index
index_name="_design/geodd"
# Design document declaring a geospatial index over each document's GeoJSON
# `geometry` field. Each entry under "st_indexes" must be a mapping whose
# "index" key holds the JavaScript indexer function; the previous form put the
# function string in a Python set literal, which cannot be serialised to JSON.
index_definition = {
    "_id": index_name,
    "views": {},
    "language": "javascript",
    "st_indexes": {
        "geoidx": {
            "index": "function (doc) {if (doc.geometry && doc.geometry.coordinates) {st_index(doc.geometry);}}"
        }
    }
}
# NOTE(review): index_definition is never sent to Cloudant in this cell, and
# Index() is called without a design document id, name or fields - confirm
# how the geo index is meant to be created.
# NOTE(review): no visible cell binds a notebook-level `tweets_database`.
tweets_index = index.Index(tweets_database, )

In [57]:
# Scratch value: rebinds the notebook-level place_bbox to four empty lists.
place_bbox = [[],[],[],[]]

In [123]:
# Scratch check of set_db_name with a multi-word hashtag, two countries and a
# flat bounding-box list. Note that this rebinds the notebook-level place_bbox.
place_bbox =[12,4,3]
set_db_name(["brexit", "test, try"], ['GB', 'IE'], place_bbox)

'brexit_test&try_gb_ie_12_4_3'

In [116]:
# Scratch value: resets the notebook-level place_bbox to an empty list.
place_bbox =[]

In [117]:
# Scratch check of the bounding-box string clean-up used inside set_db_name.
''.join(str(place_bbox)).replace(', ','_').replace('[','').replace(']','')

''

In [134]:
# Scratch check of the membership test that is_in_localisation applies to country codes.
'GB' in ['GB', 'IE']

True