In [1]:
import findspark
# NOTE(review): presumably locates the local Spark installation and makes
# `pyspark` importable; it runs before the pyspark imports in the next cell.
# Confirm against the findspark documentation.
findspark.init()

In [2]:
import json, requests, sys, re
from nltk.corpus import stopwords
from operator import add
from pyspark import SparkContext
from pyspark import StorageLevel
from pyspark.streaming import StreamingContext
from textblob import TextBlob

# Tracked search term(s). getTopWords appends these to its stop-word list so
# they do not show up in the word counts.
keyword=['myanmar']

In [3]:
# text classification
def getSentiment(text):
    """Classify ``text`` into a one-hot ``(positive, neutral, negative)`` tuple.

    The decision is based on TextBlob's polarity score: values at or above
    +0.05 are positive, values strictly above -0.05 are neutral, and
    everything else is negative.
    """
    cutoff = 0.05
    polarity = TextBlob(text).sentiment.polarity

    if polarity >= cutoff:
        return (1, 0, 0)  # positive
    if polarity > -cutoff:
        return (0, 1, 0)  # neutral
    return (0, 0, 1)  # negative

In [4]:
def getTweetsCounter(dstream_tweets_sentiment_analysed, window_length, sliding_interval):
    """Build a windowed stream of total tweet count and per-sentiment tallies.

    Every input record must carry its one-hot sentiment tuple at index 2.
    All records are folded under the single key ``'count'``, so each window
    yields one record ``('count', (total, (positive, neutral, negative)))``.
    The resulting stream is also registered for printing via ``pprint()``.
    """
    def merge(left, right):
        # Add the tweet totals and the three sentiment counters element-wise.
        return (left[0] + right[0],
                (left[1][0] + right[1][0],
                 left[1][1] + right[1][1],
                 left[1][2] + right[1][2]))

    keyed = dstream_tweets_sentiment_analysed.map(
        lambda record: ('count', (1, record[2])))

    total_count = keyed.reduceByKeyAndWindow(
        merge, None, window_length, sliding_interval)

    total_count.pprint()
    return total_count
    
def sendTweetsCounter(sentiments, url, timeout=5):
    """Post the windowed sentiment totals of every batch to an HTTP endpoint.

    Args:
        sentiments: DStream whose records look like
            ``(key, (total, (positive, neutral, negative)))`` (the shape
            getTweetsCounter produces). Only the first record is sent.
        url: Endpoint that receives the form-encoded fields 'positive',
            'neutral', 'negative' and 'total'.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the streaming batch forever.

    A failed request is reported on stderr and skipped, so one unreachable
    dashboard does not terminate the streaming job.
    """
    def takeAndSend(time, rdd):
        # Nothing to report until the window holds at least one tweet.
        if rdd.isEmpty():
            return

        (_, (total, (pos, neutral, neg))) = rdd.first()
        json_data = {'positive': pos, 'neutral': neutral, 'negative': neg, 'total': total}

        try:
            requests.post(url, data=json_data, timeout=timeout)
        except requests.RequestException as err:
            print('sendTweetsCounter: POST to {} failed: {}'.format(url, err), file=sys.stderr)

    sentiments.foreachRDD(takeAndSend)

In [5]:
def getTweets(kvs):
    """Turn a stream of raw tweet JSON strings into sorted tweet tuples.

    Each element becomes ``(screen_name, text, followers_count, id)`` and
    every batch is ordered by follower count, highest first.
    """
    def to_record(raw):
        tweet = json.loads(raw)
        author = tweet["user"]
        return (author["screen_name"], tweet["text"], author["followers_count"], tweet["id"])

    def by_followers_desc(rdd):
        return rdd.sortBy(lambda record: record[2], ascending = False)

    tweets_text = kvs.map(to_record).transform(by_followers_desc)

    #tweets_text.pprint()
    return tweets_text
    
def sendTweets(tweets, url, timeout=5):
    """Post the first ten tweets of every batch to an HTTP endpoint.

    Args:
        tweets: DStream of ``(screen_name, text, followers_count, tweet_id)``
            tuples (the shape getTweets emits).
        url: Endpoint that receives the form-encoded fields 'user', 'text'
            and 'id'. Each field holds the ``str()`` of a Python list.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the streaming batch forever.

    A failed request is reported on stderr and skipped, so one unreachable
    dashboard does not terminate the streaming job.
    """
    def takeAndSend(time, rdd):
        if rdd.isEmpty():
            return

        tweets_data = rdd.take(10)

        # The follower count (index 2) is only used for sorting upstream.
        users = [row[0] for row in tweets_data]
        texts = [row[1] for row in tweets_data]
        tweet_ids = [row[3] for row in tweets_data]

        json_data = {'user': str(users), 'text': str(texts), 'id': str(tweet_ids)}

        try:
            requests.post(url, data=json_data, timeout=timeout)
        except requests.RequestException as err:
            print('sendTweets: POST to {} failed: {}'.format(url, err), file=sys.stderr)

    tweets.foreachRDD(takeAndSend)

In [6]:
def getTopWords(tweets, window_length, sliding_interval):
    """Count the non-stop-words of tweet texts over a sliding window.

    Args:
        tweets: DStream of records whose element at index 1 is the tweet text.
        window_length: Window duration passed to ``reduceByKeyAndWindow``.
        sliding_interval: Slide duration passed to ``reduceByKeyAndWindow``.

    Returns:
        DStream of ``(word, count)`` pairs, sorted by count descending. The
        stream is also registered for printing via ``pprint()``.

    URLs and the literal ``[link]`` placeholder are removed before
    tokenising. Words are lower-cased; English stop words, 'rt', the
    module-level ``keyword`` terms, hashtags, mentions and tokens shorter
    than two characters are dropped.
    """
    def clean(record):
        text = re.sub(r'http\S+', '', record[1])
        text = re.sub(r'bit\.ly/\S+', '', text)
        # str.strip('[link]') would strip any of the characters "[link]" from
        # both ends (e.g. a leading "kill"), so remove the literal token instead.
        return text.replace('[link]', '')

    words = tweets.map(clean) \
                  .flatMap(lambda line: re.split(r"[\n;,\.\s]", line))

    ## This part does the word count
    # A set gives constant-time membership tests inside the filter.
    sw = set(stopwords.words('english'))
    sw.update(['rt'] + keyword)

    counts = words.map(lambda word: word.strip().lower()) \
                  .filter(lambda word: word not in sw) \
                  .filter(lambda word: len(word) >= 2 and word[0] != '#' and word[0] != '@') \
                  .map(lambda word: (word, 1)) \
                  .reduceByKeyAndWindow(add, None, window_length, sliding_interval) \
                  .transform(lambda rdd: rdd.sortBy(lambda x: x[1], ascending = False))

    counts.pprint()
    return counts

In [7]:
def getTopHashTags(tweets, window_length, sliding_interval):
    """Count the hashtags of tweet texts over a sliding window.

    Args:
        tweets: DStream of records whose element at index 1 is the tweet text.
        window_length: Window duration passed to ``reduceByKeyAndWindow``.
        sliding_interval: Slide duration passed to ``reduceByKeyAndWindow``.

    Returns:
        DStream of ``(hashtag, count)`` pairs, sorted by count descending.
        The stream is also registered for printing via ``pprint()``.

    URLs and the literal ``[link]`` placeholder are removed before
    tokenising. Tokens are lower-cased and kept only when they start with
    '#' and are at least two characters long.
    """
    def clean(record):
        text = re.sub(r'http\S+', '', record[1])
        text = re.sub(r'bit\.ly/\S+', '', text)
        # str.strip('[link]') would strip any of the characters "[link]" from
        # both ends (turning a trailing "#berlin" into "#ber"), so remove the
        # literal token instead.
        return text.replace('[link]', '')

    words = tweets.map(clean) \
                  .flatMap(lambda line: re.split(r"[\n;,\.\s]", line))

    hashtags = words.map(lambda word: word.strip().lower()) \
            .filter(lambda word: len(word) >= 2 and word[0] == '#') \
            .map(lambda word: (word, 1)) \
            .reduceByKeyAndWindow(add, None, window_length, sliding_interval) \
            .transform(lambda rdd: rdd.sortBy(lambda x: x[1], ascending = False))

    hashtags.pprint()
    return hashtags

In [8]:
def sendTopWords(counts, url, num, timeout=5):
    """Post the first ``num`` (word, count) pairs of every batch to an HTTP endpoint.

    Args:
        counts: DStream of ``(word, count)`` pairs; the caller is expected to
            have sorted them already, since only the first ``num`` are taken.
        url: Endpoint that receives the form-encoded fields 'words' and
            'counts'. Each field holds the ``str()`` of a Python list.
        num: Maximum number of pairs to send per batch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the streaming batch forever.

    The payload is also printed. A failed request is reported on stderr and
    skipped, so one unreachable dashboard does not terminate the streaming job.
    """
    def takeAndSend(time, rdd):
        if rdd.isEmpty():
            return

        word_counts = rdd.take(num)
        words = [pair[0] for pair in word_counts]
        values = [pair[1] for pair in word_counts]

        json_data = {'words': str(words), 'counts': str(values)}
        print(json_data)

        try:
            requests.post(url, data=json_data, timeout=timeout)
        except requests.RequestException as err:
            print('sendTopWords: POST to {} failed: {}'.format(url, err), file=sys.stderr)

    counts.foreachRDD(takeAndSend)

In [9]:
def sendGeoData(path, url, timeout=5):
    """Read geotagged tweets from a local file and post their coordinates.

    Args:
        path: Local filesystem path of a text file with one tweet JSON object
            per line. It is prefixed with ``file:///`` before being handed to
            the module-level SparkContext ``sc``.
        url: Endpoint that receives the form-encoded fields 'longitude' and
            'latitude'. Each field holds the ``str()`` of a Python list.
        timeout: Seconds to wait for the server before giving up.

    Tweets whose ``coordinates`` field is null are skipped instead of
    crashing the job. Unlike the streaming senders this is a one-shot call,
    so HTTP errors are allowed to propagate to the caller.
    """
    filepath = "file:///" + path
    geodata = sc.textFile(filepath) \
                .map(lambda x: x.encode("ascii", "ignore")) \
                .map(lambda x: json.loads(x)) \
                .map(lambda json_object: json_object["coordinates"]) \
                .filter(lambda point: point is not None) \
                .map(lambda point: (point['coordinates'][0], point['coordinates'][1])) \
                .collect()

    # Each collected pair is (first, second) element of the coordinates list.
    longitudes = [pair[0] for pair in geodata]
    latitudes = [pair[1] for pair in geodata]

    json_data = {'longitude': str(longitudes), 'latitude': str(latitudes)}
    requests.post(url, data=json_data, timeout=timeout)

In [10]:
# Driver-side Spark context; sendGeoData reads this module-level `sc`.
sc = SparkContext(appName="tweetStream")
# Create a local StreamingContext with a batch interval of 2 seconds.
# window_length (15 * 60) and sliding_interval (6) are handed to
# reduceByKeyAndWindow by the aggregation functions.
# NOTE(review): presumably all three values are in seconds, and the window
# and slide must be multiples of the batch interval. Confirm against the
# Spark Streaming documentation.
batch_interval = 2
window_length = 15*60
sliding_interval = 6

ssc = StreamingContext(sc, batch_interval)
# Checkpoint directory for the streaming context.
ssc.checkpoint("twittercheckpt")

# Create a DStream that connects to hostname:port
tweetStream = ssc.socketTextStream("0.0.0.0", 5555)

In [11]:
# Parse each raw JSON line and keep only (screen_name, text).
tweets = (
    tweetStream
    .map(lambda raw: json.loads(raw))
    .map(lambda tweet: (tweet["user"]["screen_name"], tweet["text"]))
)

# Append the one-hot sentiment tuple of the text as a third field.
tweets_sentiment_analysed = tweets.map(
    lambda pair: (pair[0], pair[1], getSentiment(pair[1]))
)

In [12]:
#tweets_sentiment_analysed.persist(StorageLevel.MEMORY_AND_DISK)

In [13]:
# Base URL of the dashboard server; each sender appends its own endpoint.
server = 'http://localhost:5000/'

# Windowed totals and sentiment tallies -> /update_sentiments
tweet_counters = getTweetsCounter(tweets_sentiment_analysed, window_length, sliding_interval)
sendTweetsCounter(tweet_counters,  server +'update_sentiments')

# Per-batch tweets sorted by follower count -> /update_tweets
# (getTweets parses the raw stream itself, so it takes tweetStream, not tweets).
tweet_text= getTweets(tweetStream)
sendTweets(tweet_text, server + 'update_tweets')

# Top 10 words in the window -> /update_counts
key_words=getTopWords(tweets, window_length, sliding_interval)
sendTopWords(key_words, server + 'update_counts', 10)

# Top 30 hashtags in the window -> /update_hashtagcounts
hashtag=getTopHashTags(tweets, window_length, sliding_interval)
sendTopWords(hashtag, server + 'update_hashtagcounts', 30)

# One-shot upload of geotagged tweets read from a local file.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
geodata_path='/Users/shawvin/Desktop/Big data project/geo_tweets.txt'
sendGeoData(geodata_path, server + 'update_geodata')

In [14]:
#getTopWords(tweets, window_length, sliding_interval)

In [15]:
#getTopHashTags(tweets_sentiment_analysed, window_length, sliding_interval)

In [16]:
# Start computing
ssc.start()
try:
    # Wait for termination (blocks until the stream stops or the kernel is interrupted)
    ssc.awaitTermination()
finally:
    # Runs on normal termination and on KeyboardInterrupt alike. Previously
    # an interrupt skipped this call and left the streaming context running.
    # `stopGraceFully` is the keyword spelling used by PySpark's
    # StreamingContext.stop.
    ssc.stop(stopGraceFully = True)

-------------------------------------------
Time: 2021-05-27 08:24:02
-------------------------------------------
('count', (3, (0, 2, 1)))

-------------------------------------------
Time: 2021-05-27 08:24:02
-------------------------------------------
('26)', 1)
('show', 1)
('morning', 1)
('children!', 1)
('salutes', 1)
('motorcycle', 1)
('save', 1)
('brought', 1)
('car', 1)
('the…', 1)
...

{'words': "['26)', 'show', 'morning', 'children!', 'salutes', 'motorcycle', 'save', 'brought', 'car', 'the…']", 'counts': '[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
-------------------------------------------
Time: 2021-05-27 08:24:02
-------------------------------------------
('#may26coup', 1)
('#crimesagainstchildren', 1)
('#whatshappeninginmyanmar', 1)

{'words': "['#may26coup', '#crimesagainstchildren', '#whatshappeninginmyanmar']", 'counts': '[1, 1, 1]'}
-------------------------------------------
Time: 2021-05-27 08:24:08
-------------------------------------------
('count', (3, (0, 2, 1)))

-----

-------------------------------------------
Time: 2021-05-27 08:24:50
-------------------------------------------
('military', 8)
('monks', 5)
('payments', 4)
('junta', 4)
('total', 4)
('&amp', 3)
('gas', 3)
('giant', 3)
('energy', 3)
('people', 3)
...

{'words': "['military', 'monks', 'payments', 'junta', 'total', '&amp', 'gas', 'giant', 'energy', 'people']", 'counts': '[8, 5, 4, 4, 4, 3, 3, 3, 3, 3]'}
-------------------------------------------
Time: 2021-05-27 08:24:50
-------------------------------------------
('#may26coup', 5)
('#myanmar', 4)
('#whatshappeninginmyanmar', 3)
('#auspicerefugees', 2)
('#auspicerefugees…', 2)
('#whatshappeninginmyanmar…', 1)
('#french', 1)
('#whatshappeninginmyan…', 1)
('#crimesagainstchildren', 1)
('#whatshappeningi…', 1)
...

{'words': "['#may26coup', '#myanmar', '#whatshappeninginmyanmar', '#auspicerefugees', '#auspicerefugees…', '#whatshappeninginmyanmar…', '#french', '#whatshappeninginmyan…', '#crimesagainstchildren', '#whatshappeningi…', '#indi

-------------------------------------------
Time: 2021-05-27 08:25:32
-------------------------------------------
('count', (54, (10, 32, 12)))

-------------------------------------------
Time: 2021-05-27 08:25:32
-------------------------------------------
('military', 13)
('payments', 8)
('monks', 7)
('total', 7)
('junta', 6)
('cash', 6)
('sagaing', 6)
('french', 6)
('giant', 5)
('energy', 5)
...

{'words': "['military', 'payments', 'monks', 'total', 'junta', 'cash', 'sagaing', 'french', 'giant', 'energy']", 'counts': '[13, 8, 7, 7, 6, 6, 6, 6, 5, 5]'}
-------------------------------------------
Time: 2021-05-27 08:25:32
-------------------------------------------
('#may26coup', 9)
('#whatshappeninginmyanmar', 9)
('#myanmar', 6)
('#auspicerefugees', 4)
('#auspicerefugees…', 2)
('#atrocityalert:', 1)
('#whatshappeninginmyanmar…', 1)
('#french', 1)
('#asean', 1)
('#whatshappeninginmyan…', 1)
...

{'words': "['#may26coup', '#whatshappeninginmyanmar', '#myanmar', '#auspicerefugees', '#a

-------------------------------------------
Time: 2021-05-27 08:26:14
-------------------------------------------
('count', (78, (16, 45, 17)))

-------------------------------------------
Time: 2021-05-27 08:26:14
-------------------------------------------
('military', 22)
('&amp', 9)
('total', 9)
('monks', 8)
('payments', 8)
('junta', 8)
('cash', 8)
('french', 8)
('giant', 6)
('energy', 6)
...

{'words': "['military', '&amp', 'total', 'monks', 'payments', 'junta', 'cash', 'french', 'giant', 'energy']", 'counts': '[22, 9, 9, 8, 8, 8, 8, 8, 6, 6]'}
-------------------------------------------
Time: 2021-05-27 08:26:14
-------------------------------------------
('#whatshappeninginmyanmar', 13)
('#may26coup', 11)
('#myanmar', 9)
('#auspicerefugees', 4)
('#auspicerefugees…', 2)
('#atrocityalert:', 1)
('#whatshappeninginmyanmar…', 1)
('#french', 1)
('#asean', 1)
('#whatshappeninginmyan…', 1)
...

{'words': "['#whatshappeninginmyanmar', '#may26coup', '#myanmar', '#auspicerefugees', '#auspi

-------------------------------------------
Time: 2021-05-27 08:26:50
-------------------------------------------
('#whatshappeninginmyanmar', 15)
('#may26coup', 12)
('#myanmar', 12)
('#auspicerefugees', 4)
('#french', 2)
('#japanese', 2)
('#auspicerefugees…', 2)
('#atrocityalert:', 1)
('#southokkala', 1)
('#breaking', 1)
...

{'words': "['#whatshappeninginmyanmar', '#may26coup', '#myanmar', '#auspicerefugees', '#french', '#japanese', '#auspicerefugees…', '#atrocityalert:', '#southokkala', '#breaking', '#whatshappeninginmyanmar…', '#artist', '#asean', '#whatshappeninginmyan…', '#myanmar’s', '#thailand', '#crimesagainstchildren', '#crimesagainsthumanity', '#whatshappeningi…', '#thai', '#india']", 'counts': '[15, 12, 12, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
-------------------------------------------
Time: 2021-05-27 08:26:56
-------------------------------------------
('count', (103, (24, 57, 22)))

-------------------------------------------
Time: 2021-05-27 08:26:56

-------------------------------------------
Time: 2021-05-27 08:27:26
-------------------------------------------
('#myanmar', 17)
('#whatshappeninginmyanmar', 16)
('#may26coup', 15)
('#auspicerefugees', 6)
('#auspicerefugees…', 3)
('#whatshappeninginmyanmar…', 2)
('#french', 2)
('#japanese', 2)
('#whatshappeningi…', 2)
('#atrocityalert:', 1)
...

{'words': "['#myanmar', '#whatshappeninginmyanmar', '#may26coup', '#auspicerefugees', '#auspicerefugees…', '#whatshappeninginmyanmar…', '#french', '#japanese', '#whatshappeningi…', '#atrocityalert:', '#southokkala', '#breaking', '#artist', '#may26coup…', '#asean', '#myanmar/#burma:', '#whatshappeninginmyan…', '#myanmar’s', '#thailand', '#crimesagainstchildren', '#crimesagainsthumanity', '#thai', '#whats…', '#india']", 'counts': '[17, 16, 15, 6, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
-------------------------------------------
Time: 2021-05-27 08:27:32
-------------------------------------------
('count', (130, (36, 71, 

-------------------------------------------
Time: 2021-05-27 08:28:02
-------------------------------------------
('military', 38)
('monks', 23)
('payments', 18)
('&amp', 17)
('junta', 15)
('show', 13)
('myanmar’s', 13)
('sagaing', 13)
('may', 12)
('total', 12)
...

{'words': "['military', 'monks', 'payments', '&amp', 'junta', 'show', 'myanmar’s', 'sagaing', 'may', 'total']", 'counts': '[38, 23, 18, 17, 15, 13, 13, 13, 12, 12]'}
-------------------------------------------
Time: 2021-05-27 08:28:02
-------------------------------------------
('#myanmar', 19)
('#whatshappeninginmyanmar', 17)
('#may26coup', 16)
('#auspicerefugees', 7)
('#whatshappeninginmyanmar…', 3)
('#japanese', 3)
('#auspicerefugees…', 3)
('#french', 2)
('#myanmar’s', 2)
('#whatshappeningi…', 2)
...

{'words': "['#myanmar', '#whatshappeninginmyanmar', '#may26coup', '#auspicerefugees', '#whatshappeninginmyanmar…', '#japanese', '#auspicerefugees…', '#french', '#myanmar’s', '#whatshappeningi…', '#atrocityalert:', '#southo

-------------------------------------------
Time: 2021-05-27 08:28:38
-------------------------------------------
('count', (176, (51, 93, 32)))

-------------------------------------------
Time: 2021-05-27 08:28:38
-------------------------------------------
('military', 40)
('monks', 26)
('&amp', 19)
('payments', 18)
('junta', 16)
('sagaing', 16)
('people', 15)
('myanmar’s', 15)
('show', 13)
('may', 13)
...

{'words': "['military', 'monks', '&amp', 'payments', 'junta', 'sagaing', 'people', 'myanmar’s', 'show', 'may']", 'counts': '[40, 26, 19, 18, 16, 16, 15, 15, 13, 13]'}
-------------------------------------------
Time: 2021-05-27 08:28:38
-------------------------------------------
('#myanmar', 22)
('#whatshappeninginmyanmar', 19)
('#may26coup', 17)
('#auspicerefugees', 8)
('#whatshappeninginmyanmar…', 3)
('#japanese', 3)
('#auspicerefugees…', 3)
('#french', 2)
('#myanmar’s', 2)
('#whatshappeningi…', 2)
...

{'words': "['#myanmar', '#whatshappeninginmyanmar', '#may26coup', '#auspic

-------------------------------------------
Time: 2021-05-27 08:29:08
-------------------------------------------
('#myanmar', 24)
('#whatshappeninginmyanmar', 24)
('#may26coup', 22)
('#auspicerefugees', 11)
('#whatshappeninginmyanmar…', 4)
('#japanese', 4)
('#auspicerefugees…', 4)
('#kayah', 2)
('#french', 2)
('#myanmar’s', 2)
...

{'words': "['#myanmar', '#whatshappeninginmyanmar', '#may26coup', '#auspicerefugees', '#whatshappeninginmyanmar…', '#japanese', '#auspicerefugees…', '#kayah', '#french', '#myanmar’s', '#whatshappeningi…', '#atrocityalert:', '#myanm…', '#southokkala', '#breaking', '#artist', '#may26coup…', '#whatshappeningnowinmyanmar', '#asean', '#myanmar/#burma:', '#whatshappeninginmyan…', '#japan', '#thailand', '#whatshappeninginmyanmar?', '#crimesagainstchildren', '#crimesagainsthumanity', '#thai', '#whatshappeninginmy…', '#whats…', '#myanmarpeople']", 'counts': '[24, 24, 22, 11, 4, 4, 4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
------------

-------------------------------------------
Time: 2021-05-27 08:29:44
-------------------------------------------
('count', (222, (60, 117, 45)))

-------------------------------------------
Time: 2021-05-27 08:29:44
-------------------------------------------
('military', 50)
('monks', 41)
('&amp', 24)
('people', 23)
('sagaing', 20)
('payments', 19)
('myanmar’s', 19)
('junta', 19)
('show', 16)
('buddhist', 16)
...

{'words': "['military', 'monks', '&amp', 'people', 'sagaing', 'payments', 'myanmar’s', 'junta', 'show', 'buddhist']", 'counts': '[50, 41, 24, 23, 20, 19, 19, 19, 16, 16]'}
-------------------------------------------
Time: 2021-05-27 08:29:44
-------------------------------------------
('#myanmar', 28)
('#may26coup', 27)
('#whatshappeninginmyanmar', 26)
('#auspicerefugees', 14)
('#whatshappeninginmyanmar…', 5)
('#japanese', 5)
('#auspicerefugees…', 5)
('#whatshappeningi…', 3)
('#kayah', 2)
('#may26coup…', 2)
...

{'words': "['#myanmar', '#may26coup', '#whatshappeninginmyanma

-------------------------------------------
Time: 2021-05-27 08:30:14
-------------------------------------------
('#myanmar', 31)
('#may26coup', 29)
('#whatshappeninginmyanmar', 28)
('#auspicerefugees', 16)
('#whatshappeninginmyanmar…', 7)
('#japanese', 5)
('#auspicerefugees…', 5)
('#may26coup…', 3)
('#whatshappeningi…', 3)
('#kayah', 2)
...

{'words': "['#myanmar', '#may26coup', '#whatshappeninginmyanmar', '#auspicerefugees', '#whatshappeninginmyanmar…', '#japanese', '#auspicerefugees…', '#may26coup…', '#whatshappeningi…', '#kayah', '#french', '#myanmar’s', '#thailand', '#thai', '#whats…', '#atrocityalert:', '#myanm…', '#southokkala', '#breaking', '#artist', '#conflictmap', '#maps', '#whatshappeningnowinmyanmar', '#myanmarmilitary', '#conflicttracker', '#asean', '#myanmar/#burma:', '#sagaing', '#whatshappeninginmyan…', '#japan']", 'counts': '[31, 29, 28, 16, 7, 5, 5, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
-------------------------------------------
Tim

-------------------------------------------
Time: 2021-05-27 08:30:50
-------------------------------------------
('count', (257, (72, 132, 53)))

-------------------------------------------
Time: 2021-05-27 08:30:50
-------------------------------------------
('military', 57)
('monks', 46)
('&amp', 28)
('people', 25)
('myanmar’s', 24)
('junta', 24)
('payments', 22)
('sagaing', 22)
('buddhist', 19)
('show', 18)
...

{'words': "['military', 'monks', '&amp', 'people', 'myanmar’s', 'junta', 'payments', 'sagaing', 'buddhist', 'show']", 'counts': '[57, 46, 28, 25, 24, 24, 22, 22, 19, 18]'}
-------------------------------------------
Time: 2021-05-27 08:30:50
-------------------------------------------
('#may26coup', 34)
('#myanmar', 34)
('#whatshappeninginmyanmar', 33)
('#auspicerefugees', 18)
('#whatshappeninginmyanmar…', 7)
('#auspicerefugees…', 6)
('#japanese', 5)
('#may26coup…', 3)
('#whatshappeningi…', 3)
('#kayah', 2)
...

{'words': "['#may26coup', '#myanmar', '#whatshappeninginmyanma

-------------------------------------------
Time: 2021-05-27 08:31:20
-------------------------------------------
('#may26coup', 38)
('#myanmar', 36)
('#whatshappeninginmyanmar', 36)
('#auspicerefugees', 21)
('#whatshappeninginmyanmar…', 9)
('#auspicerefugees…', 6)
('#japanese', 5)
('#may26coup…', 4)
('#thailand', 3)
('#whatshappeningi…', 3)
...

{'words': "['#may26coup', '#myanmar', '#whatshappeninginmyanmar', '#auspicerefugees', '#whatshappeninginmyanmar…', '#auspicerefugees…', '#japanese', '#may26coup…', '#thailand', '#whatshappeningi…', '#thai', '#kayah', '#myanm…', '#french', '#myanmar’s', '#japan', '#whats…', '#myanmarpeople', '#bago', '#atrocityalert:', '#southokkala', '#breaking', '#artist', '#conflictmap', '#maps', '#whatshappeningnowinmyanmar', '#myanmarmilitary', '#conflicttracker', '#asean', '#myanmar/#burma:']", 'counts': '[38, 36, 36, 21, 9, 6, 5, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'}
-------------------------------------------
Time: 2021-

KeyboardInterrupt: 