## Stream with toolz

In [1]:
import toolz as tz
import toolz.curried as c
import re
import matplotlib.pyplot as plt
import numpy as np
import time

In [2]:
"""
The python script TwitterStream.py uses the tweepy library to stream from twitter
If you want to know more about twitter streaming, there are many useful tutorials
https://www.dataquest.io/blog/streaming-data-python/
https://cmry.github.io/notes/twitter-python
http://adilmoujahid.com/posts/2014/07/twitter-analytics/
"""
import TwitterStream as ts

Several attributes of a tweet can be retrieved through tweepy.
The components of a tweet object are described in the Twitter data dictionary: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object

For example:
1. tweet - `text`
2. hashtags - `entities.get('hashtags')`
3. coordinates - `coordinates`

## Task1
Extract top trending hashtags

In [None]:
def extracthashtags(tweet):
    """Extract the hashtag texts from a streamed tweet.

    Hashtags are read from ``tweet.entities.get('hashtags')``, which is
    expected to be a list of dicts; the ``'text'`` value of each dict is kept.

    Args:
        tweet: object exposing an ``entities`` mapping
            (presumably a tweepy status - confirm against TwitterStream).

    Returns:
        list with the ``'text'`` value of every hashtag entry, in order.
        Empty when the tweet has no hashtags, or when the ``'hashtags'``
        entry is missing or None.
    """
    # .get() yields None when the key is absent; fall back to an empty list
    # so a tweet without hashtag data does not raise while streaming.
    hashtag_entries = tweet.entities.get('hashtags') or []
    return [entry['text'] for entry in hashtag_entries]

from collections import defaultdict

def trends(hashtag):
    """Tally how often each hashtag occurs, ignoring case.

    Every entry of ``hashtag`` is lower-cased before it is counted.
    Returns a defaultdict(int) - keys: lower-cased hashtag, values: count.
    """
    tally = defaultdict(int)
    # Lower-case lazily so each entry is normalised right before it is counted
    for tag in (raw.lower() for raw in hashtag):
        tally[tag] += 1
    return tally


def top10trends(data, n=10):
    """Return the most frequently used hashtags, highest count first.

    Args:
        data: dict mapping hashtag -> count.
        n: maximum number of entries to return (non-negative); defaults to 10.

    Returns:
        list of at most ``n`` ``(hashtag, count)`` tuples sorted by
        descending count. Only the count is used as the sort key - hashtag
        length plays no part - so hashtags with equal counts keep the
        relative order they have in ``data``.
    """
    # c.get(1) picks the count out of each (hashtag, count) pair
    by_count = sorted(data.items(), key=c.get(1), reverse=True)
    return by_count[:n]

In [None]:
"""Stream tweets, get hashtags and 
analyse the top 10 from a set of 100 incoming tweets"""
trends = tz.pipe(ts.gettweets(),
               c.map(extracthashtags),
               tz.concat,
               c.sliding_window(100),
               c.map(trends),
               c.map(top10trends))

In [None]:
from IPython.display import clear_output
for i in trends:
    clear_output(wait=True)
    print(i)

Let's break this down:
<br>`map` - Returns an iterator that applies function to every item of iterable, yielding the results
<br>`concat` - Concatenates a sequence of iterables into one flat sequence
<br>`sliding_window` - A sequence of overlapping subsequences
<br> `take` - get the first n elements of a sequence
<br> `do` - Runs func on x, returns x
<br> `get` - Get element in a sequence or dict
<br> `lambda` - anonymous functions (i.e. functions that are not bound to a name) can be built using this construct

In [None]:
# sliding_window(2) pairs every element with its successor,
# so range(5) is expected to give [(0, 1), (1, 2), (2, 3), (3, 4)]
tz.pipe(range(5), c.sliding_window(2), list)