In [1]:
# Before running this notebook, open the secrets.py file and replace the
# values in it with your own Twitter API keys, secrets, and tokens.
# Notebook execution will fail if they are not set.
from secrets import twitter_secrets as ts

## SETTINGS

# Hashtag or phrase whose tweets should be collected
QUERY: str = "word"

# Number of tweets after which the script shuts down
STOP_AFTER: int = 50

# Directory the collected data is written to
OUT_PATH: str = "/home/jovyan/data-sets/twitter/"


In [2]:
import json
import tempfile
import requests
import pathlib
from datetime import datetime as dt
from uuid import uuid4
from requests_oauthlib import OAuth1Session

# Create output directory if it does not exist
out_dir = pathlib.Path(OUT_PATH)
out_dir.mkdir(parents=True, exist_ok=True)

## SCRIPT STARTS HERE
query_data = {
    # Term to track: every "#" is stripped and the term is lower-cased
    "track": QUERY.replace("#", "").lower(),
    # Which language to use. We want this set to en for English
    "language": "en",
}

# Establish connection
twitter = OAuth1Session(
    client_key=ts.CONSUMER_KEY,
    client_secret=ts.CONSUMER_SECRET,
    resource_owner_key=ts.ACCESS_TOKEN,
    resource_owner_secret=ts.ACCESS_SECRET,
)

url = "https://stream.twitter.com/1.1/statuses/filter.json"
# Let requests build the query string so that spaces, "&", etc. in QUERY are
# percent-encoded instead of producing a malformed URL.
query_url = requests.Request("GET", url, params=query_data).prepare().url

print(f"STREAMING {STOP_AFTER} TWEETS")
saved = 0  # number of tweets successfully written to disk so far
with twitter.get(query_url, stream=True) as response:
    # Surface HTTP errors (e.g. rejected credentials) as an exception instead
    # of reporting the error body as mangled tweet lines.
    response.raise_for_status()

    lines = response.iter_lines()
    # The limit is checked *before* pulling the next line, so the loop stops as
    # soon as enough tweets are saved rather than waiting for one more line.
    while saved < STOP_AFTER:
        raw_tweet = next(lines, None)
        if raw_tweet is None:
            # The stream ended before STOP_AFTER tweets were collected
            break
        if not raw_tweet:
            # Blank keep-alive line: not a tweet, must not count or be saved
            continue

        try:
            tweet = json.loads(raw_tweet)
            print(
                f"{saved + 1}/{STOP_AFTER}: {tweet['user']['screen_name']} @ {tweet['created_at']}: {tweet['text']}\n"
            )
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # KeyError/TypeError cover JSON that is not shaped like a tweet.
            # Skip the line without counting it towards STOP_AFTER.
            print(f"{saved + 1}/{STOP_AFTER}: ERROR ===> Oof, encountered a mangled line of data here..\n")
            continue

        # write to disk
        # A Path is not a file handle, so it is not used in a `with` statement:
        # write_bytes opens, writes and closes the file by itself.
        out_file = out_dir / f"{dt.now().timestamp()}_{uuid4()}.json"
        out_file.write_bytes(raw_tweet)
        saved += 1


STREAMING 500 TWEETS
1/500: trendyblonde @ Mon May 18 14:09:13 +0000 2020: RT @A_AMilne: Christopher Robin came down from the Forest feeling all sunny and careless, and just as if twice nineteen didn't matter a bit…

2/500: RandolphMacon @ Mon May 18 14:09:13 +0000 2020: Rain today, but sunny days ahead, Yellow Jackets! #GreatDayToBeaYJ

3/500: ChillKessel @ Mon May 18 14:09:16 +0000 2020: @DarrinS44793858 @Pens_Lynn @trinault @steelergrl66 Same to you! It's so sunny out where I am I can't wait for the work day to be over! 🤣

4/500: wc_courtenay @ Mon May 18 14:09:17 +0000 2020: Mon 07:00: Sunny; Temp 11 C; Humidity 78%; Press 101 kPa / rising. https://t.co/TeUP8HlOqM

5/500: tarisgal42 @ Mon May 18 14:09:18 +0000 2020: RT @carolJhedges: Not just @LordAshcroft who hangs out in that sunny tax~haven.
Here's another unpleasant little mucker.
Founder of Leave d…

6/500: wc_estevan @ Mon May 18 14:09:23 +0000 2020: Mon 08:00: Sunny; Temp 16 C; Wind S 52 km/h gust 68 km/h; Humidity 51%; Pres