## Usage

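- Python 3.7
- pipenv
- python-dotenv, with a `.env` file defining `CONSUMER_KEY`, `CONSUMER_SECRET`, `ACCESS_TOKEN_KEY` and `ACCESS_TOKEN_SECRET`
- Twitter API access (and the `python-twitter` client library)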

In [1]:
# stdlib
import os
import pickle
from urllib.parse import quote, urlencode

# libraries
import twitter
from dotenv import load_dotenv

In [2]:
# Load key=value pairs from a local .env file into the process environment.
load_dotenv()

# Twitter API credentials, read from the environment so that they never appear
# in the notebook source.
CONSUMER_KEY = os.getenv("CONSUMER_KEY")
CONSUMER_SECRET = os.getenv("CONSUMER_SECRET")
ACCESS_TOKEN_KEY = os.getenv("ACCESS_TOKEN_KEY")
ACCESS_TOKEN_SECRET = os.getenv("ACCESS_TOKEN_SECRET")

# os.getenv returns None for an unset variable. Stop here and report the missing
# variable names (never their values) instead of handing None to the API client.
_missing_credentials = [
    name
    for name, value in (
        ("CONSUMER_KEY", CONSUMER_KEY),
        ("CONSUMER_SECRET", CONSUMER_SECRET),
        ("ACCESS_TOKEN_KEY", ACCESS_TOKEN_KEY),
        ("ACCESS_TOKEN_SECRET", ACCESS_TOKEN_SECRET),
    )
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        "Missing Twitter credentials in the environment/.env file: "
        + ", ".join(_missing_credentials)
    )

# WARNING: do not commit this file to git with these values printed to a cell's output

In [3]:
# Build the API client from the four credential constants read from the environment.
api = twitter.Api(
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
    access_token_key=ACCESS_TOKEN_KEY,
    access_token_secret=ACCESS_TOKEN_SECRET,
)

# Sanity check: print the account record the API returns for these credentials.
print(api.VerifyCredentials())

{"created_at": "Tue Oct 24 06:56:07 +0000 2017", "default_profile": true, "description": "Software developer, AI enthusiast", "favourites_count": 4, "followers_count": 3, "friends_count": 69, "id": 922718326249287681, "id_str": "922718326249287681", "lang": "en", "location": "Melbourne, Victoria", "name": "Philip Castiglione", "profile_background_color": "F5F8FA", "profile_image_url": "http://pbs.twimg.com/profile_images/922742937741438976/WR7tRb1R_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/922742937741438976/WR7tRb1R_normal.jpg", "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "screen_name": "PhilCastiglione", "url": "https://t.co/dZeZogyr0o"}


In [4]:
# Search term; it is interpolated into both the search query and the cache filename.
term = "libspill"
per_page = 100 # max for this endpoint (a request for 500 still returned 100 results)

In [14]:
# Build the query string with urlencode so that the search term is percent-encoded:
# a literal '#' (e.g. term = "#libspill") would otherwise start the URL fragment and
# cut the query short. quote_via=quote encodes spaces as %20 rather than '+'.
# The hand-built string previously used here carried a stray trailing %20 after the
# term; it is not reproduced.
# Earlier variant, kept for reference: the query also passed since=2014-07-19.
raw_query = urlencode(
    {
        "q": term,
        "result_type": "recent",
        "count": per_page,
        # presumably what populates `full_text` on the returned statuses -- confirm
        "tweet_mode": "extended",
    },
    quote_via=quote,
)
results = api.GetSearch(raw_query=raw_query)

In [15]:
# Quick inspection of the search response. Only the final expression is shown as
# the cell output; the `# =>` comments record what the earlier ones evaluated to.
len(results)
# => 100, even though the query was initially run with `per_page = 500`

type(results)
# => list

type(results[0])
# => twitter.models.Status

twitter.models.Status

In [19]:
# Show the full text of one sample status (index 9); the search query requested tweet_mode=extended.
results[9].full_text

"@PatsKarvelas I'm not seeing the problem here.\n#AusPol  #LibSpill\n\n(Also, is Old MacDonald saying that he thinks Dutton would be found ineligible by the High Court over this issue?)"

In [22]:
# Show the last status in the result list (the cell output is its repr).
results[-1]

Status(ID=1040501684416344064, ScreenName=JulieLyford, Created=Fri Sep 14 07:25:07 +0000 2018, Text="RT @PeterWMurphy1: Watching @ScottMorrisonMP's original tweet, it seems the PM and his Govt are boasting about how much money they have. So…")

## Data Plans

- if there are no cached tweets (or if some flag is passed):
- search for tweets with a given hashtag using the 30-day search API, in batches of 100 tweets
- receive 100 tweets, filter them by some characteristics and stream the filtered tweets into a collection
- page forward and repeat
- when the collection contains N tweets (or if we reach the end of the 30-day feed?), stop fetching
- cache the results so we can read them from a file and not run out of monthly requests

In [26]:
# Cache the fetched statuses on disk, one pickle file per search term, so they
# can be read back from the file instead of being requested again.
cache_filename = f"cached_tweets_{term}.pkl"
with open(cache_filename, mode='wb') as cache_file:
    pickle.dump(results, cache_file)

In [31]:
# Round-trip check: read the cache file back into a separate name.
# NOTE: pickle.load can execute arbitrary code, so only load cache files that
# this notebook wrote itself.
test_load_results = None
with open(cache_filename, mode='rb') as cache_file:
    test_load_results = pickle.load(cache_file)

In [34]:
# Check the class of the first unpickled item (the cell output shows twitter.models.Status).
type(test_load_results[0])

twitter.models.Status

## Ideas

- some potential hashtags include #libspill and #auspol
- filter tweets: too short, replace/ignore hyperlinks, strip formatting ('\n') etc...

## Notes

- will need to handle API rate limiting (15 calls per 15 minutes)
    - one option is to use the client library's built-in sleep option: `sleep_on_rate_limit=True`
    - alternatively, we could catch the rate-limit error ourselves or write our own retry loop