In [None]:
# SETUP
# Configure regular Twitter API
# Note that this cell has been separated - if you reconnect constantly, Twitter will get pissy with you
# You don't need to reconnect constantly, so you can just use the twython connection in other cells after running this cell.
# Only exception is the streaming cell at the bottom
# You must re-run this cell anytime you do Kernel->Restart or wait more than 5 minutes
# You also might need to enter the command "pip install twython" (no quotes) in a separate command prompt/terminal window first
# This is so twython will be installed

from twython import Twython
import pprint

# This twython object represents our connection to Twitter via Python. By using methods housed within this object,
# we can grab whatever we want from Twitter
# You have to get the API keys yourself by making an account on Twitter.
# Once you have a Twitter account, go to dev.twitter.com and create a "new app". Just fill it out with dummy information
# If you look around, you should be able to find your app key and your app secret key.
import os

# Read the API credentials from environment variables when they are set, so that real keys
# never have to be typed into (and accidentally shared with) the notebook itself.
# If TWITTER_APP_KEY / TWITTER_APP_SECRET are not set, the original placeholders are used,
# so you can still paste your keys over them exactly as before.
twython = Twython(app_key=os.environ.get('TWITTER_APP_KEY', 'YOUR-API-KEY-HERE'),
        app_secret=os.environ.get('TWITTER_APP_SECRET', 'YOUR-API-SECRET-KEY-HERE'))

print("Twitter API Ready!")

In [None]:
# Task #1: Get user information for a list of users
# Standard Rate Limit: 300 users per 15 minutes

import datetime
import delorean # May need to do "pip install delorean" (without quotes) to install

# Get a list of Twitter screen names
# The screen names we want to look up
congress_twitter_handles = ["MacTXPress","RepAdamSmith","RepMikeTurner"]

# One request to Twitter returns the user objects for every handle in the list
users = twython.lookup_user(screen_name=congress_twitter_handles)

# The header of the request we just made tells us how many requests are left in this window
remaining_header = twython.get_lastfunction_header(header='X-Rate-Limit-Remaining')
requests_until_limit = str(remaining_header)
print("You have: " + requests_until_limit + " requests remaining in this 15 minute window")

# For each user, print a few common fields, one per line, in this order
for user in users:
    for field in ('description', 'geo_enabled', 'name', 'statuses_count'):
        print(user[field])

    # Parse the creation timestamp with delorean and show it as a datetime
    # (parsed datetimes can be sorted if you collect them in a list)
    account_created = delorean.parse(user['created_at'])
    print(account_created.datetime)
print("Done!")

# Important Twitter User-object data:

# user['created_at'] - When the account was first created
# user['description'] - Description of account written by user
# user['entities'] - Nested dictionary with information on websites the user associates
# user['favourites_count'] - How many favourites there are - note the British spelling of the key
# user['friends_count'] - How many friends they have
# user['geo_enabled'] - Are they mappable when they tweet?
# user['id'] - Their unique user ID number
# user['name'] - Their real name, as they put it
# user['statuses_count'] - The number of overall status updates that the user has made

In [None]:
# Task #2: Get Tweets from a single user's timeline
# Rate Limit: 300 Tweets per 15 minutes

import pprint

# The meat - Grab the twitter timeline for the given screen name.
# Count naturally means how many of the most recent tweets. count=100 is thus the past 100 tweets
# Ask for the 100 most recent tweets on RepAdamSmith's timeline
timeline_query = {'screen_name': 'RepAdamSmith', 'count': 100}
smith_tweets = twython.get_user_timeline(**timeline_query)

# Report how many requests are left, read from the header of the request we just made
remaining_header = twython.get_lastfunction_header(header='X-Rate-Limit-Remaining')
requests_until_limit = str(remaining_header)
print("You have: " + requests_until_limit + " requests remaining in this 15 minute window")

# Pretty-print the full nested dictionary of every tweet
for tweet in smith_tweets:
    pprint.pprint(tweet)

# Below are additional optional parameters that can be added to twython.get_user_timeline()

# Optional parameter: exclude_replies - True or False. Excludes reply-Tweets
# Optional parameter: count - Up to 200 at a time. Sets the number of Tweets to return
# Optional parameter: since_id - Go forward in time from the since_id for this person (integer)
# Optional parameter: max_id - Go backward in time from the max_id for this person (integer)
# Optional parameter: exclude_replies - When set to True, it excludes Tweets sent as replies
# Optional parameter: include_rts - When set to False, ignores all retweets

In [None]:
# Task #3: Let's practice writing out critical tweet information to a file!
# This uses the smith_tweets object created in the previous cell
# Thus you naturally must RUN the above cell for this one to work!

import unicodecsv as csv

# Open your file
# Using "with" guarantees the file is closed even if writing one of the rows fails.
# The file is opened in binary mode ('wb') and handed to the unicodecsv writer.
with open('tweet_file.csv','wb') as test_file:
    csv_write_file = csv.writer(test_file)

    # Write the first row - column names
    csv_write_file.writerow(['Hashtags','id','text','user_mentions','created_at','user','in_reply_to'])

    # Loop through the tweets
    for tweet in smith_tweets:
        # Advanced Trick - join() + a comprehension = Combo String Generator!
        # Each tweet carries a list of mention dictionaries and a list of hashtag dictionaries;
        # pull one field out of each dictionary and glue them together with ", "
        screen_name_mentions = ", ".join(mention['screen_name'] for mention in tweet['entities']['user_mentions'])
        hashtags = ", ".join(hashtag['text'] for hashtag in tweet['entities']['hashtags'])

        # Finally, write the row (same column order as the header row above)
        current_row = [hashtags,tweet['id'], tweet['text'],screen_name_mentions,
                       tweet['created_at'],tweet['user']['id'],tweet['in_reply_to_user_id']]
        csv_write_file.writerow(current_row)
print("Done!")

# Important Tweet-object data:

# tweet['entities']['hashtags'] - The hashtags used in this specific tweet, organized in a Python list
# tweet['id'] - The tweet's unique ID number
# tweet['place']['bounding_box']['coordinates'] - A list of GPS lat-lon pairs forming a "bounding box"
# tweet['text'] - The actual text of the tweet
# tweet['entities']['user_mentions'] - A LIST of DICTIONARIES containing information about the users mentioned in the tweet
# tweet['created_at'] - The date/time the tweet was created at.
# tweet['user'] - A DICTIONARY containing all the user's information who tweeted this tweet.
# tweet['in_reply_to_status_id'] - Tweet ID if Tweet was in reply; None otherwise
# tweet['in_reply_to_user_id'] - User ID if Tweet was in reply; None otherwise

In [None]:
# Task #4: Do a search for tweets in the archive
# Rate Limit: 450 per 15 minutes
# Note: THIS LOOKS BACK IN TIME FROM THE PRESENT
import time
# Note 1: Count defaults to 15. 100 is the maximum per search
# Note 2: max_id and since_id are valuable parameters, since id is a unique value 
# of a tweet and are in chronological order. Thus, since_id looks forward in time from the ID you give it
# and max_id looks UP TO the ID you give it
# Note 3: language is a best-guess attempt at determining a tweet's language.
# Use the codes located in https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes

# This searches backwards from the Present for tweets that contain the word Python and
# are primarily in the English language, then prints the first eight of them.
# NOTE: the search endpoint's language filter is called "lang" (the streaming API further
# down uses "language"). With language='en' the search filter was silently ignored.
# count=10 is the page size: twython.cursor() requests further pages as needed while we loop.
results = twython.cursor(twython.search, q='Python', count=10, lang='en')

# Let's only print out the first eight
max_tweets_to_print = 8

# twython.cursor() is lazy - the first request to Twitter is only sent when this loop starts
for idx, tweet in enumerate(results):
    pprint.pprint(tweet['text'])
    pprint.pprint(tweet['user']['screen_name'])

    #retweets = twython.get_retweets(id=tweet['id'])
    # Nested for loop to print all retweets to a given tweet
    #for retweet in retweets:
        #print("RETWEET!!")
        #pprint.pprint(retweet)

    # idx starts at 0, so stop once idx + 1 tweets have been printed
    if idx + 1 >= max_tweets_to_print:
        break

# Read the rate-limit header only AFTER looping: before the loop no search request has been
# made yet, so the header would describe whatever request an earlier cell happened to make.
requests_until_limit = str(twython.get_lastfunction_header(header='X-Rate-Limit-Remaining'))
print("You have: " + requests_until_limit + " requests remaining in this 15 minute window")

# Important Tweet-object data:

# tweet['entities']['hashtags'] - The hashtags used in this specific tweet, organized in a Python list
# tweet['id'] - The tweet's unique ID number
# tweet['place']['bounding_box']['coordinates'] - A list of GPS lat-lon pairs forming a "bounding box"
# tweet['text'] - The actual text of the tweet
# tweet['entities']['user_mentions'] - A LIST of DICTIONARIES containing information about the users mentioned in the tweet
# tweet['created_at'] - The date/time the tweet was created at.
# tweet['user'] - A DICTIONARY containing all the user's information who tweeted this tweet.
# tweet['in_reply_to_status_id'] - Tweet ID if Tweet was in reply; None otherwise
# tweet['in_reply_to_user_id'] - User ID if Tweet was in reply; None otherwise

In [None]:
# Task #5: What if I need more than a few hundred tweets?
# How do we make Python continually get tweets until a certain point?

# Let's say we want RepAdamSmith's Tweets going back to October 12th, 2011 at 9:42 PM
import delorean
import pprint
import unicodecsv as csv
import time

# The except clause below needs this name; without the import, hitting the rate limit
# would crash with a NameError instead of being handled.
from twython import TwythonRateLimitError

# Set an end date datetime object. Thus, grab tweets "until" this datetime
end_date = delorean.parse("October 12th, 2011 9:42 PM")

# Next, make a True/False variable as a check to see if we've passed the date in question
# It starts at False
date_past = False
all_tweets = []
# 0 means "no max_id yet", i.e. start from the most recent tweets
current_max_id = 0
# Fallback wait (in seconds) if Twitter does not tell us when the rate limit resets
default_wait_seconds = 15 * 60

# Finally, the meat of the cell!
while not date_past:
    # Try-Except is something new. It's something called "error catching"
    # Basically, rather than have Python crash on an error, we want it to handle the error and continue processing
    try:
        timeline_args = {'screen_name': 'RepAdamSmith', 'count': 200}
        # On the first request there is no max_id, so we get the most recent Tweets.
        # Afterwards max_id is the "Starting Point" for the next (older) batch.
        if current_max_id != 0:
            timeline_args['max_id'] = current_max_id
        current_tweets = twython.get_user_timeline(**timeline_args)

        # The next line is a useful method of extracting how many requests you have left in the current 15 minute window
        requests_until_limit = str(twython.get_lastfunction_header(header='X-Rate-Limit-Remaining'))
        print("You have: " + requests_until_limit + " requests remaining in this 15 minute window")

    except TwythonRateLimitError: # This code is only called if the specific error noted occurs
        # Work out how long Twitter wants us to wait, wait that long, and try the same request again.
        # The existing twython connection is reused, so no reconnect (and no credentials) is needed.
        print("Error! Rate Limit has been hit!")
        # 'x-rate-limit-reset' is the time (seconds since the epoch) at which the limit resets
        reset_at = twython.get_lastfunction_header(header='x-rate-limit-reset')
        if reset_at is None:
            remainder = default_wait_seconds
        else:
            # Never pass a negative number to time.sleep() - it raises an error
            remainder = max(0.0, float(reset_at) - time.time())
        print("Now waiting for: " + str(remainder))
        # Wait in sleep mode
        time.sleep(remainder)
        print("Waiting complete! Retrying request...")
        # Go to the beginning of the while loop
        continue

    # An empty batch means there is nothing older left to fetch, so stop here.
    # (Without this check the loop would crash or spin forever once the timeline runs out.)
    if not current_tweets:
        break

    # Notice that I use += rather than .append(). += merges lists, whereas .append() makes a list of lists
    all_tweets += current_tweets

    # Remember, they go in reverse chronological order, so the last tweet is always the furthest back in time
    last_tweet = current_tweets[-1]

    # Get the date of the LAST tweet in the current grab. This will be the one furthest in the past
    current_date = delorean.parse(last_tweet['created_at'])

    # max_id is INCLUSIVE, so ask for everything strictly older than the last tweet we already have.
    # Using the last tweet's own ID would return that tweet again at the top of every batch.
    current_max_id = last_tweet['id'] - 1

    # Datetime check - if the current_date is further in the past than the end_date, end the while loop.
    # This is why we use Delorean! Super-easy datetime comparison is awesome!
    # Note: the final batch is kept whole, so all_tweets may contain some tweets older than end_date.
    if current_date < end_date:
        date_past = True

# Once the looping is complete, we need to write our results
# This should look familiar!
# Open your file
# Using "with" guarantees the file is closed even if writing one of the rows fails.
# The file is opened in binary mode ('wb') and handed to the unicodecsv writer.
with open('smith_tweets.csv','wb') as test_file:
    csv_write_file = csv.writer(test_file)

    # Write the first row - the headers
    csv_write_file.writerow(['Hashtags','id','text','user_mentions','created_at','user','in_reply_to'])

    # Loop through the tweets
    for tweet in all_tweets:
        # Advanced Trick - join() + a comprehension = Combo String Generator!
        # Pull one field out of each mention/hashtag dictionary and glue them together with ", "
        screen_name_mentions = ", ".join(mention['screen_name'] for mention in tweet['entities']['user_mentions'])
        hashtags = ", ".join(hashtag['text'] for hashtag in tweet['entities']['hashtags'])

        # Finally, write the row (same column order as the header row above)
        current_row = [hashtags,tweet['id'], tweet['text'],screen_name_mentions,
                       tweet['created_at'],tweet['user']['id'],tweet['in_reply_to_user_id']]
        csv_write_file.writerow(current_row)
print("All data written!")

In [None]:
# Task #6: Constantly stream tweets (ADVANCED!!)
# Rate Limit: 1% of the "tweets-per-second" going on on the network
# This may seem high, but you'd be amazed how easily you can reach it
# NOTE: THIS LOOKS FORWARD IN TIME FROM THE PRESENT
# WARNING: BE CAREFUL ABOUT DISPLAYING YOUR RESULTS ON THE SCREEN IF YOU'RE STREAMING MORE THAN A FEW HUNDRED TWEETS!!
# JUPYTER NOTEBOOK WILL CRASH!!!

from twython import TwythonStreamer
from timeit import default_timer as timer
import aespy
import atexit
import pprint

# Safety precaution - Destroy all password data upon script exiting from the computer's memory
def destroy_passwords():
    """Remove the module-level ``my_twitter_info`` variable, if it exists, and print "Deleted!".

    Registered with ``atexit`` below so it runs when the interpreter exits.
    """
    # A plain "del my_twitter_info" inside a function makes Python treat the name as a LOCAL
    # variable, so it raised UnboundLocalError every time. Removing the key from globals()
    # deletes the real module-level variable, and does nothing if it is already gone.
    globals().pop('my_twitter_info', None)
    print("Deleted!")
atexit.register(destroy_passwords)

gathered_tweets = []

class JoshStreamer(TwythonStreamer):
    """Streamer that prints every message it receives and stores it in ``gathered_tweets``.

    ``on_success`` is called with each piece of streamed data; ``on_error`` is called with
    the status code when the stream reports an error.
    """

    # Class-level fallback: this line runs ONCE, when the class is defined (not when a
    # streamer is created). __init__ below replaces it with a per-streamer start time.
    start = timer()

    def __init__(self, *args, **kwargs):
        """Create the streamer (same arguments as TwythonStreamer) and start its clock."""
        super(JoshStreamer, self).__init__(*args, **kwargs)
        # Start the clock when THIS streamer is created, so the elapsed time printed in
        # on_success is measured from the streamer's creation.
        self.start = timer()

    def on_success(self, data):
        """Print the running time and the received data, then save the data.

        data: the nested dictionary received from the stream.
        """
        # Set a timer for Right Now
        current = timer()

        # Elapsed time: whole seconds since the clock started, converted to minutes
        minutes_elapsed = round(current-self.start)/60
        print("Current time running (in minutes) is: {0}".format(minutes_elapsed))

        # Print out full nested dictionary
        pprint.pprint(data)

        # Add nested dictionary to the module-level gathered_tweets list
        gathered_tweets.append(data)

    def on_error(self, status_code, data):
        """Print the error status code and disconnect the streamer.

        status_code: the error code reported for the stream.
        data: additional error data (not used here).
        """
        # If you get Error Code: 406, it means you're using screen names, NOT ids!
        print ("Error Code: " + str(status_code))
        self.disconnect()

# Read the four credentials from password_file.txt, one per line, in this order:
# app key, app secret, OAuth token, OAuth token secret.
# strip() removes the newline AND any stray spaces or Windows "\r" characters, which would
# otherwise end up inside the keys. Completely blank lines are skipped.
with open("password_file.txt","r") as my_pass_file:
    my_twitter_info = [line.strip() for line in my_pass_file if line.strip()]

# Fail with a clear message instead of an IndexError if the file is incomplete
if len(my_twitter_info) < 4:
    raise ValueError("password_file.txt must contain 4 non-empty lines: "
                     "app key, app secret, OAuth token, OAuth token secret")

# Notice I'm creating a JoshStreamer object, NOT a generic Twython Object like above
streamer = JoshStreamer(app_key=my_twitter_info[0],
        app_secret=my_twitter_info[1],
        oauth_token=my_twitter_info[2],
        oauth_token_secret=my_twitter_info[3])

# Clear Twitter password info
my_twitter_info = ''

# How to actually begin the streamer:
# CRITICAL WARNING: THE THREE PRIMARY FILTERS (track/follow/location) USE <OR>, NOT <AND>
# Thus, using two "primary filters" at once is usually a bad idea
# Example: if you use streamer.statuses.filter(track='Python',follow=congress_twitter_handles)
# it will not grab all tweets containing the text Python AND following those congressmen
# Rather it will grab all tweets containing the text Python OR following those congressmen
# Idiotic, I know.
# The proper way to further filter is to filter from within the on_success function

# Track tweet stream by text-matching. Can be used for hashtag matching
#streamer.statuses.filter(track='Python')

# Get all English tweets with the word 'the' (language='en' selects English, not French)
# This is the call that actually starts the streamer created above.
streamer.statuses.filter(track='the',language='en',stall_warnings=True, filter_level='medium')

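# NOTE: "follow" expects numeric user IDs, not screen names - passing screen names like
# the ones below gives Error Code 406. Look up each user's ID first (user['id'] from Task #1).
# congress_twitter_handles = ["MacTXPress","RepAdamSmith","RepMikeTurner"]
# streamer.statuses.filter(follow=congress_twitter_handles)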

# WARNING  - WILL ONLY WORK FOR USERS WITH GEOTAGGING ENABLED!
# streamer.statuses.filter(locations='-122.75,36.8,-121.75,37.8,-74,40,-73,41') # New York or San Francisco

# Important Twitter Tweet-Object Data

# tweet['entities']['hashtags'] - The hashtags used in this specific tweet, organized in a Python list
# tweet['id'] - The tweet's unique ID number
# tweet['place']['bounding_box']['coordinates'] - A list of GPS lat-lon pairs forming a "bounding box"
# tweet['text'] - The actual text of the tweet
# tweet['entities']['user_mentions'] - A LIST of DICTIONARIES containing information about the users mentioned in the tweet
# tweet['created_at'] - The date/time the tweet was created at.
# tweet['user'] - A DICTIONARY containing all the user's information who tweeted this tweet.
# tweet['in_reply_to_status_id'] - Tweet ID if Tweet was in reply; None otherwise
# tweet['in_reply_to_user_id'] - User ID if Tweet was in reply; None otherwise

In [None]:
# Finally, useful Trick: Convert from string literal to dictionary
# Maybe you saved tweet output dictionary to a text file, for example
import ast
import pprint
# The text of a dictionary, exactly as it might have been saved to a text file
saved_text = "{'muffin' : 'lolz', 'foo' : 'kitty'}"

# ast.literal_eval turns that text back into a real Python dictionary
my_dict = ast.literal_eval(saved_text)
pprint.pprint(my_dict)