In [1]:
# Imports
import tweepy
import csv
import pandas as pd
import json


In [2]:
# The access tokens live in a separate csv file that is not included with the git push,
# so the credentials stay local.
keys = pd.read_csv('keys.csv')

# Each credential is the value stored under label 0 of its column.
con_key, con_secret = keys.con_key[0], keys.con_secret[0]
acc_token, acc_secret = keys.acc_token[0], keys.acc_secret[0]


In [3]:
# Build the OAuth handler from the consumer key/secret pair, then attach the access token pair.
auth = tweepy.OAuthHandler(con_key, con_secret)
auth.set_access_token(acc_token, acc_secret)

# Create the Twitter API client that uses this authentication.
api = tweepy.API(auth)

In [23]:
#We create a subclass of tweepy.StreamListener that appends one pipe-delimited line to a file per status
class StreamListener_Writer(tweepy.StreamListener):
    """Stream listener that appends every received status to a pipe-delimited text file.

    Call set_up_file() before streaming: it stores the output path on the instance
    and writes the header row. on_status() then appends one row per status, with
    the fields in the same order as COLUMNS.
    """

    #column names, in the order used by both the header row and every data row
    COLUMNS = (
        "AuthorID", "Author.Name", "Author.Screen_Name", "Author.Location",
        "Author.Description", "Followers.Count", "Friends.Count", "Statuses.Count",
        "Language", "Coordinates", "Created_At", "Favorite_count", "Geo", "Text",
        "Source", "retweet_count", "Hashtags",
    )

    @staticmethod
    def _clean(value):
        """Return str(value) with the field delimiter and line breaks neutralised.

        '|' becomes '-' and both '\\n' and '\\r' become a space, so a single
        field can never split a row or shift its columns.
        """
        return str(value).replace('|', '-').replace('\n', ' ').replace('\r', ' ')

    @staticmethod
    def _text_or_unknown(value):
        """Return value unchanged when it is a str, otherwise the placeholder "Unknown"."""
        #some of these come in as weird types (nonetype and tuple). So, only keep the string ones.
        return value if isinstance(value, str) else "Unknown"

    def on_status(self, t):
        """Append one row describing status `t` to the file chosen in set_up_file().

        Parameters:
            t: the status object delivered by the stream. This method reads its
               author, entities, coordinates, created_at, favorite_count, geo,
               text, source and retweet_count attributes.
        """
        author = t.author._json
        #pull out the hashtags (an absent "hashtags" entry is treated as no hashtags)
        hashtags = [h["text"] for h in t.entities.get("hashtags", [])]

        #NOTE(review): streamed statuses may carry truncated text; confirm whether
        #the extended_tweet full_text should be captured instead.
        fields = (
            author.get('id_str', ''),
            author.get("name", " "),
            author.get("screen_name", " "),
            self._text_or_unknown(author.get("location", " ")),
            self._text_or_unknown(author.get("description", " ")),
            author.get('followers_count', '0'),
            author.get('friends_count', '0'),
            author.get('statuses_count', '0'),
            author.get("lang", " "),
            t.coordinates,
            t.created_at,
            t.favorite_count,
            t.geo,
            getattr(t, 'text', ''),
            t.source,
            t.retweet_count,
            hashtags,
        )
        #every field goes through the same sanitiser, so no column can break the row
        row = "|".join(self._clean(field) for field in fields)

        with open(self.filename, "a", newline="", encoding='utf-8') as tweets:
            tweets.write(row + "\n")

    #disconnect the stream if we receive an error message indicating we are overloading Twitter
    def on_error(self, status_code):
        """Return False for status code 420; return None for any other code."""
        if status_code == 420:
            #returning False from on_error disconnects the stream
            return False

    #remember the output file name and start the file with the header row
    def set_up_file(self, filename):
        """Store `filename` on the instance and (over)write it with the header row.

        The file is opened in "w" mode, so any existing content is discarded.
        """
        self.filename = filename
        #set up the file
        with open(filename, "w", newline="", encoding='utf-8') as tweets:
            tweets.write("|".join(self.COLUMNS) + "\n")
     
        
    

In [24]:
# Instantiate our StreamListener_Writer and point it at its output file.
my_stream_writer = StreamListener_Writer()
my_stream_writer.set_up_file('metoo_streaming.txt')

# Create the stream object: it reuses the API client's authentication and
# hands incoming events to the listener.
my_stream = tweepy.Stream(
    auth=api.auth,
    listener=my_stream_writer,
)


In [None]:
# Now we're ready to start streaming! We ask for tweets that match the tracked term "#metoo".
# You can pause the stream by interrupting the Python kernel.
tracked_terms = ['#metoo']
my_stream.filter(track=tracked_terms)

In [27]:
# Even after you pause by interrupting the kernel, the stream is still connected to Twitter!
# To disconnect (for example, if you want to change which terms you are tracking),
# call the stream's disconnect() method.

my_stream.disconnect()