# Requires 
[dataset](https://dataset.readthedocs.io/en/latest/)  
[tweepy](http://tweepy.readthedocs.io/en/v3.5.0/)  

`pip install tweepy`  
`pip install dataset`

In [None]:
import tweepy
import dataset
import json

# Fill in your twitter API keys and tokens  
If you need access to the API, sign up [here](https://dev.twitter.com/).

In [None]:
# Credentials are taken from environment variables when they are set, so real
# secrets do not have to be saved (and accidentally shared) inside the notebook.
# When a variable is not set the placeholder is used and must be replaced by hand.
import os

CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "<Placeholder>")
CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "<Placeholder>")
ACCESS_TOKEN = os.environ.get("TWITTER_ACCESS_TOKEN", "<Placeholder>")
ACCESS_TOKEN_SECRET = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "<Placeholder>")

# Create a [StreamListener](http://docs.tweepy.org/en/v3.5.0/streaming_how_to.html)

In [None]:
class StreamListener(tweepy.StreamListener):
    '''Listener that stores non-retweet statuses until LIMIT have been saved.

    Relies on names defined elsewhere in the notebook: LIMIT, PRINT_TO_NOTEBOOK,
    db and store_tweet().
    '''

    def __init__(self, api=None):
        '''Create the listener.

        api: optional tweepy API object, forwarded to the base class.
        '''
        # Forward `api`; previously the argument was accepted but dropped.
        super(StreamListener, self).__init__(api=api)
        # Number of tweets counted (and handed to store_tweet) so far.
        self.num_tweets = 0

    def _stop_stream(self):
        '''Report that the limit was reached and return False to end the stream.'''
        print('\n \n Stored {} tweets {}'.format(LIMIT, db))
        return False

    def on_status(self, status):
        '''This function is called each time a new tweet is sent to subscribed stream.

        Returns False once LIMIT tweets have been stored (which stops the
        stream); otherwise returns None.
        '''
        # Guard for the case where the limit was already reached (e.g. LIMIT <= 0).
        if self.num_tweets >= LIMIT:
            return self._stop_stream()

        # skip retweets
        if hasattr(status, 'retweeted_status'):
            return

        # count tweet
        self.num_tweets += 1
        if PRINT_TO_NOTEBOOK == 'Y':
            print(status.text)

        # Collect hashtag texts as a JSON-encoded list. A status without
        # entities/hashtags yields '[]' instead of leaving `hashtags` unbound.
        entities = getattr(status, 'entities', None) or {}
        hashtags = json.dumps([tag['text'] for tag in entities.get('hashtags', [])])

        # build dictionary of elements you want to save
        # Just some of many available fields
        # https://dev.twitter.com/overview/api/tweets
        # https://dev.twitter.com/overview/api/users
        tweet_dict = {
            'description': status.user.description,
            'loc': status.user.location,
            'text': status.text,
            'name': status.user.screen_name,
            'user_created': status.user.created_at,
            'followers': status.user.followers_count,
            'id_str': status.id_str,
            'retweet_count': status.retweet_count,
            'friends_count': status.user.friends_count,
            'hashtags': hashtags,
        }

        store_tweet(tweet_dict)

        # Stop as soon as the limit is reached rather than waiting for one
        # more tweet to arrive, which could take arbitrarily long on a quiet topic.
        if self.num_tweets >= LIMIT:
            return self._stop_stream()

    def on_error(self, status_code):
        '''Handle an error status code from the stream.

        Returns False on 420 (Twitter is rate limiting) to exit. Any other
        code falls through and returns None.
        '''
        if status_code == 420:
            print('Twitter rate limit error_code {}, exiting...'.format(status_code))
            return False
        # NOTE(review): for other codes None is returned; presumably tweepy
        # keeps the stream alive / retries in that case — confirm.

In [None]:
def store_tweet(tweet_dict):
    '''Insert one tweet record into the 'tweets' table of the module-level db.

    tweet_dict: mapping of column name to value for the tweet being saved.
    '''
    db['tweets'].insert(tweet_dict)

### Configuration:
`TOPICS` = topics you want to follow/stream  
`LIMIT` = STOP after X number of tweets collected  
`DATABASE_NAME` = name of the SQLite database  
`PRINT_TO_NOTEBOOK` = if `Y`, prints tweet text to the notebook (turn off for a large number of tweets)

In [None]:
# Keywords to follow on the stream.
TOPICS = [
    "baseball",
    "football",
]

# Stop after this many tweets have been collected.
LIMIT = 10

# Name of the SQLite database (used to build the database file name).
DATABASE_NAME = "tweet_stream"

# "Y" prints each tweet's text to the notebook; any other value disables printing.
PRINT_TO_NOTEBOOK = "Y"


# Setup connection & database

In [None]:
# Build the OAuth handler from the consumer key/secret and attach the access
# token, then create the API client from it.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

# Create the listener and a stream that delivers events to it, authenticated
# with the API client's auth handler.
stream_listener = StreamListener()
stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
# Connect to the SQLite database named "<DATABASE_NAME>.sqlite". The resulting
# `db` is the module-level object read by store_tweet() and by
# StreamListener.on_status().
db = dataset.connect('sqlite:///{}.sqlite'.format(DATABASE_NAME))

# Try it:

In [None]:
# Starts the stream, tracking the keywords in TOPICS. Statuses are handed to
# stream_listener; the listener returns False to stop once LIMIT is reached.
# NOTE(review): presumably this call blocks until the stream stops — confirm.
stream.filter(track=TOPICS)