# _Test Notebook_

This notebook is going to serve as a playground of sorts in order to build something that will connect to the Twitter API, stream X number of tweets, and then sleep for X minutes before streaming again. Additionally this will be done for a predefined time period. 

Alexander Galea's [blog](https://galeascience.wordpress.com/2016/03/18/collecting-twitter-data-with-python/) was instrumental in getting this going. A lot of the work below is adapted directly from his GitHub.

In [43]:
import os

# what is our current working directory?
print("Our current working directory is '{}'.".format(os.getcwd()))

# import Path to make working with directory even more manageable
from pathlib import Path

# store our main directory path in a variable in case we need to access/download information in that locaton directly
PATH = Path(os.getcwd())
print(PATH)

Our current working directory is '/Users/jai/Documents/projects/twitter-politics'.
/Users/jai/Documents/projects/twitter-politics


In [99]:
# import personal tools
from joetools import private
from textblob import TextBlob
import sqlite3
import dataset
import tweepy
import time

In [107]:
# setup tweepy to authenticate with Twitter with the following code
auth = tweepy.OAuthHandler(private.TWITTER_APP_KEY, private.TWITTER_APP_SECRET)
auth.set_access_token(private.TWITTER_KEY, private.TWITTER_SECRET)
# create an API object to pull data from Twitter, pass in the authentication from above
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
print('API Set-up!')

# connect to database
db = dataset.connect(private.CONNECTION_STRING)
print('Database connected; defining MyStreamListener')

API Set-up!
Database connected; defining MyStreamListener


In [103]:
collection = {}

In [112]:
#override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.StreamListener):
    def __init__(self, api=None):
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0

    def on_status(self, status):
        # we don't want retweets
        if status.retweeted:
            return
        
        description = status.user.description
        loc = status.user.location
        text = status.text
        coords = status.coordinates
        geo = status.geo
        name = status.user.screen_name
        user_created = status.user.created_at
        followers = status.user.followers_count
        id_str = status.id_str
        created = status.created_at
        retweets = status.retweet_count
        bg_color = status.user.profile_background_color
        blob = TextBlob(text)
        sent = blob.sentiment

        if geo is not None:
            geo = json.dumps(geo)

        if coords is not None:
            coords = json.dumps(coords)

        table = db[private.TABLE_NAME]
        self.num_tweets += 1
        if self.num_tweets < 20:
            try:
                table.insert(dict(
                    user_description=description,
                    user_location=loc,
                    coordinates=coords,
                    text=text,
                    geo=geo,
                    user_name=name,
                    user_created=user_created,
                    user_followers=followers,
                    id_str=id_str,
                    created=created,
                    retweet_count=retweets,
                    user_bg_color=bg_color,
                    polarity=sent.polarity,
                    subjectivity=sent.subjectivity
                ))
                return True
            except ProgrammingError as err:
                print(err)
        else:
                return False

    def on_error(self, status_code):
        if status_code == 420:
            #returning False in on_data disconnects the stream
            return False

In [113]:
stream_listener = MyStreamListener()
stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
stream.filter(track=private.TRACK_TERMS)

In [114]:
from joetools import private
import datafreeze
from datafreeze import freeze
import tweepy
import dataset
from textblob import TextBlob

db = dataset.connect(private.CONNECTION_STRING)

result = db[private.TABLE_NAME].all()
freeze(result, format='csv', filename=private.CSV_NAME)

In [92]:
import sqlite3

conn = sqlite3.connect('tweets.db')

In [93]:
c = conn.cursor()
c.execute('''CREATE TABLE tweets
    (tweetText text,
    user text,
    followers integer,
    date text,
    location text)''')
conn.commit()
conn.close()

In [94]:
import time 

print("printed immediately")
time.sleep(15)
print('Printed after 15 seconds')

printed immediately
Printed after 15 seconds


In [97]:
while time.sleep(5) is True:
    print('True')
    break

In [None]:
# function that does API set-up
def load_api():
    '''Loads Twitter API and authorizes user'''
    consumer_key = secrets.TWITTER_API_KEY
    consumer_secret = secrets.TWITTER_API_SECRET
    access_token = secrets.TWITTER_TOKEN
    access_secret = secrets.TWITTER_TOKEN_SECRET
    auth 

In [84]:
search_phrases = ["trump",  "pelosi", "impeachment"]

In [None]:
def directory_setup():
    '''function that updates directory to one we want to download data to'''
    # print out the current working directory
    print('Current working directory: {}'.format(os.getcwd()))
    print('')
    print('Is this the directory you want to use? Enter either YES or NO.')
    user_input1 = input().lower()
    while user_input1 not in ['yes', 'no']:
        print("Invalid response, please try again (Please choose YES or NO)")
        user_input1 = input("Choose you answer: ")

In [None]:
directory_setup()

In [85]:
for search_phrase in search_phrases:
    #print('Search phrase =', search_phrase)
    ''' other variables '''
    name = search_phrase.split()
    print(name)

['trump']
['pelosi']
['impeachment']


In [46]:
PATH.exists()

True

In [47]:
PATH.is_dir()

True

In [49]:
PATH.home()

PosixPath('/Users/jai')

In [50]:
PATH.cwd()

PosixPath('/Users/jai/Documents/projects/twitter-politics')

In [48]:
[x for x in PATH.iterdir() if x.is_dir()]

[PosixPath('/Users/jai/Documents/projects/twitter-politics/joetools'),
 PosixPath('/Users/jai/Documents/projects/twitter-politics/.ipynb_checkpoints'),
 PosixPath('/Users/jai/Documents/projects/twitter-politics/data')]

In [87]:
for search_phrase in search_phrases:
    #print('Search phrase =', search_phrase)
    ''' other variables '''
    name = search_phrase.split()
    if len(name) > 1:
        json_file_root = name[0] + '_' + name[1]
    else:
        json_file_root = str(name[0])
    
    PATH.mkdir(json_file_root, exist_ok=True)

TypeError: an integer is required (got type str)