# Scraper for Twitter using Tweepy

Package Github: https://github.com/tweepy/tweepy

Package Documentation: https://tweepy.readthedocs.io/en/latest/

### Notebook Author: Martin Beck

In [3]:
# Pip install Tweepy if you don't already have the package
# !pip install tweepy

# Imports
import os
import time

import pandas as pd
import tweepy

## Credentials and Authorization

In [4]:
# Credentials
#
# Each value is read from an environment variable when it is set, so real
# keys never have to be saved inside the notebook. The "XXXXXX" placeholders
# remain the fallback and can still be edited in place.

consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "XXXXXX")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "XXXXXX")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "XXXXXX")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "XXXXXX")

# Build the auth handler from the consumer key pair, then attach the
# access token pair to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Shared API client used by the query functions further down.
# wait_on_rate_limit=True asks Tweepy to wait out rate limits.
api = tweepy.API(auth, wait_on_rate_limit=True)

## Query by Username
Build queries with the Tweepy API.

The function below runs the query and then writes the results to a CSV file using pandas.

In [5]:
# Module-level name kept from the original cell; the function below works on
# its own local variables and never reads it.
tweets = []


def username_tweets_to_csv(username, count):
    """Save a user's most recent tweets to ``<username>-tweets.csv``.

    Pages through ``api.user_timeline`` with ``tweepy.Cursor``, collects the
    creation time, id and text of up to ``count`` tweets, and writes them as
    a comma-separated file (no index column).

    Args:
        username: Account to query; also used as the CSV file name prefix.
        count: Maximum number of tweets to pull from the timeline.

    Returns:
        None. Ordinary errors are printed rather than raised.
    """
    try:
        # NOTE(review): the `id` keyword looks like Tweepy 3.x naming; newer
        # releases may expect `screen_name`/`user_id` -- confirm against the
        # installed version.
        timeline = tweepy.Cursor(api.user_timeline, id=username).items(count)

        # One row per tweet. Add or remove attributes here as needed and
        # keep the column list below in sync.
        rows = [[tweet.created_at, tweet.id, tweet.text] for tweet in timeline]

        tweets_df = pd.DataFrame(rows, columns=['Datetime', 'Tweet Id', 'Text'])

        tweets_df.to_csv('{}-tweets.csv'.format(username), sep=',', index=False)

    except Exception as e:
        # Catch Exception, not BaseException, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the run instead of being reported as a
        # failed query.
        print('failed to fetch tweets for {}:'.format(username), str(e))
        # Pause kept from the original; presumably a crude back-off for
        # callers that issue several queries in a row -- confirm it is
        # still wanted.
        time.sleep(3)

In [6]:
# Account to scrape; the name also becomes the prefix of the CSV file name.
# `count` caps how many of that account's most recent tweets are pulled.
username, count = 'jack', 150

# Write the account's latest `count` tweets to '<username>-tweets.csv'.
username_tweets_to_csv(username=username, count=count)

## Query by Text Search
The function below runs the search query and then writes the results to a CSV file using pandas.

In [7]:
# Module-level name kept from the original cell; the function below works on
# its own local variables and never reads it.
tweets = []


def text_query_to_csv(text_query, count):
    """Save tweets matching a search query to ``<text_query>-tweets.csv``.

    Pages through ``api.search`` with ``tweepy.Cursor``, collects the
    creation time, id and text of up to ``count`` matching tweets, and writes
    them as a comma-separated file (no index column).

    Args:
        text_query: Search string sent as ``q``. It is also used verbatim as
            the CSV file name prefix, so a query containing path separators
            will not produce a plain file in the current directory.
        count: Maximum number of tweets to pull from the search results.

    Returns:
        None. Ordinary errors are printed rather than raised.
    """
    try:
        # NOTE(review): `api.search` looks like Tweepy 3.x naming; newer
        # releases may expose this under a different method name -- confirm
        # against the installed version.
        results = tweepy.Cursor(api.search, q=text_query).items(count)

        # One row per tweet. Add or remove attributes here as needed and
        # keep the column list below in sync.
        rows = [[tweet.created_at, tweet.id, tweet.text] for tweet in results]

        tweets_df = pd.DataFrame(rows, columns=['Datetime', 'Tweet Id', 'Text'])

        tweets_df.to_csv('{}-tweets.csv'.format(text_query), sep=',', index=False)

    except Exception as e:
        # Catch Exception, not BaseException, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the run instead of being reported as a
        # failed query.
        print('failed to fetch tweets for query {!r}:'.format(text_query), str(e))
        # Pause kept from the original; presumably a crude back-off for
        # callers that issue several queries in a row -- confirm it is
        # still wanted.
        time.sleep(3)

In [8]:
# Search phrase to scrape; it also becomes the prefix of the CSV file name.
# `count` caps how many tweets matching the query are pulled.
text_query, count = 'USA Election 2020', 150

# Run the search and write up to `count` results to '<text_query>-tweets.csv'.
text_query_to_csv(text_query=text_query, count=count)