# Scraper for Twitter using GetOldTweets3

Package: https://github.com/Mottl/GetOldTweets3

### Notebook Author: Martin Beck

In [1]:
# Pip install GetOldTweets3 if you don't already have the package
# !pip install GetOldTweets3

# Imports
import GetOldTweets3 as got
import pandas as pd

## Query by Username
Creation of queries using GetOldTweets3

The function runs the query and then writes the results to a CSV file using pandas.

In [31]:
# Function that pulls tweets from a specific username and writes them to a CSV file

# Parameters: (Twitter username, or list of usernames), (max number of most recent tweets to pull)
def username_tweets_to_csv(username, count):
    """Pull the most recent tweets of one or more users and save them as CSV.

    Parameters
    ----------
    username : str or iterable of str
        Twitter handle(s) handed to ``TweetCriteria.setUsername``.
    count : int
        Maximum number of tweets to request (``TweetCriteria.setMaxTweets``).

    Returns
    -------
    None
        The tweets are written to ``<username>-<size>-tweets.csv`` in the
        current working directory with the columns ``Datetime`` and ``Text``.
        ``<size>`` is ``<count/1000>k`` for counts of 1000 or more and the
        plain count otherwise; several usernames are joined with ``_``.
    """
    # Creation of query object
    tweetCriteria = (
        got.manager.TweetCriteria()
        .setUsername(username)
        .setMaxTweets(count)
    )
    # Creation of list that contains all tweets
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)

    # Creating list of chosen tweet data
    user_tweets = [[tweet.date, tweet.text] for tweet in tweets]

    # Creation of dataframe from tweets list
    tweets_df = pd.DataFrame(user_tweets, columns=['Datetime', 'Text'])

    # A list of handles would otherwise end up in the file name as its
    # Python repr ("['a', 'b']-..."), so join the handles instead.
    if isinstance(username, str):
        name_label = username
    else:
        name_label = '_'.join(str(name) for name in username)

    # int(count/1000) turns every count below 1000 into "0k", which makes
    # different runs overwrite each other; use the plain count in that case.
    if count >= 1000:
        size_label = '{}k'.format(int(count / 1000))
    else:
        size_label = str(int(count))

    # Converting dataframe to CSV
    tweets_df.to_csv('{}-{}-tweets.csv'.format(name_label, size_label), sep=',')

## Query by Text Search
The function runs the query and then writes the results to a CSV file using pandas.

In [34]:
# Function that pulls tweets based on a general search query and turns to csv file

# Parameters: (text query you want to search), (max number of most recent tweets to pull from)
def text_query_to_csv(text_query, count):
    """Pull tweets matching a search query and save them as CSV.

    Parameters
    ----------
    text_query : str
        Search text handed to ``TweetCriteria.setQuerySearch``.
    count : int
        Maximum number of tweets to request (``TweetCriteria.setMaxTweets``).

    Returns
    -------
    None
        The tweets are written to ``<query>-<size>-tweets.csv`` in the
        current working directory with the columns ``Datetime`` and ``Text``.
        In ``<query>`` the characters ``\\ / : * ? " < > |`` are replaced by
        ``_`` so the search text is always a usable file name. ``<size>`` is
        ``<count/1000>k`` for counts of 1000 or more and the plain count
        otherwise.
    """
    # Creation of query object
    tweetCriteria = (
        got.manager.TweetCriteria()
        .setQuerySearch(text_query)
        .setMaxTweets(count)
    )
    # Creation of list that contains all tweets
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)

    # Creating list of chosen tweet data
    text_tweets = [[tweet.date, tweet.text] for tweet in tweets]

    # Creation of dataframe from tweets
    tweets_df = pd.DataFrame(text_tweets, columns=['Datetime', 'Text'])

    # Free-form search text may contain path separators or characters that
    # are not allowed in file names (e.g. "AC/DC", "from:user", quoted
    # phrases). Used verbatim, to_csv would fail only after the whole scrape
    # has finished, so neutralise those characters first.
    reserved = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    query_label = str(text_query).translate(reserved)

    # int(count/1000) turns every count below 1000 into "0k", which makes
    # different runs overwrite each other; use the plain count in that case.
    if count >= 1000:
        size_label = '{}k'.format(int(count / 1000))
    else:
        size_label = str(int(count))

    # Converting tweets dataframe to csv file
    tweets_df.to_csv('{}-{}-tweets.csv'.format(query_label, size_label), sep=',')

## Query Function Calls
Putting it all together by calling the functions defined above.

In [32]:
# Account to scrape; the same value is used to name the CSV file
username = 'jack'
# Upper bound on how many of that account's most recent tweets to pull
count = 20000

# Scrape the account's tweets and write them to a CSV file
username_tweets_to_csv(username=username, count=count)

In [29]:
# Search text to scrape; the same value is used to name the CSV file
text_query = 'USA Election 2020'
# Upper bound on how many matching tweets to pull
count = 5000

# Run the search and write the matching tweets to a CSV file
text_query_to_csv(text_query=text_query, count=count)

In [10]:
# Search text for the location/date-restricted query below
text_query = '#covid19'
# Upper bound on how many matching tweets to pull
count = 2000
# Place handed to setNear
location = 'New York'
# Date bounds handed to setSince / setUntil (YYYY-MM-DD strings)
since = '2020-03-18'
until = '2020-03-23'

In [11]:
# Inline text search restricted by place and date range.
# Uses text_query, count, location, since and until from the cell above and
# keeps the result in memory as tweets_df (no CSV file is written here).

# Build the query: search text, size limit, place and date bounds
tweetCriteria = (
    got.manager.TweetCriteria()
    .setQuerySearch(text_query)
    .setMaxTweets(count)
    .setNear(location)
    .setSince(since)
    .setUntil(until)
)

# Run the query
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

# Keep only the timestamp and text of every tweet
text_tweets = [[t.date, t.text] for t in tweets]

# Tabulate the selected fields
tweets_df = pd.DataFrame(text_tweets, columns=['Datetime', 'Text'])

In [12]:
# Preview only the first rows instead of rendering the whole frame
tweets_df.head(10)

Unnamed: 0,Datetime,Text
0,2020-03-22 23:59:54+00:00,#coronavirus #COVIDー19 #COVID19 #besafe #bemin...
1,2020-03-22 23:59:45+00:00,"Haha, I even washed the packaged fish before i..."
2,2020-03-22 23:59:21+00:00,"Our office may be closed, but our intake lines..."
3,2020-03-22 23:59:20+00:00,"You go, Joe. I agree with this recommendation...."
4,2020-03-22 23:59:14+00:00,NYC healthcare workers treating #CoVID19 face ...
5,2020-03-22 23:59:08+00:00,http://medium.com/@paulette_26842/an-open-lett...
6,2020-03-22 23:58:39+00:00,#nyc #covid19 #coronavirus
7,2020-03-22 23:58:39+00:00,@CTICU_NYP @HeadRNColumbia Proud to be working...
8,2020-03-22 23:58:10+00:00,We stand by Italy during these trying times. S...
9,2020-03-22 23:57:34+00:00,How can we make sure that #UHC includes water ...
