In [43]:
import csv
import os
import time

import pandas as pd
import tweepy

In [44]:
# Twitter Developer Credentials
# Each value is read from an environment variable so that real secrets never
# have to be saved inside the notebook. When a variable is not set, the
# original placeholder string is used instead.
consumer_key        = os.environ.get("TWITTER_CONSUMER_KEY", "consumer_key")
consumer_secret     = os.environ.get("TWITTER_CONSUMER_SECRET", "consumer_secret")
access_token        = os.environ.get("TWITTER_ACCESS_TOKEN", "access_token")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "access_token_secret")

In [45]:
# Build the OAuth handler from the consumer key pair, then attach the
# access token pair to it
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client: asked to wait when a rate limit is hit and to notify when it does
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

# Getting More Information From Tweets

* tweet.user.id_str = User Id of the tweet's author
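* tweet.text = Text content of the tweet (use tweet.full_text instead when searching with tweet_mode='extended', as the first query below does)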
* tweet.created_at = Date tweet was created
* tweet.user.name = Name of the user as they have defined it
* tweet.user.screen_name = Username of the tweet's author, commonly called the user's @name
* tweet.user.location = User-defined location for the account's profile. May be null
* tweet.coordinates = Geographic location as reported by user or client. May be null
* tweet.place = Place the tweet is associated with (not necessarily where it was sent from), e.g. Plainsboro, NJ. May be null
* tweet.lang = Indicates the machine detected language of tweet text

In [54]:
# Search settings -- edit these values to change what is pulled
text_query = 'flood'
max_tweets = 3150
coordinates = '42.851306,-96.127041,100mi'
language = 'en'
result_type = 'recent'
until_date = '2021-05-13'

# Keyword arguments handed to the cursor together with api.search
search_options = dict(
    q=text_query,
    geocode=coordinates,
    lang=language,
    result_type=result_type,
    until=until_date,
    count=100,
    tweet_mode='extended',
)

# Cursor over the search method, limited to max_tweets items
tweets = tweepy.Cursor(api.search, **search_options).items(max_tweets)
 
# One inner list per tweet yielded by the cursor.
# Add or remove entries here to change which tweet fields are collected.
tweets_list = [
    [
        status.user.id_str,       # author id
        status.full_text,         # tweet text (extended mode)
        status.created_at,        # creation time
        status.user.name,         # author display name
        status.user.screen_name,  # author @name
        status.user.location,     # author profile location
        status.coordinates,       # coordinates attached to the tweet, if any
        status.place,             # place attached to the tweet, if any
        status.lang,              # language of the tweet
    ]
    for status in tweets
]
 
# Creation of dataframe from tweets_list.
# The frame is built once, directly with its column names; the earlier
# unnamed pd.DataFrame(tweets_list) was overwritten immediately and only
# duplicated the work.
# Keep this list in the same order as the fields collected in tweets_list,
# and add or remove names here whenever those fields change.
tweet_columns = [
    'Tweet User Id',
    'Tweet Text',
    'Tweet Time',
    'Tweet User Name',
    'Twitter @ Name',
    'Tweet User Location',
    'Tweet Coordinates',
    'Place Info',
    'Language',
]
tweets_df = pd.DataFrame(tweets_list, columns=tweet_columns)

In [55]:
# Make sure to run this cell: the extraction cell below depends on it.

def extract_coordinates(row):
    """Return the coordinate value stored in a row's 'Tweet Coordinates' field.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must support ``row['Tweet Coordinates']``.

    Returns
    -------
    object or None
        * dict value: its ``'coordinates'`` entry (None when the key is absent).
        * None, NaN or any other empty value: None. Tweets often carry no
          coordinates, so this is the common case.
        * any other non-empty value: returned unchanged. This keeps the
          function safe to apply again to a column that was already
          extracted (previously that raised a TypeError).
    """
    geo = row['Tweet Coordinates']
    if isinstance(geo, dict):
        return geo.get('coordinates')
    # NaN is truthy and is the only float that differs from itself; it shows
    # up in place of None when the frame has been reloaded from a CSV file.
    if isinstance(geo, float) and geo != geo:
        return None
    # Empty values collapse to None; already-extracted values pass through.
    return geo or None

# Make sure to run this cell: the extraction cell below depends on it.
def extract_place(row):
    """Return the place name stored in a row's 'Place Info' field.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must support ``row['Place Info']``.

    Returns
    -------
    str or None
        * None, NaN or any other empty value: None. Tweets often carry no
          place information, so this is the common case.
        * str value: returned unchanged, so the function is safe to apply
          again to a column that was already extracted (previously that
          raised an AttributeError).
        * anything else: its ``full_name`` attribute.
    """
    place = row['Place Info']
    # NaN is truthy and is the only float that differs from itself; it shows
    # up in place of None when the frame has been reloaded from a CSV file.
    if isinstance(place, float) and place != place:
        return None
    if not place:
        return None
    if isinstance(place, str):
        return place
    return place.full_name

In [56]:
# Replace the raw 'Tweet Coordinates' and 'Place Info' values with the
# extracted ones, row by row.
#
# The per-row results are collected into an explicit object Series instead of
# using tweets_df.apply(..., axis=1): on a frame with zero rows (a search that
# returned no tweets) apply hands back a DataFrame rather than a Series, and
# that cannot be assigned to a single column.

# Checks if there are coordinates attached to tweets, if so extracts them
tweets_df['Tweet Coordinates'] = pd.Series(
    [extract_coordinates(row) for _, row in tweets_df.iterrows()],
    index=tweets_df.index,
    dtype=object,
)

# Checks if there is place information available, if so extracts it
tweets_df['Place Info'] = pd.Series(
    [extract_place(row) for _, row in tweets_df.iterrows()],
    index=tweets_df.index,
    dtype=object,
)

In [57]:
# Write the dataframe to disk as CSV, without the index column
tweets_df.to_csv(path_or_buf="tweets_df_USGS_line144", index=False)

# Example of a search query using advanced queries:

In [None]:
# NOTE: this example may no longer show tweets if until_date falls outside
# of the 7-day period counted from when you run the cell
coordinates = '19.402833,-99.141051,50mi'
language = 'en'
result_type = 'recent'
until_date = '2020-08-10'
max_tweets = 150

# Keyword arguments handed to the cursor together with api.search
search_options = dict(
    geocode=coordinates,
    lang=language,
    result_type=result_type,
    until=until_date,
    count=100,
)
tweets = tweepy.Cursor(api.search, **search_options).items(max_tweets)

# One inner list per tweet; add or remove entries to change what is collected
tweets_list = [
    [
        status.text,
        status.created_at,
        status.id_str,
        status.favorite_count,
        status.user.screen_name,
        status.user.id_str,
        status.user.location,
        status.user.url,
        status.user.verified,
        status.user.followers_count,
        status.user.friends_count,
        status.user.statuses_count,
        status.user.default_profile_image,
        status.lang,
    ]
    for status in tweets
]

# Dataframe built without column names to keep the example short
tweets_df = pd.DataFrame(tweets_list)

# Example of a query pulling tweet and user information with an advanced query:

In [None]:
# Search settings for this example
text_query = 'Coronavirus'
coordinates = '36.169786,-115.139858,50mi'
max_tweets = 150

# Cursor over the search method, limited to max_tweets items
tweets = tweepy.Cursor(
    api.search,
    q=text_query,
    geocode=coordinates,
    count=100,
).items(max_tweets)

# One inner list per tweet; add or remove entries to change what is collected
tweets_list = [
    [
        status.text,
        status.created_at,
        status.id_str,
        status.favorite_count,
        status.user.screen_name,
        status.user.id_str,
        status.user.location,
        status.user.followers_count,
        status.coordinates,
        status.place,
    ]
    for status in tweets
]

# Dataframe built without column names to keep the example short
tweets_df = pd.DataFrame(tweets_list)