Code to retrieve tweets containing the word 'Parma' posted within 100 km of Melbourne, using the Twitter API.

In [76]:
# Pip install GetOldTweets3 if you don't already have the package
#!pip install GetOldTweets3

# Imports
import GetOldTweets3 as got


In [77]:
import re
import io
import csv
import tweepy
from tweepy import OAuthHandler
import pandas as pd 
#TextBlob perform simple natural language processing tasks.
#from textblob import TextBlob


In [78]:
# Load the Twitter API credentials from a local CSV file.
# The next cell reads the consumer_key, consumer_secret, access_token and
# access_token_secret columns from the first row of this table.
# NOTE(review): the path is hard-coded to one user's home directory; point it
# at wherever your own credentials file lives.
creds = pd.read_csv('/Users/jacksmac/twittercreds.csv')

In [79]:
# Pull each credential from the first row (index 0) of the credentials table
consumer_key = creds.consumer_key[0]
consumer_secret = creds.consumer_secret[0]
access_token = creds.access_token[0]
access_token_secret = creds.access_token_secret[0]
# Create the OAuth handler from the consumer key/secret pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# Attach the access token and secret to the handler
auth.set_access_token(access_token, access_token_secret)
# Create the tweepy API object used to fetch tweets; scrape_text_query below
# reads this module-level `api` name, so run this cell first
api = tweepy.API(auth)




In [81]:
# Row helper used by the scraping function below -- run this cell before
# calling anything that builds the tweets DataFrame.
def extract_coordinates(row):
    """Return the coordinates stored in a row's 'Tweet Coordinates' entry.

    Many tweets carry no coordinate information, so the entry is checked
    before it is indexed.

    Args:
        row: Mapping-like record (for example a DataFrame row) that has a
            'Tweet Coordinates' entry.

    Returns:
        The value under the 'coordinates' key of that entry, or None when
        the entry is falsy (for example None or an empty dict).
    """
    geo = row['Tweet Coordinates']
    return geo['coordinates'] if geo else None

# Row helper used by the scraping function below -- run this cell before
# calling anything that builds the tweets DataFrame.
def extract_place(row):
    """Return the full place name (city, state or country) attached to a row.

    Many tweets carry no place information, so the entry is checked before
    its attribute is read.

    Args:
        row: Mapping-like record (for example a DataFrame row) that has a
            'Place Info' entry.

    Returns:
        The ``full_name`` attribute of that entry, or None when the entry
        is falsy.
    """
    place = row['Place Info']
    if not place:
        return None
    return place.full_name
    


In [82]:
# Scrape tweets matching a text query inside a geographic circle and export
# them to a CSV file.
# Defaults: tweets containing the word "parma" within 100km of Melbourne,
# excluding retweets.
def scrape_text_query(max_tweets, latitude_radius="-37.840935,144.946457,100km", text_query='parma -filter:retweets'):
    """Search for tweets, tabulate them, write them to CSV and return them.

    Relies on the module-level ``api`` object and on the ``extract_coordinates``
    and ``extract_place`` helpers defined earlier in this file.

    Args:
        max_tweets: Maximum number of tweets to pull from the search cursor.
        latitude_radius: Value passed as the search ``geocode`` argument, in
            the form "latitude,longitude,radius" (radius with a km/mi unit).
        text_query: Search query string; it is also used verbatim as the
            prefix of the output file name.

    Returns:
        The pandas DataFrame that was written to '<text_query>-tweets.csv',
        one row per tweet. The frame has no rows when nothing matched.
    """
    # Tweepy 4 renamed API.search to API.search_tweets; use whichever this
    # installation provides so the function works on both.
    search = getattr(api, 'search_tweets', None)
    if search is None:
        search = api.search

    # Lazy cursor over the search results, capped at max_tweets items
    tweets = tweepy.Cursor(search, geocode=latitude_radius, q=text_query,
                           lang='en', tweet_mode='extended').items(max_tweets)

    # One list per tweet; add or remove fields here and keep the column
    # names below in the same order.
    tweets_list = [
        [
            tweet.full_text, tweet.created_at, tweet.id_str,
            tweet.user.screen_name, tweet.coordinates, tweet.place,
            tweet.retweet_count, tweet.favorite_count, tweet.lang,
            tweet.source, tweet.in_reply_to_status_id_str,
            tweet.in_reply_to_user_id_str, tweet.is_quote_status,
        ]
        for tweet in tweets
    ]

    tweets_df = pd.DataFrame(tweets_list, columns=[
        'Tweet Text', 'Tweet Datetime', 'Tweet Id', 'Twitter @ Name',
        'Tweet Coordinates', 'Place Info', 'Retweets', 'Favorites',
        'Language', 'Source', 'Replied Tweet Id',
        'Replied Tweet User Id Str', 'Quote Status Bool',
    ])

    # Row-wise apply on a frame with no rows returns a DataFrame instead of a
    # Series, which cannot be assigned to a single column; skip the
    # extraction when the search returned nothing.
    if not tweets_df.empty:
        # Replace the raw coordinates object with its coordinate pair (or None)
        tweets_df['Tweet Coordinates'] = tweets_df.apply(extract_coordinates, axis=1)
        # Replace the raw place object with its full name (or None)
        tweets_df['Place Info'] = tweets_df.apply(extract_place, axis=1)

    # NOTE: the query text goes into the file name unchanged, so characters
    # such as ':' (not allowed in Windows file names) end up in it.
    tweets_df.to_csv('{}-tweets.csv'.format(text_query), sep=',', index=False)

    return tweets_df

In [83]:
# Search query used to scrape tweets; it also becomes the prefix of the CSV file name
query = 'fire -filter:retweets'

# Geocode string in the form "latitude,longitude,radius"
# Here: a 1000km circle centred on Melbourne
lat_r = "-37.840935,144.946457,1000km"

# Upper limit on how many of the most recent matching tweets to pull
tweets = 150

# Scrape tweets matching `query` inside `lat_r`, pulling at most `tweets` of them,
# and write the result to '<query>-tweets.csv'.
scrape_text_query(text_query=query,latitude_radius=lat_r, max_tweets=tweets)

In [84]:
# Default test: only max_tweets is supplied, so the function falls back to its
# default geocode (100km around Melbourne) and default query
# ('parma -filter:retweets').
tweets = 200
scrape_text_query(max_tweets=tweets)