# Pull Twitter Data

In [1]:
import calendar
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import seaborn as sns
import tweepy as tw
from matplotlib import pyplot as plt

In [2]:
# Bearer token for the app you create on the developer portal.
# It is read from the TWITTER_BEARER_TOKEN environment variable instead of being
# written into the notebook, so the credential is never saved or shared with the file.
# Set it before starting Jupyter, e.g.:  export TWITTER_BEARER_TOKEN="<your token>"
# SECURITY: any token that was ever pasted directly into this cell should be treated
# as leaked and regenerated on the developer portal.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN", "")
if not BEARER_TOKEN:
    # Warn rather than raise so the notebook still loads; API calls will fail until the variable is set.
    print("WARNING: TWITTER_BEARER_TOKEN is not set; API requests will not authenticate.")

In [3]:
# Build the tweepy client from the bearer token.
# wait_on_rate_limit=True asks tweepy to wait when the rate limit is met.
client = tw.Client(
    wait_on_rate_limit=True,
    bearer_token=BEARER_TOKEN,
)

In [16]:
# Example start and end date
# NOTE(review): the search API appears to treat end_time as exclusive, so tweets
# posted on 2021-03-31 itself would not be returned — confirm this is intended.
start_date = datetime.strptime("2021-01-01", "%Y-%m-%d")
end_date = datetime.strptime("2021-03-31", "%Y-%m-%d") #change short time period
query_results = []   # accumulates the tweet objects from every page
places = {}          # place id -> place object, de-duplicated across pages
max_results_per_request = 500 # must be between 10 and 500

# Use the paginator to loop over all available tweets in batches of up to 500.
# Each iteration yields one response page; paging continues until the time
# period is exhausted or `limit` pages have been requested.
for i, tweets in enumerate(
    tw.Paginator(
        client.search_all_tweets,
        query="#Covid19vaccine -is:retweet place:USA", #change with only one different key words each time you run
        tweet_fields=["created_at", "geo"],
        place_fields=["place_type", "geo"],
        expansions="geo.place_id",
        max_results=max_results_per_request,
        start_time=start_date,
        end_time=end_date,
        limit=1000 # maximum number of pages (requests) to fetch, not tweets
    )
):
    print(f"Response #{i+1}")

    # Store the tweet data.
    # 'tweets.data' is the list of tweets on this page; it is None when the page
    # has no results, so fall back to an empty list instead of raising TypeError.
    query_results += tweets.data or []

    # Keep track of all of the places.
    # The 'places' key is absent when a page carries no place expansions.
    for p in tweets.includes.get('places', []):
        places[p["id"]] = p

    # The full-archive search endpoint also enforces a 1 request/second limit
    # that the paginator does not handle, so pause between pages.
    time.sleep(1)
    


Response #1


In [17]:
# Convert the collected tweets to a dataframe and parse 'created_at' as datetimes
query_results = pd.DataFrame(query_results)
query_results['created_at'] = pd.to_datetime(query_results['created_at'])

# Extract out the place id as its own column.
# A tweet whose 'geo' is missing (None/NaN) or lacks a 'place_id' gets None
# instead of aborting the whole cell with a TypeError/KeyError.
query_results['place_id'] = query_results['geo'].apply(
    lambda geo: geo.get('place_id') if hasattr(geo, 'get') else None
)

In [18]:
# Turn the places mapping into a dataframe: one row per place, built from the
# dict form of each stored place, with the 'id' column exposed as 'place_id'
# so it lines up with the tweets table.
places_df = pd.DataFrame(
    [dict(place) for place in places.values()]
).rename(columns={"id": "place_id"})

In [19]:
# Display the tweets table (created_at, geo, id, text, place_id)
query_results

Unnamed: 0,created_at,geo,id,text,place_id
0,2021-03-30 17:26:22+00:00,{'place_id': '4ec01c9dbc693497'},1376948930257256451,I have been vaxxed! Start the clock for the se...,4ec01c9dbc693497
1,2021-03-30 16:07:03+00:00,{'place_id': '3b98b02fba3f9753'},1376928970046910467,1st 💉 down. 1 to go. Big shout out to @ACHealt...,3b98b02fba3f9753
2,2021-03-30 00:03:11+00:00,{'place_id': '67d92742f1ebf307'},1376686406370746369,"My son-in-law was among the 13,000 today. So g...",67d92742f1ebf307
3,2021-03-29 20:09:10+00:00,{'place_id': 'de599025180e2ee7'},1376627512692903943,"Got dose #1 today, looking forward to dose #2 ...",de599025180e2ee7
4,2021-03-29 19:45:00+00:00,{'place_id': '4ec01c9dbc693497'},1376621431879000067,@POTUS @CityFoodLover such a great news! 90% o...,4ec01c9dbc693497
...,...,...,...,...,...
187,2021-01-08 20:47:03+00:00,{'place_id': 'dd9c503d6c35364b'},1347646019215962113,Updated #PA #COVID19vaccine plan from @PAHeal...,dd9c503d6c35364b
188,2021-01-08 16:14:16+00:00,{'place_id': '9807c5c5f7a2c6ce'},1347577371277324288,Soooo I finally get to shoot my shot! #COVID19...,9807c5c5f7a2c6ce
189,2021-01-08 05:10:57+00:00,{'place_id': '3b98b02fba3f9753'},1347410441522335744,One of the hardest things of being in a Phase ...,3b98b02fba3f9753
190,2021-01-06 18:21:34+00:00,{'place_id': '2d83c71ce16cd187'},1346884631073845250,Got my vaccine today!!! Doing my part! #COVID1...,2d83c71ce16cd187


In [20]:
# Display the places lookup table (full_name, geo, place_id, place_type)
places_df

Unnamed: 0,full_name,geo,place_id,place_type
0,"Florida, USA","{'type': 'Feature', 'bbox': [-87.634643, 24.39...",4ec01c9dbc693497,admin
1,"North Carolina, USA","{'type': 'Feature', 'bbox': [-84.3219475, 33.7...",3b98b02fba3f9753,admin
2,"Michigan, USA","{'type': 'Feature', 'bbox': [-90.4181075, 41.6...",67d92742f1ebf307,admin
3,"Ohio, USA","{'type': 'Feature', 'bbox': [-84.8203089, 38.4...",de599025180e2ee7,admin
4,"Virginia, USA","{'type': 'Feature', 'bbox': [-83.67529, 36.540...",5635c19c2b5078d1,admin
5,"Minnesota, USA","{'type': 'Feature', 'bbox': [-97.239256, 43.49...",9807c5c5f7a2c6ce,admin
6,"South Carolina, USA","{'type': 'Feature', 'bbox': [-83.353955, 32.04...",6057f1e35bcc6c20,admin
7,"Colorado, USA","{'type': 'Feature', 'bbox': [-109.060257, 36.9...",e21c8e4914eef2b3,admin
8,"Wisconsin, USA","{'type': 'Feature', 'bbox': [-92.889433, 42.49...",7dc5c6d3bfb10ccc,admin
9,"Georgia, USA","{'type': 'Feature', 'bbox': [-85.605166, 30.35...",7142eb97ae21e839,admin


In [21]:
# Attach place details to every tweet (left join on place_id keeps all tweets),
# discard the id/geo helper columns, and render the timestamp as a
# day-month-year string.
query_results_final = (
    query_results
    .merge(places_df, on='place_id', how='left')
    .drop(columns=['geo_x', 'id', 'place_id', 'geo_y', 'place_type'])
    .assign(created_at=lambda merged: merged['created_at'].dt.strftime('%d-%m-%Y'))
)

In [22]:
# Preview the first rows of the merged table (created_at, text, full_name)
query_results_final.head()

Unnamed: 0,created_at,text,full_name
0,30-03-2021,I have been vaxxed! Start the clock for the se...,"Florida, USA"
1,30-03-2021,1st 💉 down. 1 to go. Big shout out to @ACHealt...,"North Carolina, USA"
2,30-03-2021,"My son-in-law was among the 13,000 today. So g...","Michigan, USA"
3,29-03-2021,"Got dose #1 today, looking forward to dose #2 ...","Ohio, USA"
4,29-03-2021,@POTUS @CityFoodLover such a great news! 90% o...,"Florida, USA"


In [33]:
# Write the final table to disk as UTF-8 CSV, leaving out the row index
query_results_final.to_csv(
    path_or_buf='pullnew.csv',
    encoding='utf-8',
    index=False,
)