In [30]:
# Run the pip install command below if you don't already have the library
# !pip install git+https://github.com/JustAnotherArchivist/snscrape.git

import os
import math
import pandas as pd
import json
from pandas import json_normalize

import snscrape.modules.twitter as sntwitter
import itertools

from datetime import datetime
from random import randrange

In [31]:
# Search terms: Content in the tweet
search_terms = [
    'coronavirus',
    'covid19',
    'pandemic',
    'quarantine',
]

# Search hashtags: Hashtags in the tweet
search_tags = []

# All cities in all states
cities_json = open('us_cities.json')
us_cities = pd.DataFrame(json.load(cities_json))

# Clean and convert population from string to int
us_cities['2020census'] = us_cities['2020census'].str.replace(',', '').astype(int)

# Cities in California
ca_cities = us_cities[us_cities['State'] == 'California'][['City', '2020census']]

# Los Angeles will be our test city
idv = ca_cities.iloc[0]

# Creates a search parameter for the Twitter scraper
def search_builder(terms, city, since_date, until_date):    
    s = ''
    # Concatenate all search terms
    for i in range(len(terms) - 1):
        # NOTE: Inclusive OR statement
        s += '\"' + terms[i] + '\" OR '
    s += '\"' + terms[len(terms) - 1] + '\" '    
    
    # Within or the near the city
    s += 'near: \"' + city + '\" '
    # Since the first date
    s += 'since:' + since_date + ' '
    # Until the second date
    s += 'until:' + until_date
    
    return s

# Returns a DataFrame with at most tweet_limit tweets resulting from
# the search
def scrape(search_param, tweet_limit):
    return pd.DataFrame(itertools.islice(
        sntwitter.TwitterSearchScraper(search_param).get_items(), tweet_limit))

# Search for tweets in Los Angeles mentioning COVID terms
# from the beginning to the end of 2020.
los_angeles_search = search_builder(
    search_terms,
    "Los Angeles",
    '2020-01-01',
    '2020-12-31')

print(los_angeles_search)

"coronavirus" OR "covid19" OR "pandemic" OR "quarantine" near: "Los Angeles" since:2020-01-01 until:2020-12-31


In [32]:
tweet_count = 100

start_time = datetime.now()
print("[START SEARCH]: " + str(start_time))

# Start scraping
los_angeles_tweets = scrape(los_angeles_search, tweet_count)

end_time = datetime.now()
print("[END SEARCH]: " + str(end_time))

# Time needed to execute scrape
print("[TIME ELAPSED]: " + str(end_time - start_time))

# Let's take a look at the returned DataFrame
los_angeles_tweets

[START SEARCH]: 2021-10-18 09:24:18.352512
[END SEARCH]: 2021-10-18 09:24:21.611880
[TIME ELAPSED]: 0:00:03.259368


Unnamed: 0,url,date,content,renderedContent,id,user,replyCount,retweetCount,likeCount,quoteCount,...,media,retweetedTweet,quotedTweet,inReplyToTweetId,inReplyToUser,mentionedUsers,coordinates,place,hashtags,cashtags
0,https://twitter.com/sadiwahyuhwhats/status/134...,2020-12-21 02:28:06+00:00,Los Angeles County has continued to report sin...,Los Angeles County has continued to report sin...,1340846478521151488,"{'username': 'sadiwahyuhwhats', 'id': 13368384...",1,0,0,0,...,,,,1.340846e+18,"{'username': 'sadiwahyuhwhats', 'id': 13368384...",,,,,
1,https://twitter.com/voyagerthreebot/status/134...,2020-12-18 21:48:29+00:00,Conjunction viewing in southern california? Bo...,Conjunction viewing in southern california? Bo...,1340051332439429121,"{'username': 'voyagerthreebot', 'id': 26969353...",0,0,0,0,...,,,,,,,,,,
2,https://twitter.com/FoodA0808188824/status/134...,2020-12-18 18:59:26+00:00,@1776Dairenn The idea that ICU's in NYC or Los...,@1776Dairenn The idea that ICU's in NYC or Los...,1340008789970399233,"{'username': 'FoodA0808188824', 'id': 76865922...",0,0,1,0,...,,,,1.340007e+18,"{'username': '1776Dairenn', 'id': 117988875242...","[{'username': '1776Dairenn', 'id': 11798887524...",,,,
3,https://twitter.com/melroseaction/status/13397...,2020-12-17 23:34:15+00:00,TENSE TIMES:: BREAKING:: State of California D...,TENSE TIMES:: BREAKING:: State of California D...,1339715563031994369,"{'username': 'melroseaction', 'id': 41006064, ...",7,1,7,1,...,[{'previewUrl': 'https://pbs.twimg.com/media/E...,,,,,,,,[Covid19],
4,https://twitter.com/AsmMuratsuchi/status/13389...,2020-12-15 19:02:27+00:00,We are at a very serious time in the #COVID19 ...,We are at a very serious time in the #COVID19 ...,1338922384586203137,"{'username': 'AsmMuratsuchi', 'id': 8312793820...",1,1,0,0,...,,,,,,,,,[COVID19],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,https://twitter.com/helenaironline/status/1309...,2020-09-26 19:35:06+00:00,Aerial footage over the Bobcat Fire near Los A...,Aerial footage over the Bobcat Fire near Los A...,1309939575326994432,"{'username': 'helenaironline', 'id': 12568062,...",0,0,0,0,...,,,,,,,,,,
96,https://twitter.com/WiStateJournal/status/1309...,2020-09-26 19:35:06+00:00,Aerial footage over the Bobcat Fire near Los A...,Aerial footage over the Bobcat Fire near Los A...,1309939575255695361,"{'username': 'WiStateJournal', 'id': 15752446,...",0,0,0,0,...,,,,,,,,,,
97,https://twitter.com/WinonaDailyNews/status/130...,2020-09-26 19:35:06+00:00,Aerial footage over the Bobcat Fire near Los A...,Aerial footage over the Bobcat Fire near Los A...,1309939575134060544,"{'username': 'WinonaDailyNews', 'id': 19304488...",0,0,0,0,...,,,,,,,,,,
98,https://twitter.com/Dispatch_Argus/status/1309...,2020-09-26 19:35:06+00:00,Aerial footage over the Bobcat Fire near Los A...,Aerial footage over the Bobcat Fire near Los A...,1309939575133962240,"{'username': 'Dispatch_Argus', 'id': 50000303,...",0,0,0,0,...,,,,,,,,,,


In [34]:
# Now, let's look at the content and hashtags of five random tweets
for i in range(5):
    tweet = los_angeles_tweets.iloc[randrange(tweet_count)]
    print(str(i) + ": " + str(tweet['content']))
    print("\t" + str(tweet['hashtags']) + "\n")

0: TENSE TIMES:: BREAKING:: State of California Data Says Los Angeles County Surpassed a STAGGERING 5100 #Covid19 Hospitalizations Today And News Reports of NEAR ZERO ICU Capacity.  Are We At A Crisis Point?  Shouldn’t We Totally Shut Down? https://t.co/ilIv7eEMeF
	['Covid19']

1: Its Dec, Los Angeles is on near March level lock down #COVID19, lots of stresses and Im finishing up a late manuscript so yes, #christmasmusic is happening, First song made me tear up, #MedTwitter what Christmas/holiday songs make you tear up every time?!? #imnotcryingyourecrying https://t.co/4LeefBzdsv
	['COVID19', 'christmasmusic', 'MedTwitter', 'imnotcryingyourecrying']

2: Aerial footage over the Bobcat Fire near Los Angeles earlier this week shows the massive wildfire sending smoke into the night sky, Brazil is hoping to roll out a potential coronavirus vaccine by December, video from China's air force shows… https://t.co/X7Kllm34w8
	None

3: Access to #COVID19 testing should be easy. 
That's why we prov