In [1]:
# Run the pip install command below if you don't already have the library
# !pip install git+https://github.com/JustAnotherArchivist/snscrape.git

import os
import math
import pandas as pd
import json
from pandas import json_normalize

import snscrape.modules.twitter as sntwitter
import itertools

from datetime import datetime
from random import randrange
import shutil

In [2]:
# Search terms: Content in the tweet
search_terms = [
    'coronavirus',
    'covid19',
    'pandemic',
    'quarantine',
]

# Creates a search parameter for the Twitter scraper
# since_date: inclusive
# until_date: exclusive
def search_builder(terms, city, since_date, until_date):    
    s = ''
    # Concatenate all search terms
    for i in range(len(terms) - 1):
        # NOTE: Inclusive OR statement
        s += '\"' + terms[i] + '\" OR '
    s += '\"' + terms[len(terms) - 1] + '\" '    
    
    # Within or the near the city
    s += 'near:\"' + city + '\" '
    
    #s += 'place:96683cc9126741d1 '

    
    # Since the first date
    s += 'since:' + since_date + ' '
    # Until the second date
    s += 'until:' + until_date
    
    return s

# Returns a DataFrame with at most tweet_limit tweets resulting from
# the search
def scrape(search_param, tweet_limit):
    return pd.DataFrame(itertools.islice(
        sntwitter.TwitterSearchScraper(search_param).get_items(), tweet_limit))

# Search for tweets in Los Angeles mentioning COVID terms
# from the beginning to the end of 2020.
los_angeles_search = search_builder(
    search_terms,
    "Los Angeles",
    '2020-01-01',
    '2020-12-31')

print(los_angeles_search)

"coronavirus" OR "covid19" OR "pandemic" OR "quarantine" near:"Los Angeles" since:2020-01-01 until:2020-12-31


In [3]:
# NOTE: Change this variable for number of tweets generated
tweet_limit = 100
since_date = '2021-01-01'
until_date = '2021-02-01'

total_tweets = 0

# NOTE: Below process with 10000 items for California takes about 45 - 60 minutes to run

print("Compiling tweets (max <=" + str(tweet_limit) + ") from California counties from " + since_date + " to " + until_date)

print("START: " + str(datetime.now()))

ca_counties = pd.read_csv("ca_counties.csv")

for i in range(len(ca_counties)):
    
    county = ca_counties.iloc[i]['County']
    
    county_search = search_builder(
        search_terms,
        county + ", CA",
        since_date,
        until_date)
    
    county_tweets = scrape(county_search, tweet_limit)
    
    total_tweets = total_tweets + len(county_tweets)
    
    # Generates JSON with respective county tweet data
    county_json = county_tweets.to_json(county + ".json", lines=True, orient='records')
    
    # Moves file to data/ca_tweets/ directory
    source = os.getcwd() + "/" + county + ".json"
    destination = os.getcwd() + "/data/ca_tweets/" + county + ".json"
    shutil.move(source, destination)
    
    print("(" + str(i + 1) + "/" + str(len(ca_counties)) + ") " + county + " - " + str(len(county_tweets)) + " tweets: " + str(datetime.now()))
    
print("END: " + str(datetime.now()))
print(str(total_tweets) + " tweets processed")

Compiling tweets (max <=100) from California counties from 2021-01-01 to 2021-02-01
START: 2021-10-25 11:22:32.470603
(1/58) Alameda County - 100 tweets: 2021-10-25 11:22:37.776989
(2/58) Alpine County - 100 tweets: 2021-10-25 11:22:41.087196
(3/58) Amador County - 0 tweets: 2021-10-25 11:22:45.778644
(4/58) Butte County - 11 tweets: 2021-10-25 11:22:47.255328
(5/58) Calaveras County - 0 tweets: 2021-10-25 11:22:48.975574
(6/58) Colusa County - 100 tweets: 2021-10-25 11:22:51.935583
(7/58) Contra Costa County - 100 tweets: 2021-10-25 11:22:58.118924
(8/58) Del Norte County - 0 tweets: 2021-10-25 11:22:59.614043
(9/58) El Dorado County - 9 tweets: 2021-10-25 11:23:01.161568
(10/58) Fresno County - 100 tweets: 2021-10-25 11:23:05.273577
(11/58) Glenn County - 100 tweets: 2021-10-25 11:23:08.365838
(12/58) Humboldt County - 4 tweets: 2021-10-25 11:23:09.851139
(13/58) Imperial County - 20 tweets: 2021-10-25 11:23:11.592046
(14/58) Inyo County - 100 tweets: 2021-10-25 11:23:14.681287
(15/5

# References
## California Counties and Cities: https://www.counties.org/cities-within-each-county