In [69]:
# Run the pip install command below if you don't already have the library
# !pip install git+https://github.com/JustAnotherArchivist/snscrape.git

import os
import math
import pandas as pd
import json
from pandas import json_normalize

import snscrape.modules.twitter as sntwitter
import itertools

from datetime import datetime
from random import randrange
import shutil

In [70]:
# Search terms: Content in the tweet
search_terms = [
    'coronavirus',
    'covid19',
    'pandemic',
    'quarantine',
]

# Creates a search parameter for the Twitter scraper
# since_date: inclusive
# until_date: exclusive
def search_builder(terms, area, since_date, until_date):    
    s = ''
    # Concatenate all search terms
    for i in range(len(terms) - 1):
        # NOTE: Inclusive OR statement
        s += '\"' + terms[i] + '\" OR '
    s += '\"' + terms[len(terms) - 1] + '\" '    
    
    # Within or the near the area
    s += 'near:\"' + area + '\" '
    # Since the first date
    s += 'since:' + since_date + ' '
    # Until the second date
    s += 'until:' + until_date
    
    return s

# Returns a DataFrame with at most tweet_limit tweets resulting from
# the search
def scrape(search_param, tweet_limit):
    return pd.DataFrame(itertools.islice(
        sntwitter.TwitterSearchScraper(search_param).get_items(), tweet_limit))

# Search for tweets in Los Angeles mentioning COVID terms
# from the beginning to the end of 2020.
los_angeles_search = search_builder(
    search_terms,
    "Los Angeles",
    '2020-01-01',
    '2020-12-31')

print(los_angeles_search)

"coronavirus" OR "covid19" OR "pandemic" OR "quarantine" near:"Los Angeles" since:2020-01-01 until:2020-12-31


In [71]:
# ********* STATE TWEET SCRAPING *********

def state_scraper(state, abbr, limit, since_date, until_date):

    total_tweets = 0

    print("Compiling tweets (max <=" + str(limit) + ") from " + state + " counties from " + since_date + " to " + until_date)

    print("START: " + str(datetime.now()))

    counties = pd.read_csv("data/counties/" + abbr + "_counties.csv")

    for i in range(len(counties)):

        county = counties.iloc[i]['County']
        county.strip()

        county_search = search_builder(
            search_terms,
            county + " County, " + abbr,
            since_date,
            until_date)

        county_tweets = scrape(county_search, limit)

        total_tweets = total_tweets + len(county_tweets)

        # Generates JSON with respective county tweet data
        county_json = county_tweets.to_json(county + ".json", lines=True, orient='records')

        source = os.getcwd() + "/" + county + ".json"
        destination = os.getcwd() + "/data/tweets/" + abbr + "_tweets/" + county + ".json"
        shutil.move(source, destination)

        print("(" + str(i + 1) + "/" + str(len(counties)) + ") " + county + " - " + str(len(county_tweets)) + " tweets: " + str(datetime.now()))

    print("END: " + str(datetime.now()))
    print(str(total_tweets) + " tweets processed")

    
# California Tweet Scraper
def ca_scraper(tweet_limit, since_date, until_date):
    state_scraper("California", "CA", tweet_limit, since_date, until_date)

# Florida Tweet Scraper
def fl_scraper(tweet_limit, since_date, until_date):
    state_scraper("Florida", "FL", tweet_limit, since_date, until_date)

# New York Tweet Scraper
def ny_scraper(tweet_limit, since_date, until_date):
    state_scraper("New York", "NY", tweet_limit, since_date, until_date)
    
# Texas Tweet Scraper
def tx_scraper(tweet_limit, since_date, until_date):
    state_scraper("Texas", "TX", tweet_limit, since_date, until_date)

In [72]:
ca_scraper(1000, "2021-10-01", "2021-10-09")

Compiling tweets (max <=1000) from California counties from 2021-10-01 to 2021-10-09
START: 2021-10-25 13:38:44.341007
(1/58) Alameda - 21 tweets: 2021-10-25 13:38:45.881042
(2/58) Alpine - 1000 tweets: 2021-10-25 13:39:09.837050
(3/58) Amador - 0 tweets: 2021-10-25 13:39:11.102162
(4/58) Butte - 1 tweets: 2021-10-25 13:39:12.396181
(5/58) Calaveras - 0 tweets: 2021-10-25 13:39:13.496967
(6/58) Colusa - 1000 tweets: 2021-10-25 13:39:36.111556
(7/58) Contra Costa - 19 tweets: 2021-10-25 13:39:37.566732
(8/58) Del Norte - 0 tweets: 2021-10-25 13:39:38.720221
(9/58) El Dorado - 0 tweets: 2021-10-25 13:39:39.831705
(10/58) Fresno - 7 tweets: 2021-10-25 13:39:41.169946
(11/58) Glenn - 1000 tweets: 2021-10-25 13:40:02.603418
(12/58) Humboldt - 3 tweets: 2021-10-25 13:40:03.850985
(13/58) Imperial - 0 tweets: 2021-10-25 13:40:05.220279
(14/58) Inyo - 1000 tweets: 2021-10-25 13:40:27.310050
(15/58) Kern - 0 tweets: 2021-10-25 13:40:28.652667
(16/58) Kings - 0 tweets: 2021-10-25 13:40:29.886437

In [73]:
fl_scraper(1000, "2021-10-01", "2021-10-09")

Compiling tweets (max <=1000) from Florida counties from 2021-10-01 to 2021-10-09
START: 2021-10-25 13:43:53.069049
(1/67) Alachua - 2 tweets: 2021-10-25 13:43:54.396436
(2/67) Baker - 0 tweets: 2021-10-25 13:43:55.864294
(3/67) Bay - 3 tweets: 2021-10-25 13:43:57.095982
(4/67) Bradford - 2 tweets: 2021-10-25 13:43:58.506500
(5/67) Brevard - 2 tweets: 2021-10-25 13:43:59.989494
(6/67) Broward - 27 tweets: 2021-10-25 13:44:02.865191
(7/67) Calhoun - 0 tweets: 2021-10-25 13:44:04.075572
(8/67) Charlotte - 1 tweets: 2021-10-25 13:44:06.441782
(9/67) Citrus - 1 tweets: 2021-10-25 13:44:07.913851
(10/67) Clay - 3 tweets: 2021-10-25 13:44:09.322266
(11/67) Collier - 4 tweets: 2021-10-25 13:44:11.197192
(12/67) Columbia - 0 tweets: 2021-10-25 13:44:12.497461
(13/67) De Soto - 1000 tweets: 2021-10-25 13:44:33.972711
(14/67) Dixie - 0 tweets: 2021-10-25 13:44:35.301368
(15/67) Duval - 1 tweets: 2021-10-25 13:44:36.607138
(16/67) Escambia - 0 tweets: 2021-10-25 13:44:38.432001
(17/67) Flagler - 

In [74]:
ny_scraper(1000, "2021-10-01", "2021-10-09")

Compiling tweets (max <=1000) from New York counties from 2021-10-01 to 2021-10-09
START: 2021-10-25 13:50:47.127010
(1/62) Albany - 2 tweets: 2021-10-25 13:50:48.532453
(2/62) Allegany - 0 tweets: 2021-10-25 13:50:50.087874
(3/62) Bronx - 415 tweets: 2021-10-25 13:51:09.882069
(4/62) Broome - 0 tweets: 2021-10-25 13:51:11.929999
(5/62) Cattaraugus - 1000 tweets: 2021-10-25 13:51:33.535964
(6/62) Cayuga - 1000 tweets: 2021-10-25 13:51:54.554562
(7/62) Chautauqua - 1 tweets: 2021-10-25 13:51:55.916112
(8/62) Chemung - 1000 tweets: 2021-10-25 13:52:17.777283
(9/62) Chenango - 1000 tweets: 2021-10-25 13:52:39.533305
(10/62) Clinton - 0 tweets: 2021-10-25 13:52:40.740557
(11/62) Columbia - 0 tweets: 2021-10-25 13:52:42.837750
(12/62) Cortland - 0 tweets: 2021-10-25 13:52:44.629477
(13/62) Delaware - 0 tweets: 2021-10-25 13:52:45.941733
(14/62) Dutchess - 6 tweets: 2021-10-25 13:52:47.189758
(15/62) Erie - 11 tweets: 2021-10-25 13:52:48.804933
(16/62) Essex - 1000 tweets: 2021-10-25 13:53:1

In [75]:
tx_scraper(1000, "2021-10-01", "2021-10-09")

Compiling tweets (max <=1000) from Texas counties from 2021-10-01 to 2021-10-09
START: 2021-10-25 13:58:00.572143
(1/254) Anderson - 1000 tweets: 2021-10-25 13:58:22.811629
(2/254) Andrews - 1000 tweets: 2021-10-25 13:58:45.415645
(3/254) Angelina - 1000 tweets: 2021-10-25 13:59:07.836704
(4/254) Aransas - 1000 tweets: 2021-10-25 13:59:29.363299
(5/254) Archer - 1000 tweets: 2021-10-25 13:59:50.296295
(6/254) Armstrong - 1000 tweets: 2021-10-25 14:00:12.225854
(7/254) Atascosa - 1000 tweets: 2021-10-25 14:00:45.014286
(8/254) Austin - 1000 tweets: 2021-10-25 14:01:06.418058
(9/254) Bailey - 1000 tweets: 2021-10-25 14:01:27.649272
(10/254) Bandera - 1000 tweets: 2021-10-25 14:01:48.893844
(11/254) Bastrop - 0 tweets: 2021-10-25 14:01:51.990935
(12/254) Baylor - 1000 tweets: 2021-10-25 14:02:13.239977
(13/254) Bee - 1000 tweets: 2021-10-25 14:02:34.524332
(14/254) Bell - 3 tweets: 2021-10-25 14:02:36.519826
(15/254) Bexar - 33 tweets: 2021-10-25 14:02:38.871012
(16/254) Blanco - 1000 twe

(140/254) Lamb - 1000 tweets: 2021-10-25 14:38:49.309231
(141/254) Lampasas - 1000 tweets: 2021-10-25 14:39:10.762286
(142/254) La Salle - 1000 tweets: 2021-10-25 14:39:32.756414
(143/254) Lavaca - 1000 tweets: 2021-10-25 14:39:53.370343
(144/254) Lee - 1000 tweets: 2021-10-25 14:40:15.248813
(145/254) Leon - 1000 tweets: 2021-10-25 14:40:37.113497
(146/254) Liberty - 0 tweets: 2021-10-25 14:40:40.910772
(147/254) Limestone - 0 tweets: 2021-10-25 14:40:42.102977
(148/254) Lipscomb - 1000 tweets: 2021-10-25 14:41:04.252068
(149/254) Live Oak - 1000 tweets: 2021-10-25 14:41:26.000914
(150/254) Llano - 1000 tweets: 2021-10-25 14:41:48.170697
(151/254) Loving - 0 tweets: 2021-10-25 14:41:49.349953
(152/254) Lubbock - 1000 tweets: 2021-10-25 14:42:10.793298
(153/254) Lynn - 1000 tweets: 2021-10-25 14:42:32.583014
(154/254) Madison - 1000 tweets: 2021-10-25 14:42:54.320934
(155/254) Marion - 1000 tweets: 2021-10-25 14:43:16.667794
(156/254) Martin - 0 tweets: 2021-10-25 14:43:18.060368
(157/

# References

California Counties: https://www.mapsofworld.com/usa/states/california/california-county-map.html

Florida Counties: https://www.mapsofworld.com/usa/states/florida/florida-county-map.html

New York Counties: https://www.mapsofworld.com/usa/states/new-york/new-york-county-map.html

Texas Counties: https://www.mapsofworld.com/usa/states/texas/texas-county-map.html