In [1]:
# Import modules
import snscrape.modules.twitter as sntwitter

import json
import os
import datetime

## Collecting tweets matching a query during a specific time range

In [2]:
def download_query_tweets(query, date_since, date_until, max=1000):
    """Scrape tweets matching ``query`` within a date window.

    The search string handed to the scraper is ``query`` with
    ``since:<date_since>`` and ``until:<date_until>`` operators appended.

    Parameters
    ----------
    query : str
        Search terms (e.g. a hashtag).
    date_since : str
        Value for the ``since:`` search operator.
    date_until : str
        Value for the ``until:`` search operator.
    max : int or None, optional
        Upper bound on the number of tweets collected (default 1000).
        ``None`` removes the cap; zero or a negative value returns an empty
        list without querying the scraper. (The name shadows the builtin
        ``max``; it is kept so existing keyword callers keep working.)

    Returns
    -------
    list of dict
        One dict per tweet with keys ``id``, ``created_at`` (formatted as
        ``'%Y-%m-%d %H:%M'``), ``text`` and ``username``.
    """
    print(f"Downloading tweets for query: '{query}' from {date_since} to {date_until} (max of {max})")

    tweet_list = []

    # Nothing to collect: skip the scraper entirely.
    if max is not None and max <= 0:
        return tweet_list

    search = f'{query} since:{date_since} until:{date_until}'

    for tweet in sntwitter.TwitterSearchScraper(search).get_items():
        tweet_list.append({
            'id': tweet.id,
            'created_at': tweet.date.strftime('%Y-%m-%d %H:%M'),
            'text': tweet.content,
            'username': tweet.username
        })

        # Stop as soon as the cap is reached, *before* asking the generator
        # for another item, so no extra tweet is fetched and discarded.
        if max is not None and len(tweet_list) >= max:
            break

    return tweet_list

## Nailing Down Time Points
https://www.ajmc.com/view/a-timeline-of-covid19-developments-in-2020  
The link above has a timeline of COVID-19 events throughout the year, since the onset of the pandemic.  

Here is another link with similar information about H1N1:  
https://www.cdc.gov/flu/pandemic-resources/2009-pandemic-timeline.html

In [3]:
# Output directory for the scraped tweets (one JSON file per query).
DATA_DIR = 'data/pre_covid'

# exist_ok=True keeps the cell re-runnable and avoids the race between
# checking for the directory and creating it.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2015-01-01" # TODO: change these to reflect whatever date range you want to do
until = "2016-01-01"
queries = [ '#sb277', "#cdcwhistleblower", '#vaccineswork', '#antivaccine', '#antivax', '#vaxxed', '#vaccines'] 

for query in queries:
    tweet_list = download_query_tweets(query, since, until)

    # File name encodes the query (spaces -> underscores) and the date window.
    outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, query.replace(' ','_'), since, until)

    print('\t retrieved {} tweets...\n'.format(len(tweet_list)))

    # Stream the list straight to disk instead of building the JSON string first.
    with open(outfilename, 'w', encoding='utf-8') as out:
        json.dump(tweet_list, out)

Downloading tweets for query: '#sb277' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

Downloading tweets for query: '#cdcwhistleblower' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

Downloading tweets for query: '#vaccineswork' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

Downloading tweets for query: '#antivaccine' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

Downloading tweets for query: '#antivax' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

Downloading tweets for query: '#vaxxed' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 13 tweets...

Downloading tweets for query: '#vaccines' from 2015-01-01 to 2016-01-01 (max of 1000)
	 retrieved 1000 tweets...

