In [2]:
import os
import json
import requests

# Shared constants for the Twitter API v2 stream endpoint used by the cells below.
CONTENT_TYPE_JSON = "application/json"
TWITTER_API_BASE_URL = "https://api.twitter.com/2/tweets/search/stream"

# The bearer token is taken from the environment so it is never stored in the notebook.
TWITTER_BEARER_TOKEN = os.getenv("TWITTER_BEARER_TOKEN")

# Stop right away when the token is missing: every request below sends it.
if TWITTER_BEARER_TOKEN is None:
    raise SystemExit("Environment variable 'TWITTER_BEARER_TOKEN' is not set!")

def get_auth_header(bearer_token):
    """Build the HTTP ``Authorization`` header for a bearer token.

    Args:
        bearer_token: Token value placed after the ``Bearer `` prefix.

    Returns:
        dict: A single-entry mapping ``{"Authorization": "Bearer <token>"}``.
    """
    return {"Authorization": f"Bearer {bearer_token}"}

def get_headers_for_rules(bearer_token):
    """Build the request headers used when posting filter rules.

    Args:
        bearer_token: Token value placed after the ``Bearer `` prefix.

    Returns:
        dict: Mapping with a ``Content-type`` entry (``CONTENT_TYPE_JSON``)
        followed by the bearer ``Authorization`` entry.
    """
    return {
        "Content-type": CONTENT_TYPE_JSON,
        "Authorization": f"Bearer {bearer_token}",
    }

In [3]:
# If needed: Check the currently registered filters for the data stream
response = requests.get(f"{TWITTER_API_BASE_URL}/rules",
                        headers=get_auth_header(TWITTER_BEARER_TOKEN))

# Any status other than 200 is treated as a failure; the status line and the
# response body are included in the error so the cause is visible.
if response.status_code != 200:
    raise Exception("Can't fetch filter rules (HTTP: {} {}): {}"
                    .format(response.status_code, response.reason, response.text))

# Pretty-print the rules; their IDs are what the deletion cell needs.
print("Currently registered filter rules:\n\n {}"
      .format(json.dumps(response.json(), indent=4, sort_keys=True)))

Currently registered filter rules:

 {
    "data": [
        {
            "id": "1378112825051246596",
            "tag": "coronavirus",
            "value": "(pandemic OR coronavirus OR covid-19 OR #COVID-19 OR #coronavirus OR #SARSCoV2) -is:retweet"
        }
    ],
    "meta": {
        "sent": "2021-04-02T22:55:01.887Z"
    }
}


In [5]:
# If needed: Delete filter rules (by ID / IDs) from REST call in cell above
deletion_ids = [
    "1378111750206337024",
    "1378094676578861056",
]
payload = {"delete": {"ids": deletion_ids}}

# The rules endpoint takes the deletion request as a JSON body via POST.
response = requests.post(
    f"{TWITTER_API_BASE_URL}/rules",
    headers=get_auth_header(TWITTER_BEARER_TOKEN),
    json=payload,
)

# Only HTTP 200 counts as success; otherwise report status line and body.
if response.status_code != 200:
    raise Exception(
        f"Can't delete rule(s) (HTTP: {response.status_code} {response.reason}): {response.text}"
    )

print("Filter rules have been deleted.")

Filter rules have been deleted.


In [7]:
# Specify filter criteria for the stream
filter_rules = [
    {
        "value": "(pandemic OR coronavirus OR covid-19 OR #COVID-19 OR #coronavirus OR #SARSCoV2) -is:retweet",
        "tag": "coronavirus",
    },
]
payload = {"add": filter_rules}

# Register the rules by POSTing them as a JSON body to the rules endpoint.
response = requests.post(
    f"{TWITTER_API_BASE_URL}/rules",
    headers=get_headers_for_rules(TWITTER_BEARER_TOKEN),
    json=payload,
)

# This cell expects HTTP 201; any other status is reported with its body.
if response.status_code != 201:
    raise Exception(
        f"Can't add filter rules (HTTP {response.status_code} {response.reason}):\n {response.text}"
    )

print("Filter Rules for Twitter Stream API have been added.")

Filter Rules for Twitter Stream API have been added.


In [None]:
# Fetch Tweets from the stream and save as dataset
params = {
    "tweet.fields": "created_at,public_metrics,source,lang",
    "expansions": "author_id"
}
dataset_path = "./data/dataset-1.json"

response = requests.get(TWITTER_API_BASE_URL,
                        headers=get_auth_header(TWITTER_BEARER_TOKEN),
                        params=params,
                        stream=True)

# Using the response as a context manager releases the streaming connection on
# every exit path (error status, exception while writing, kernel interrupt).
with response:
    if response.status_code != 200:
        raise Exception("Can't get stream (HTTP {} {}): {}"
                        .format(response.status_code, response.reason, response.text))

    # Make sure the target directory exists before opening the file in append mode.
    os.makedirs(os.path.dirname(dataset_path), exist_ok=True)

    with open(dataset_path, "a", encoding="utf-8") as dataset:
        for response_line in response.iter_lines():
            # Empty lines carry no payload and are skipped.
            if response_line:
                # Parsing first ensures only valid JSON reaches the file.
                json_response = json.loads(response_line)
                # iter_lines() strips the line delimiter, so a newline is written
                # back to keep one JSON document per line; otherwise consecutive
                # tweets are glued together and the file cannot be parsed.
                dataset.write(response_line.decode("utf-8") + "\n")

                print(json.dumps(json_response, indent=4, ensure_ascii=False, sort_keys=True))

{
    "data": {
        "author_id": "83379403",
        "created_at": "2021-04-02T22:56:41.000Z",
        "id": "1378119221503266817",
        "lang": "es",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "source": "Twitter for iPhone",
        "text": "¡NO SEAMOS IDIOTAS COMO LAGARTO Y SUS AMIGOS! ESTO PASA EN CHILE —DESPUES— DE HABERSE VACUNADO: https://t.co/WqcSfEh5sm"
    },
    "includes": {
        "users": [
            {
                "id": "83379403",
                "name": "Adolfo Giurfa",
                "username": "AdolfoGiurfa"
            }
        ]
    },
    "matching_rules": [
        {
            "id": 1378112825051246596,
            "tag": "coronavirus"
        }
    ]
}
{
    "data": {
        "author_id": "1329208027421974528",
        "created_at": "2021-04-02T22:56:41.000Z",
        "id": "1378119222379880450",
        "lang": "und",
    

{
    "data": {
        "author_id": "1253872025233502213",
        "created_at": "2021-04-02T22:56:44.000Z",
        "id": "1378119234979516416",
        "lang": "en",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "source": "WordPress.com",
        "text": "'Big Ang' Star Linda Torres Dead at 67, Had COVID-19 https://t.co/BUQzI7cTCE"
    },
    "includes": {
        "users": [
            {
                "id": "1253872025233502213",
                "name": "Myfashion21",
                "username": "Myfashion211"
            }
        ]
    },
    "matching_rules": [
        {
            "id": 1378112825051246596,
            "tag": "coronavirus"
        }
    ]
}
{
    "data": {
        "author_id": "1260048132697665537",
        "created_at": "2021-04-02T22:56:44.000Z",
        "id": "1378119235956838400",
        "lang": "pt",
        "public_metrics": {
     

{
    "data": {
        "author_id": "1329208027421974528",
        "created_at": "2021-04-02T22:56:46.000Z",
        "id": "1378119244391530497",
        "lang": "und",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "source": "Twitter for iPhone",
        "text": "@CorpusDelicti01 @jmbenson1491 @chuckwoolery @JackPosobiec https://t.co/q8A5dzl5O0"
    },
    "includes": {
        "users": [
            {
                "id": "1329208027421974528",
                "name": "Taro",
                "username": "005Qin"
            }
        ]
    },
    "matching_rules": [
        {
            "id": 1378112825051246596,
            "tag": "coronavirus"
        }
    ]
}
{
    "data": {
        "author_id": "1048395856616062978",
        "created_at": "2021-04-02T22:56:46.000Z",
        "id": "1378119244802580486",
        "lang": "en",
        "public_metrics": {
      

{
    "data": {
        "author_id": "934044199061778432",
        "created_at": "2021-04-02T22:56:49.000Z",
        "id": "1378119256185913344",
        "lang": "fr",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "source": "Twitter Web App",
        "text": "ils vont reprendre les concerts 😭 https://t.co/4TUzFMmCSD"
    },
    "includes": {
        "users": [
            {
                "id": "934044199061778432",
                "name": "Bella 🇫🇷⭐⭐",
                "username": "SoDakotaBella"
            }
        ]
    },
    "matching_rules": [
        {
            "id": 1378112825051246596,
            "tag": "coronavirus"
        }
    ]
}
