# Retrieving Tweets by Searchtag

In [1]:
import requests 
import json 
import pandas as pd

# imports the twitter_secrets python file in which we store the twitter API keys
from twitter_secrets import twitter_secrets as ts

# puts the bearer token in the request header
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers
        
# sets the rules on which tweets to retrieve    
def set_rules(headers, delete, bearer_token, rules):
    payload = {"add": rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    
# retrieves the current set of rules from the API     
def get_rules(headers, bearer_token):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()

# tells the API to delete our current rule configuration 
def delete_all_rules(headers, bearer_token, rules):
    if rules is None or "data" not in rules:
        return None

    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))

# starts the stream, iterates through the lines of the response and for each line calls the save_tweets function
def get_stream(headers, set, bearer_token, expansions, fields):
    data = []
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream" + expansions + fields, headers=headers, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    i = 0
    for response_line in response.iter_lines():
        i += 1
        if i == max_results:
            break
        else:
            json_response = json.loads(response_line)
            #print(json.dumps(json_response, indent=4, sort_keys=True))
            try:
                save_tweets(json_response)
            except (json.JSONDecodeError, KeyError) as err:
                # In case the JSON fails to decode, we skip this tweet
                print(f"{i}/{max_results}: ERROR: encountered a problem with a line of data... \n")
                continue

# appends information from tweets to a dataframe                
def save_tweets(tweet):
    print(json.dumps(tweet, indent=4, sort_keys=True))
    data = tweet['data']
    public_metrics = data['public_metrics']
    tweet_list.append([data['id'], data['author_id'], data['created_at'], data['text'], public_metrics['like_count']])

# the max number of tweets that will be returned
max_results = 20

# You can adjust the rules if needed
search_rules = [
    {"value": "dog has:images", "tag": "dog pictures", "lang": "en"},
    {"value": "cat has:images -grumpy", "tag": "cat pictures", "lang": "en"},
]

# defines the fields which we want to retrieve
tweet_fields = "?tweet.fields=attachments,author_id,created_at,public_metrics"

# we only retrieve the tweet object, but if we wanted to retrieve other objects (e.g., media), we would add them to the expansions string
expansions = ""
tweet_list = []

bearer_token = ts.BEARER_TOKEN
headers = create_headers(bearer_token)
rules = get_rules(headers, bearer_token)
delete = delete_all_rules(headers, bearer_token, rules)
set = set_rules(headers, delete, bearer_token, search_rules)
get_stream(headers, set, bearer_token, expansions, tweet_fields)

df = pd.DataFrame (tweet_list, columns = ['tweetid', 'author_id' , 'created_at', 'text', 'like_count'])
df

{"data": [{"id": "1345871235985584128", "value": "dog has:images", "tag": "dog pictures"}], "meta": {"sent": "2021-01-05T12:14:43.176Z"}}
{"meta": {"sent": "2021-01-05T12:14:44.455Z", "summary": {"deleted": 1, "not_deleted": 0}}}
{"data": [{"value": "cat has:images -grumpy", "tag": "cat pictures", "id": "1346429926728232962"}, {"value": "dog has:images", "tag": "dog pictures", "id": "1346429926728232961"}], "meta": {"sent": "2021-01-05T12:14:45.743Z", "summary": {"created": 2, "not_created": 0, "valid": 2, "invalid": 0}}}
200
{
    "data": {
        "author_id": "1333943095562547202",
        "created_at": "2021-01-05T12:14:36.000Z",
        "id": "1346429891139543041",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "text": "@JuntsXCat @LauraBorras @MovEsquerres https://t.co/m9onumGTHF\nFIL sobre Albert Donaire @albertdmcat, un intolerant a les llistes de @JuntsXCat"


{
    "data": {
        "attachments": {
            "media_keys": [
                "3_1346381062885163011"
            ]
        },
        "author_id": "109838901",
        "created_at": "2021-01-05T12:14:39.000Z",
        "id": "1346429904930230273",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 1629
        },
        "text": "RT @tanu_mfmf: \u307f\u305f\u3089\u3057\u53c2\u6226\u3057\u307e\u3059\ud83d\ude38\n #\u3069\u3046\u3076\u3064\u306b\u30cf\u30c6\u30ca\u3064\u3051\u308b\u3068\u53ef\u611b\u3044\u9078\u624b\u6a29\n\n#\u732b\u306e\u3044\u308b\u66ae\u3089\u3057 #\u732b #\u306d\u3053 #cat #cats #\u4fdd\u8b77\u732b #CatsOfTwitter https://t.co/OYLvwYPzRv"
    },
    "matching_rules": [
        {
            "id": 1346429926728232962,
            "tag": "cat pictures"
        }
    ]
}
{
    "data": {
        "attachments": {
            "media_keys": [
                "3_134641805013

Unnamed: 0,tweetid,author_id,created_at,text,like_count
0,1346429891139543041,1333943095562547202,2021-01-05T12:14:36.000Z,@JuntsXCat @LauraBorras @MovEsquerres https://...,0
1,1346429891353272321,102741844,2021-01-05T12:14:36.000Z,RT @tanu_mfmf: もちろんあたちも参加しまーす🐱\n #どうぶつにハテナつけると...,0
2,1346429893928620034,1319465137909055488,2021-01-05T12:14:36.000Z,RT @LivetubeSTAR: シャドバユーザー大会 ㉚分後に開催\nローテBO3 デッ...,0
3,1346429895530967045,1243377247899222019,2021-01-05T12:14:37.000Z,RT @AgentSquirrel45: @pann4trump @GAsandwich @...,0
4,1346429896478875648,1315998706110468096,2021-01-05T12:14:37.000Z,RT @AureMagik: New NSFW from the Curious Cat s...,0
5,1346429897229537285,1192444764966113280,2021-01-05T12:14:37.000Z,RT @tanu_mfmf: みたらし参戦します😸\n #どうぶつにハテナつけると可愛い選手...,0
6,1346429899150483457,1238428065811718145,2021-01-05T12:14:38.000Z,RT @niszbeyo: @afiqxsy_ A cat lover’s feeling ...,0
7,1346429901142806528,1126268686476632064,2021-01-05T12:14:38.000Z,RT @jenniedebuts: The caption of this online r...,0
8,1346429900975124480,1142880804223225858,2021-01-05T12:14:38.000Z,Hi folks! 😊 I hope everyone is keeping safe &a...,0
9,1346429903391105024,1343227823226576897,2021-01-05T12:14:39.000Z,"RT @HamDried: You guys, my dog died today. Sh...",0
