In [144]:
import json
import time
import csv
import requests
import os
from pprint import pprint
from configparser import ConfigParser
from random import randint

In [145]:
# When True, the fetch loop below stops after the first result page and
# pretty-prints one tweet of that page instead of paging through all results.
DEBUG_MODE: bool = True

In [146]:
# Read api_key from config.ini
api_key: str | None = None
try:
    print("Reading api-key from config.ini ...")
    config = ConfigParser()
    config.read('conf/config.ini') # adjust the path to config.ini
    api_key = config['TWITTERAPI']['API_KEY']
    print(f"API Key gefunden: {api_key[:5]} ...")

except Exception as e:
    print("Failed to read credentials. Exit with error:")
    print(e)

Reading api-key from config.ini ...
API Key gefunden: 1228a ...


In [147]:
# Abort early when the credentials cell did not produce a key
if api_key is None:
    raise ValueError("Could not read api key.")

# Request parameters for the advanced-search endpoint
url = "https://api.twitterapi.io/twitter/tweet/advanced_search"
headers = {"X-API-Key": api_key}
cursor = ""  # empty cursor is sent with the first request

# Accumulator for the tweets collected by the fetch loop
all_tweets = []

# 1-based iteration counter, used in the progress messages
i = 1

In [148]:
# Page through the search results until the API reports no further page.
# Every exit path (debug stop, last page, unusable cursor, any error) breaks
# out of the loop, so it can no longer spin without delay on a failing request.
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled connection blocks forever

while True:
    try:
        # Make https request
        response = requests.request(
            "GET",
            url,
            headers=headers,
            params={
                "query": "from:elonmusk since:2023-05-01 until:2025-05-01 -is:retweet",
                "cursor": cursor
                # querytype: "latest" by default
            },
            timeout=REQUEST_TIMEOUT_SECONDS,
        )

        # Debugging infos
        print(f"Status Code: {response.status_code}")

        # Raise for 4xx/5xx; any other non-200 answer is treated as an error
        # too, because its body is not expected to carry a result page.
        response.raise_for_status()
        if response.status_code != 200:
            raise RuntimeError(f"Unexpected status code: {response.status_code}")

        # Store response
        json_data = response.json()

        # Extract necessary vars ("tweets" may be missing or null)
        tweets = json_data.get("tweets") or []
        has_next_page = json_data.get("has_next_page")
        next_cursor = json_data.get("next_cursor")

        # Store and debug tweets
        all_tweets.extend(tweets)
        new_tweet_count = len(tweets)
        print(f"New Tweets in iteration {i}: {new_tweet_count}")
        print(f"Total Tweet count: {len(all_tweets)}")

        # Break if debug mode is active
        if DEBUG_MODE:
            print(f'DEBUG_MODE active: stopping after first page. Got {new_tweet_count} tweets:')
            if tweets:
                # Show the last tweet of the page as a sample of the structure
                pprint(tweets[-1])
            break

        if not has_next_page:
            print(f"Got {new_tweet_count} tweets. Finished.")
            break

        # A missing or unchanged cursor would request the same page forever
        if not next_cursor or next_cursor == cursor:
            print("API reported another page but returned no new cursor. Stopping.")
            break

        # Prepare the next iteration
        print(f"Next page detected @ '{next_cursor[:10]}'. Updating cursor ...")
        cursor = next_cursor
        i += 1

        # Wait random time to avoid rate limits
        delay = randint(1, 5)
        print(f"Starting new iteration in {delay} seconds ...")
        time.sleep(delay)

    except Exception as e:
        print("Error when requesting and processing tweets:")
        print(e)
        break



Status Code: 200
New Tweets in iteration 1: 20
Total Tweet count: 20
DEBUG_MODE active: stopping after first page. Got 20 tweets:
{'author': {'affiliatesHighlightedLabel': {},
            'automatedBy': None,
            'canDm': False,
            'canMediaTag': False,
            'coverPicture': 'https://pbs.twimg.com/profile_banners/44196397/1739948056',
            'createdAt': 'Tue Jun 02 20:12:29 +0000 2009',
            'description': '',
            'entities': {'description': {'urls': []}, 'url': {}},
            'fastFollowersCount': 0,
            'favouritesCount': 143653,
            'followers': 219954626,
            'following': 1115,
            'hasCustomTimelines': True,
            'id': '44196397',
            'isAutomated': False,
            'isBlueVerified': True,
            'isTranslator': False,
            'isVerified': False,
            'location': '',
            'mediaCount': 3813,
            'name': 'Elon Musk',
            'pinnedTweetIds': ['19170997

In [149]:
def flatten_tweet(tweet):
    """Return a flat copy of a tweet dict that can be written as one CSV row.

    Values that are dicts or lists are serialised to JSON strings; all other
    values are copied unchanged. A falsy tweet (None, {}) yields {}.
    If serialising a field fails, the error is printed and the field is set
    to None.
    """
    if not tweet:
        return {}

    flat = {}
    for k, v in tweet.items():
        try:
            if isinstance(v, (dict, list)):
                flat[k] = json.dumps(v, ensure_ascii=False)
            else:
                flat[k] = v
        except Exception as e:
            print(f"Error when processing field {k}: {e}")
            flat[k] = None
    return flat


def append_tweets_to_csv(tweets, filepath):
    """Append the tweets that are not yet stored in ``filepath``.

    Tweets are flattened with ``flatten_tweet`` and de-duplicated by their
    ``id`` field, both against the rows already in the file and against
    earlier tweets of the same call. Tweets without an ``id`` are skipped.

    Args:
        tweets: iterable of tweet dicts.
        filepath: target CSV path; missing parent directories are created.

    Returns:
        The number of rows written.
    """
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Collect the ids and the header of an already existing file
    known_ids = set()
    fieldnames = None
    if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
        with open(filepath, "r", encoding="utf-8", newline='') as f:
            reader = csv.DictReader(f)
            fieldnames = reader.fieldnames
            for row in reader:
                if row.get("id"):
                    known_ids.add(row["id"])
    write_header = not fieldnames

    # Select the rows to write; ids are compared as strings because that is
    # how the csv module returns them when the file is read back.
    rows = []
    for tweet in tweets:
        flat = flatten_tweet(tweet)
        tweet_id = flat.get("id")
        if tweet_id is None:
            print("Skipping tweet without id.")
            continue
        tweet_id = str(tweet_id)
        if tweet_id in known_ids:
            continue
        known_ids.add(tweet_id)
        rows.append(flat)

    if not rows:
        return 0

    if write_header:
        # New file: use every key that occurs in any row, in first-seen order,
        # so a tweet with extra fields cannot abort the write half-way.
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    else:
        # Existing file: keep its header so appended rows stay aligned.
        dropped = sorted({key for row in rows for key in row} - set(fieldnames))
        if dropped:
            print(f"Warning: fields missing from the existing CSV header are not written: {dropped}")

    with open(filepath, "a", encoding="utf-8", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        if write_header:
            writer.writeheader()
        writer.writerows(rows)

    return len(rows)


try:
    print('Finished request. Converting results to file ...')

    # No exit() here: inside a notebook it would shut down the kernel
    if not all_tweets:
        print("No tweets to write. Exiting ...")
    else:
        # Target location
        filepath = "res/tweets.csv"
        new_count = append_tweets_to_csv(all_tweets, filepath)
        print(f"Wrote {new_count} new tweets to CSV.")

except Exception as e:
    print('Failed to convert tweets to file. Exit with error:')
    print(e)

Finished request. Converting results to file ...
Wrote 0 new tweets to CSV.


In [150]:
# If the previous cell says it cannot find the location, run this first
# import os
# os.chdir(os.path.expanduser("~/PycharmProjects/COIN_Repo")) # adjust path to root dir of project
# os.getcwd()