## CREST DAO SCRAPER
This Python script returns a minimum of 10 tweets containing the word "web3". It will be modified in the future to scrape data covering other web3 topics.

In [None]:
import asyncio  # To handle asynchronous execution
from twikit import Client, TooManyRequests
import csv
from configparser import ConfigParser
from datetime import datetime
import os  # To check if the file exists
import random  # To introduce random delays

# --- Scraper settings -------------------------------------------------------
# Search term passed to the tweet search.
QUERY = 'web3'
# Keep fetching pages until at least this many new tweets have been saved.
MINIMUM_TWEETS = 10
# (low, high) bounds, in seconds, of the random pause taken before each request.
DELAY_SECONDS = (2, 5)
# Seconds to wait after a rate-limit error before retrying (15 minutes).
RATE_LIMIT_DELAY = 15 * 60

# Load the X login credentials from the [X] section of config.ini.
# ConfigParser.read() silently skips files it cannot open, so a missing
# config.ini shows up below as a KeyError for the 'X' section.
config = ConfigParser()
config.read('config.ini')
username, password = config['X']['username'], config['X']['password']

def get_last_tweet_count(filename):
    """Return the tweet count stored in the last data row of a CSV file.

    The first row is treated as the header and is never interpreted as data.
    Blank rows (for example a trailing empty line) and rows whose first
    column is not an integer are skipped, so the most recent usable count
    is returned instead of raising.

    Args:
        filename: Path of the CSV file to inspect.

    Returns:
        The integer in the first column of the last usable data row, or 0
        when the file does not exist, is empty, or holds only a header.
    """
    try:
        # EAFP: open directly rather than checking os.path.exists() first,
        # which avoids a race between the check and the open.
        with open(filename, 'r', newline='', encoding='utf-8') as file:
            reader = csv.reader(file)
            next(reader, None)  # Skip the header row (no-op on an empty file)

            last_count = 0
            # Stream the rows so the whole file is never held in memory.
            for row in reader:
                if not row:
                    continue  # csv.reader yields [] for blank lines
                try:
                    last_count = int(row[0])
                except ValueError:
                    continue  # Not a tweet row; keep the previous count
            return last_count
    except FileNotFoundError:
        return 0

async def authenticate():
    """Log in to X and append at least MINIMUM_TWEETS new tweets to Crest.csv.

    Steps:
      1. Log in with the module-level ``username``/``password`` and save the
         session cookies to ``cookies.json``.
      2. Read the last tweet count already stored in ``Crest.csv`` so that
         numbering continues from the previous run.
      3. Search for ``QUERY`` page by page, appending one CSV row per tweet,
         until at least ``MINIMUM_TWEETS`` new rows were written or the
         search returns no more results.

    A random pause drawn from ``DELAY_SECONDS`` precedes every request, and a
    ``TooManyRequests`` error triggers a ``RATE_LIMIT_DELAY``-second wait
    before the same request is retried.

    Returns:
        None. Results are written to ``Crest.csv`` and progress is printed.
    """
    csv_path = 'Crest.csv'

    # Authenticate to X.com; login is a coroutine and must be awaited.
    client = Client(language='en-US')
    await client.login(auth_info_1=username, password=password)

    # Save cookies after logging in
    client.save_cookies('cookies.json')

    # Continue numbering from whatever is already stored in the file.
    last_tweet_count = get_last_tweet_count(csv_path)
    tweet_count = last_tweet_count
    target_count = last_tweet_count + MINIMUM_TWEETS
    tweets = None  # Current page of results; None until the first search

    # Append mode keeps rows written by previous runs.
    with open(csv_path, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # A zero-byte file has no header yet, so write it first.
        if os.stat(csv_path).st_size == 0:
            writer.writerow(['Tweet_count', 'Username', 'Text', 'Created_At', 'Retweets', 'Views', 'Likes'])

        while tweet_count < target_count:
            # Random pause before each API call to reduce the chance of rate-limiting.
            delay = random.uniform(*DELAY_SECONDS)
            print(f'{datetime.now()} - Sleeping for {delay:.2f} seconds...')
            await asyncio.sleep(delay)

            try:
                if tweets is None:
                    print(f'{datetime.now()} - Getting tweets...')
                    tweets = await client.search_tweet(QUERY, product='Top')
                else:
                    print(f'{datetime.now()} - Getting next tweets...')
                    # next() is a coroutine in the async twikit API; without
                    # the await, `tweets` would be a coroutine object and the
                    # loop below would fail with a TypeError.
                    tweets = await tweets.next()
            except TooManyRequests:
                # `tweets` is left untouched, so the same request is retried
                # on the next iteration once the wait is over.
                print(f'{datetime.now()} - Rate limit hit. Sleeping for {RATE_LIMIT_DELAY // 60} minutes...')
                await asyncio.sleep(RATE_LIMIT_DELAY)
                continue

            if not tweets:
                print(f'{datetime.now()} - No more tweets found')
                break

            # The whole page is written even if that exceeds the target:
            # MINIMUM_TWEETS is a lower bound, not an exact count.
            for tweet in tweets:
                tweet_count += 1
                tweet_data = [tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.view_count, tweet.favorite_count]
                writer.writerow(tweet_data)
                print(tweet_data)

            # Push the page to disk so an interruption during a later
            # (possibly 15-minute) sleep does not lose buffered rows.
            file.flush()

    print(f'{datetime.now()} - Done! Got {tweet_count - last_tweet_count} new tweets.')

# Run the scraper only when executed directly, so importing this module does
# not trigger a login and a scrape.
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running in the same thread (e.g. inside a Jupyter kernel); in that
# environment call `await authenticate()` instead.
if __name__ == "__main__":
    asyncio.run(authenticate())
