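# Notebook cell (exported from Jupyter): run inside IPython/Jupyter, which supports the top-level `await` used below.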
import asyncio
import csv
import tracemalloc
import warnings
from datetime import datetime
from pathlib import Path
from random import randint

import httpx
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
from twikit import Client, TooManyRequests

warnings.filterwarnings("ignore", "error", category=DeprecationWarning, module="html.parser")
tracemalloc.start()


# 1 - log in to twitter

client = Client('en-US')
await client.login(auth_info_1='user_name', password='password') # Comment out after first log-in
client.save_cookies('cookies.json') # Comment out after first log-in
client.load_cookies(path='cookies.json')

# 2 - Define Parameters and Scrape Data

# Search expression: any of the listed hashtags, English-language tweets only.
# Edit the hashtag list to change what is collected.
query = (
    "(#SevenDoors OR #SevenDoorsOnNetflix OR #ThinlineTheMovie"
    " OR #Thinline OR #ThinLineTheMovie OR #EverybodyLovesJenifa)"
    " lang:en"
)

# Stop once this many tweets have been written.
min_tweets = 2000

# Running state shared with the scraping loop: number of tweets written so
# far, and the most recent result page (None until the first search is made).
tweet_count, new_tweets = 0, None

# To cater to time-outs: retry transient network failures only.
# - `retry=retry_if_exception_type(httpx.RequestError)` keeps rate-limit errors
#   (TooManyRequests) from being retried here; the caller handles those by
#   waiting for the rate-limit reset.
# - `reraise=True` makes the original exception propagate once the attempts
#   are exhausted, instead of tenacity's RetryError wrapper, so the caller's
#   `except TooManyRequests` / `except httpx.RequestError` handlers can match.
@retry(
    retry=retry_if_exception_type(httpx.RequestError),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=20),
    reraise=True,
)
async def fetch_tweets(client, query, new_tweets=None):
    """Fetch the first page of search results, or the page after `new_tweets`.

    Args:
        client: Client object whose `search_tweet` coroutine runs the search.
        query: Search expression passed to `client.search_tweet`.
        new_tweets: Previously fetched result page, or None for the first call.

    Returns:
        The result of `client.search_tweet(query, product='Top')` when
        `new_tweets` is None, otherwise the result of `new_tweets.next()`.

    Raises:
        httpx.RequestError: If the request still fails after 5 attempts.
        Any other exception from the underlying call is raised immediately,
        without retrying.
    """
    if new_tweets is None:
        print(f'{datetime.now()} - Fetching initial tweets...')
        return await client.search_tweet(query, product='Top')

    # Pause for a random interval before asking for the next page so that
    # requests are not issued back-to-back.
    wait_time = randint(5, 20)
    print(f'{datetime.now()} - Waiting {wait_time} seconds before fetching next tweets...')
    await asyncio.sleep(wait_time)
    return await new_tweets.next()

# Open the CSV file for writing
with open('file_name.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Tweet_count', 'Time_of_Creation', 'Tweet', 'Hashtags', 'Retweets', 'Likes']) # Specify information you'd like to see for each tweet

    # Main loop for fetching tweets
    while tweet_count < min_tweets:
        try:
            # Fetch tweets with retry and timeout handling
            new_tweets = await fetch_tweets(client, query, new_tweets)
            
        except TooManyRequests as e:
            # Handle rate-limiting
            rate_limit_reset = datetime.fromtimestamp(e.rate_limit_reset)
            print(f'{datetime.now()} - Rate limit reached: Waiting until {rate_limit_reset}')
            wait_time = rate_limit_reset - datetime.now()
            await asyncio.sleep(wait_time.total_seconds())
            continue
        except httpx.RequestError as e:
            # Handle other HTTP errors
            print(f'{datetime.now()} - Network error occurred: {e}')
            continue

        if not new_tweets:
            print(f'{datetime.now()} - No more tweets available!')
            break

        # Process and save tweets
        batch_count = 0
        for tweet in new_tweets:
            tweet_count += 1
            batch_count += 1
            writer.writerow([
                tweet_count,
                tweet.created_at,
                tweet.text,
                tweet.hashtags,
                tweet.retweet_count,
                tweet.favorite_count
            ])

            if tweet_count >= min_tweets:
                break

        print(f'{datetime.now()} - {batch_count} tweets scraped in this batch. Total: {tweet_count}')

print(f'{datetime.now()} - Completed scraping! {tweet_count} tweets found!')