In [1]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

In [11]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
from requests.exceptions import ReadTimeout  # Import ReadTimeout exception
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target; collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code visible in this cell)
CSV_FILE = 'TCS.csv'  # Output file for TCS data
SUMMARY_FILE = 'TCS_Collection_Summary.csv'  # Per-day summary file for TCS

# Cookie files used to alternate sessions when a rate limit is hit.
# They are defined before the client so the initial load uses the same
# variable that switch_cookies() compares against.
cookies_file = 'cookies.json'
cookies2_file = 'cookies2.json'
current_cookies = cookies_file

# Initialize Twitter client with the first cookie set
client = Client()
client.load_cookies(current_cookies)

# Setup logging.
# force=True replaces any handlers already attached to the root logger;
# without it basicConfig() silently does nothing when the cell is re-run or
# when another cell in the same kernel has already configured logging.
logging.basicConfig(
    filename='tcs_tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweets collected] for the summary file

def switch_cookies():
    """Load whichever of the two cookie files is not currently active.

    Updates the module-level ``current_cookies`` after the client has loaded
    the new file, then reports the switch on stdout and in the log.
    """
    global current_cookies
    # Pick the "other" file: cookies2 when on the primary one, else the primary.
    target = cookies2_file if current_cookies == cookies_file else cookies_file
    client.load_cookies(target)
    current_cookies = target
    message = f'Switched to {current_cookies}.'
    print(message)
    logging.info(message)

def get_tweets(query, tweets):
    """Fetch the first page of results for ``query`` or the page after ``tweets``.

    Args:
        query: Search string passed to ``client.search_tweet``.
        tweets: Previous result page, or ``None`` to start a new search.

    Returns:
        Whatever ``client.search_tweet`` / ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before asking for the next page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query string covering the single day ``date``.

    The query ORs the TCS and generic market keywords together, restricts
    results to English, and bounds them with ``since:<date>`` and
    ``until:<date + 1 day>`` (both formatted as YYYY-MM-DD).
    """
    day_format = '%Y-%m-%d'
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    keywords = ' OR '.join([
        'TCS',
        '"Tata Consultancy Services"',
        '#TCS',
        '#TataConsultancy',
        'Stock',
        'Market',
        '#StockMarket',
        '#MarketAnalysis',
    ])
    return f'({keywords}) lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=5):
    """Block for ``minutes`` minutes while a tqdm bar ticks once per second."""
    total_seconds = minutes * 60
    progress = tqdm(
        total=total_seconds,
        desc=f'Waiting for {minutes} minutes',
        unit='s',
        leave=True,
    )
    with progress:
        for _ in range(total_seconds):
            time.sleep(1)
            progress.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Pages are fetched through ``get_tweets`` until the per-day target is
    reached, a page comes back empty, or the ReadTimeout retry budget is
    exhausted. Each stored tweet becomes one CSV row:
    ``[date, index within the day, user name, text, created_at, retweets, likes]``.

    Side effects:
        * appends rows to CSV_FILE (one file open per result page),
        * increments the module-level ``total_tweets_collected``,
        * appends exactly one ``[date, tweet_count]`` row to ``daily_stats``,
        * prints the running total.

    Args:
        date: ``datetime`` identifying the day to collect.
    """
    global total_tweets_collected
    day = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    retries = 0
    max_retries = 5  # Maximum number of ReadTimeout retries for this day

    while tweet_count < MINIMUM_TWEETS:
        try:
            try:
                tweets = get_tweets(query, tweets)
            except TooManyRequests:
                # Switch cookies and retry once immediately.
                logging.warning('Rate limit reached. Switching cookies.')
                switch_cookies()
                try:
                    tweets = get_tweets(query, tweets)
                except TooManyRequests as e:
                    logging.warning('Rate limit still in place even after switching cookies.')
                    # rate_limit_reset may be missing/None; only format it when present.
                    reset_ts = getattr(e, 'rate_limit_reset', None)
                    if reset_ts is None:
                        logging.warning('Rate limit reset time is unknown. Waiting for 5 minutes as a precaution.')
                    elif datetime.fromtimestamp(reset_ts) > datetime.now():
                        logging.warning(f'Waiting until {datetime.fromtimestamp(reset_ts)}')
                    else:
                        logging.warning('Rate limit reset time is in the past. Waiting for 5 minutes as a precaution.')
                    delay_for_minutes(5)
                    continue  # Nothing was fetched; try the same page again
                # The retry succeeded: fall through and process this page.
                # (Jumping back to the top here would skip it, because the
                # next get_tweets() call would immediately ask for .next().)
                logging.info('Successfully resumed after switching cookies.')
        except ReadTimeout:
            # NOTE(review): ReadTimeout is imported from requests; confirm the
            # twikit client actually raises this type on timeouts.
            retries += 1
            if retries > max_retries:
                logging.error(f'Max retries reached for {day}. Moving on.')
                break
            logging.error(f'ReadTimeout occurred. Retrying {retries}/{max_retries}...')
            time.sleep(2 ** retries)  # Exponential backoff
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day}')
            break

        # Append this page's tweets; the file is flushed and closed after each
        # page so an interruption loses at most the page in progress.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                # Build the row first so a failing attribute access cannot
                # leave the counters ahead of what was written.
                row = [day, tweet_count + 1, tweet.user.name, tweet.text,
                       tweet.created_at, tweet.retweet_count, tweet.favorite_count]
                writer.writerow(row)
                tweet_count += 1
                total_tweets_collected += 1

        logging.info(f'Got {tweet_count} tweets for {day}')

    # Record one summary row per day (including days with zero tweets).
    daily_stats.append([day, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date(csv_path=None, default_start=None):
    """Return the day collection should resume from.

    Reads the tweet CSV, skips the header row and any blank rows, and returns
    the day after the date in the first column of the last data row. When the
    file is missing or holds no data rows, the default start date is returned.

    Args:
        csv_path: CSV file to inspect; defaults to the module-level CSV_FILE.
        default_start: Value returned when nothing has been collected yet;
            defaults to the module-level START_DATE.

    Returns:
        A ``datetime`` at midnight of the next day to collect.

    Raises:
        ValueError: if the last data row's first column is not YYYY-MM-DD.
    """
    if csv_path is None:
        csv_path = CSV_FILE
    if default_start is None:
        default_start = START_DATE

    if not os.path.exists(csv_path):
        return default_start

    last_date = None
    # newline='' lets the csv module handle line breaks inside quoted tweet text.
    with open(csv_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row
        for row in reader:
            # Blank lines come back as [] and would break row[0] indexing.
            if row and row[0].strip():
                last_date = row[0].strip()

    if last_date is None:
        return default_start
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Create the tweet CSV with its header row on the first run only
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last row already in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, inclusive of END_DATE
    current_date = start_date
    while not current_date > END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + timedelta(days=1)

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write this run's per-day counts (overwrites any earlier summary file)
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweets Collected'])
        writer.writerows(daily_stats)
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')


Switched to cookies2.json.
Total tweets collected so far: 10
Total tweets collected so far: 20
Total tweets collected so far: 30
Total tweets collected so far: 40
Total tweets collected so far: 50
Total tweets collected so far: 60
Finished collecting tweets. Total tweets collected: 60


# HDFC Bank

In [12]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target; collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code visible in this cell)
CSV_FILE = 'HDFC_Bank.csv'  # Output file for HDFC Bank data
SUMMARY_FILE = 'HDFC_Bank_Collection_Summary.csv'  # Per-day summary file for HDFC Bank

# Cookie files used to alternate sessions when a rate limit is hit.
# They are defined before the client so the initial load uses the same
# variable that switch_cookies() compares against.
cookies_file = 'cookies.json'
cookies2_file = 'cookies2.json'
current_cookies = cookies_file

# Initialize Twitter client with the first cookie set
client = Client()
client.load_cookies(current_cookies)

# Setup logging.
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() does nothing once logging has been configured in
# this kernel (for example by another collection cell), and messages from this
# cell would keep going to the previously configured log file.
logging.basicConfig(
    filename='hdfc_bank_tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweets collected] for the summary file

def switch_cookies():
    """Load whichever of the two cookie files is not currently active.

    Updates the module-level ``current_cookies`` after the client has loaded
    the new file and records the switch in the log.
    """
    global current_cookies
    on_primary = current_cookies == cookies_file
    replacement = cookies2_file if on_primary else cookies_file
    client.load_cookies(replacement)
    current_cookies = replacement
    logging.info(f'Switched to {current_cookies}.')

def get_tweets(query, tweets):
    """Return the first result page for ``query`` or the page following ``tweets``.

    Args:
        query: Search string passed to ``client.search_tweet``.
        tweets: Previous result page, or ``None`` to start a new search.

    Returns:
        Whatever ``client.search_tweet`` / ``tweets.next()`` returns.
    """
    starting_new_search = tweets is None
    if starting_new_search:
        logging.info(f'Getting tweets for query: {query}...')
        page = client.search_tweet(query, product='Top')
    else:
        # Wait a random 5-10 seconds before requesting the next page.
        pause_seconds = randint(5, 10)
        logging.info(f'Getting next tweets after {pause_seconds} seconds ...')
        time.sleep(pause_seconds)
        page = tweets.next()
    return page

def construct_query(date):
    """Build the search query string covering the single day ``date``.

    ORs the HDFC and generic market keywords, restricts results to English,
    and bounds them with ``since:<date>`` / ``until:<date + 1 day>``.
    """
    terms = (
        'HDFC OR "HDFC Bank" OR #HDFC OR #HDFCBank OR '
        'Stock OR Market OR #StockMarket OR #MarketAnalysis'
    )
    window_start = format(date, '%Y-%m-%d')
    window_end = format(date + timedelta(days=1), '%Y-%m-%d')
    return '({}) lang:en until:{} since:{}'.format(terms, window_end, window_start)

def delay_for_minutes(minutes=5):
    """Sleep for ``minutes`` minutes, advancing a tqdm bar once per second."""
    seconds_to_wait = minutes * 60
    bar_label = f'Waiting for {minutes} minutes'
    with tqdm(total=seconds_to_wait, desc=bar_label, unit='s', leave=True) as bar:
        for _second in range(seconds_to_wait):
            time.sleep(1)
            bar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Pages are fetched through ``get_tweets`` until the per-day target is
    reached or a page comes back empty. Rows are buffered in memory and
    written to the CSV in one batch once the day is finished. Each row is
    ``[date, index within the day, user name, text, created_at, retweets, likes]``.

    Side effects:
        * appends the day's rows to CSV_FILE,
        * increments the module-level ``total_tweets_collected``,
        * appends exactly one ``[date, tweet_count]`` row to ``daily_stats``,
        * prints the running total.

    Args:
        date: ``datetime`` identifying the day to collect.
    """
    global total_tweets_collected
    day = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests:
            # Switch cookies and retry once immediately.
            logging.warning('Rate limit reached. Switching cookies.')
            switch_cookies()
            try:
                tweets = get_tweets(query, tweets)
            except TooManyRequests as e:
                logging.warning('Rate limit still in place even after switching cookies.')
                # rate_limit_reset may be missing/None; only format it when present.
                reset_ts = getattr(e, 'rate_limit_reset', None)
                if reset_ts is None:
                    logging.warning('Rate limit reset time is unknown. Waiting for 5 minutes as a precaution.')
                elif datetime.fromtimestamp(reset_ts) > datetime.now():
                    logging.warning(f'Waiting until {datetime.fromtimestamp(reset_ts)}')
                else:
                    logging.warning('Rate limit reset time is in the past. Waiting for 5 minutes as a precaution.')
                delay_for_minutes(5)
                continue  # Nothing was fetched; try the same page again
            # The retry succeeded: fall through and process this page.
            # (Jumping back to the top here would skip it, because the next
            # get_tweets() call would immediately ask for .next().)
            logging.info('Successfully resumed after switching cookies.')

        if not tweets:
            logging.info(f'No more tweets found for {day}')
            break

        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            # Build the row first so a failing attribute access cannot leave
            # the counters ahead of what was buffered.
            row = [day, tweet_count + 1, tweet.user.name, tweet.text,
                   tweet.created_at, tweet.retweet_count, tweet.favorite_count]
            daily_tweet_data.append(row)
            tweet_count += 1
            total_tweets_collected += 1

        logging.info(f'Got {tweet_count} tweets for {day}')

    # Record one summary row per day (including days with zero tweets).
    daily_stats.append([day, tweet_count])

    # Write daily tweet data to CSV after processing all tweets for the day
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date(csv_path=None, default_start=None):
    """Return the day collection should resume from.

    Reads the tweet CSV, skips the header row and any blank rows, and returns
    the day after the date in the first column of the last data row. When the
    file is missing or holds no data rows, the default start date is returned.

    Args:
        csv_path: CSV file to inspect; defaults to the module-level CSV_FILE.
        default_start: Value returned when nothing has been collected yet;
            defaults to the module-level START_DATE.

    Returns:
        A ``datetime`` at midnight of the next day to collect.

    Raises:
        ValueError: if the last data row's first column is not YYYY-MM-DD.
    """
    if csv_path is None:
        csv_path = CSV_FILE
    if default_start is None:
        default_start = START_DATE

    if not os.path.exists(csv_path):
        return default_start

    last_date = None
    # newline='' lets the csv module handle line breaks inside quoted tweet text.
    with open(csv_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row
        for row in reader:
            # Blank lines come back as [] and would break row[0] indexing.
            if row and row[0].strip():
                last_date = row[0].strip()

    if last_date is None:
        return default_start
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Create the tweet CSV with its header row on the first run only
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last row already in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, inclusive of END_DATE
    current_date = start_date
    while not current_date > END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + timedelta(days=1)

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write this run's per-day counts (overwrites any earlier summary file)
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweets Collected'])
        writer.writerows(daily_stats)
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')


Total tweets collected so far: 10
Total tweets collected so far: 20
Total tweets collected so far: 30
Total tweets collected so far: 40
Total tweets collected so far: 50
Total tweets collected so far: 60
Total tweets collected so far: 70
Total tweets collected so far: 80
Total tweets collected so far: 90
Total tweets collected so far: 100
Total tweets collected so far: 110
Total tweets collected so far: 120
Total tweets collected so far: 130
Total tweets collected so far: 140
Total tweets collected so far: 150
Total tweets collected so far: 160
Total tweets collected so far: 170
Total tweets collected so far: 180
Total tweets collected so far: 180
Total tweets collected so far: 190
Total tweets collected so far: 200
Total tweets collected so far: 210
Total tweets collected so far: 220
Total tweets collected so far: 230
Total tweets collected so far: 240
Total tweets collected so far: 250
Total tweets collected so far: 260
Total tweets collected so far: 270
Total tweets collected so far

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 870
Total tweets collected so far: 880
Total tweets collected so far: 890
Total tweets collected so far: 900
Total tweets collected so far: 910
Total tweets collected so far: 920
Total tweets collected so far: 930
Total tweets collected so far: 940
Total tweets collected so far: 950
Total tweets collected so far: 960
Total tweets collected so far: 970
Total tweets collected so far: 980
Total tweets collected so far: 980
Total tweets collected so far: 990
Total tweets collected so far: 1000
Total tweets collected so far: 1010
Total tweets collected so far: 1020
Total tweets collected so far: 1030
Total tweets collected so far: 1040
Total tweets collected so far: 1050
Total tweets collected so far: 1060
Total tweets collected so far: 1070
Total tweets collected so far: 1080
Total tweets collected so far: 1090
Total tweets collected so far: 1100
Total tweets collected so far: 1110
Total tweets collected so far: 1120
Total tweets collected so far: 1130
Total 

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 1350
Total tweets collected so far: 1360
Total tweets collected so far: 1370
Total tweets collected so far: 1380
Total tweets collected so far: 1390
Total tweets collected so far: 1400
Total tweets collected so far: 1410
Total tweets collected so far: 1420
Total tweets collected so far: 1430
Total tweets collected so far: 1440
Total tweets collected so far: 1450
Total tweets collected so far: 1460
Total tweets collected so far: 1470
Total tweets collected so far: 1480
Total tweets collected so far: 1490
Total tweets collected so far: 1500
Total tweets collected so far: 1510
Total tweets collected so far: 1520
Total tweets collected so far: 1530
Total tweets collected so far: 1540
Total tweets collected so far: 1550
Total tweets collected so far: 1560
Total tweets collected so far: 1570
Total tweets collected so far: 1580
Total tweets collected so far: 1590
Total tweets collected so far: 1600
Total tweets collected so far: 1610
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 1850
Total tweets collected so far: 1860
Total tweets collected so far: 1870
Total tweets collected so far: 1880
Total tweets collected so far: 1890
Total tweets collected so far: 1900
Total tweets collected so far: 1910
Total tweets collected so far: 1920
Total tweets collected so far: 1920
Total tweets collected so far: 1930
Total tweets collected so far: 1940
Total tweets collected so far: 1950
Total tweets collected so far: 1960
Total tweets collected so far: 1970
Total tweets collected so far: 1980
Total tweets collected so far: 1990
Total tweets collected so far: 2000
Total tweets collected so far: 2010
Total tweets collected so far: 2020
Total tweets collected so far: 2030
Total tweets collected so far: 2040
Total tweets collected so far: 2050
Total tweets collected so far: 2060
Total tweets collected so far: 2070
Total tweets collected so far: 2080
Total tweets collected so far: 2090
Total tweets collected so far: 2100
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 2320
Total tweets collected so far: 2330
Total tweets collected so far: 2340
Total tweets collected so far: 2350
Total tweets collected so far: 2360
Total tweets collected so far: 2370
Total tweets collected so far: 2380
Total tweets collected so far: 2390
Total tweets collected so far: 2400
Total tweets collected so far: 2410
Total tweets collected so far: 2420
Total tweets collected so far: 2430
Total tweets collected so far: 2440
Total tweets collected so far: 2450
Total tweets collected so far: 2460
Total tweets collected so far: 2470
Total tweets collected so far: 2480
Total tweets collected so far: 2490
Total tweets collected so far: 2500
Total tweets collected so far: 2510
Total tweets collected so far: 2520
Total tweets collected so far: 2530
Total tweets collected so far: 2540
Total tweets collected so far: 2550
Total tweets collected so far: 2560
Total tweets collected so far: 2570
Total tweets collected so far: 2580
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 2820
Total tweets collected so far: 2830
Total tweets collected so far: 2840
Total tweets collected so far: 2850
Total tweets collected so far: 2860
Total tweets collected so far: 2870
Total tweets collected so far: 2880
Total tweets collected so far: 2890
Total tweets collected so far: 2900
Total tweets collected so far: 2910
Total tweets collected so far: 2920
Total tweets collected so far: 2930
Total tweets collected so far: 2940
Total tweets collected so far: 2950
Total tweets collected so far: 2960
Total tweets collected so far: 2970
Total tweets collected so far: 2980
Total tweets collected so far: 2990
Total tweets collected so far: 3000
Total tweets collected so far: 3010
Total tweets collected so far: 3020
Total tweets collected so far: 3030
Total tweets collected so far: 3040
Total tweets collected so far: 3050
Total tweets collected so far: 3060
Total tweets collected so far: 3070
Total tweets collected so far: 3080
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 3310
Total tweets collected so far: 3320
Total tweets collected so far: 3330
Total tweets collected so far: 3340
Total tweets collected so far: 3350
Total tweets collected so far: 3360
Total tweets collected so far: 3370
Total tweets collected so far: 3380
Total tweets collected so far: 3390
Total tweets collected so far: 3400
Total tweets collected so far: 3410
Total tweets collected so far: 3420
Total tweets collected so far: 3430
Total tweets collected so far: 3440
Total tweets collected so far: 3450
Total tweets collected so far: 3460
Total tweets collected so far: 3470
Total tweets collected so far: 3480
Total tweets collected so far: 3490
Total tweets collected so far: 3500
Total tweets collected so far: 3510
Total tweets collected so far: 3520
Total tweets collected so far: 3520
Total tweets collected so far: 3530
Total tweets collected so far: 3540
Total tweets collected so far: 3550
Total tweets collected so far: 3560
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 3790
Total tweets collected so far: 3800
Total tweets collected so far: 3810
Total tweets collected so far: 3820
Total tweets collected so far: 3830
Total tweets collected so far: 3840
Total tweets collected so far: 3850
Total tweets collected so far: 3860
Total tweets collected so far: 3870
Total tweets collected so far: 3880
Total tweets collected so far: 3890
Total tweets collected so far: 3900
Total tweets collected so far: 3910
Total tweets collected so far: 3920
Total tweets collected so far: 3930
Total tweets collected so far: 3940
Total tweets collected so far: 3950
Total tweets collected so far: 3960
Total tweets collected so far: 3970
Total tweets collected so far: 3980
Total tweets collected so far: 3990
Total tweets collected so far: 4000
Total tweets collected so far: 4010
Total tweets collected so far: 4020
Total tweets collected so far: 4030
Total tweets collected so far: 4040
Total tweets collected so far: 4050
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 4270
Total tweets collected so far: 4280
Total tweets collected so far: 4290
Total tweets collected so far: 4300
Total tweets collected so far: 4310
Total tweets collected so far: 4320
Total tweets collected so far: 4330
Total tweets collected so far: 4340
Total tweets collected so far: 4350
Total tweets collected so far: 4360
Total tweets collected so far: 4370
Total tweets collected so far: 4380
Total tweets collected so far: 4390
Total tweets collected so far: 4400
Total tweets collected so far: 4410
Total tweets collected so far: 4420
Total tweets collected so far: 4430
Total tweets collected so far: 4440
Total tweets collected so far: 4450
Total tweets collected so far: 4460
Total tweets collected so far: 4470
Total tweets collected so far: 4480
Total tweets collected so far: 4490
Total tweets collected so far: 4500
Total tweets collected so far: 4500
Total tweets collected so far: 4510
Total tweets collected so far: 4520
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 4750
Total tweets collected so far: 4760
Total tweets collected so far: 4770
Total tweets collected so far: 4780
Total tweets collected so far: 4790
Total tweets collected so far: 4800
Total tweets collected so far: 4810
Total tweets collected so far: 4820
Total tweets collected so far: 4830
Total tweets collected so far: 4840
Total tweets collected so far: 4850
Total tweets collected so far: 4860
Total tweets collected so far: 4870
Total tweets collected so far: 4880
Total tweets collected so far: 4890
Total tweets collected so far: 4900
Total tweets collected so far: 4910
Total tweets collected so far: 4920
Total tweets collected so far: 4930
Total tweets collected so far: 4930
Total tweets collected so far: 4940
Total tweets collected so far: 4950
Total tweets collected so far: 4960
Total tweets collected so far: 4970
Total tweets collected so far: 4980
Total tweets collected so far: 4990
Total tweets collected so far: 5000
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 5220
Total tweets collected so far: 5230
Total tweets collected so far: 5240
Total tweets collected so far: 5250
Total tweets collected so far: 5260
Total tweets collected so far: 5270
Total tweets collected so far: 5280
Total tweets collected so far: 5290
Total tweets collected so far: 5300
Total tweets collected so far: 5310
Total tweets collected so far: 5320
Total tweets collected so far: 5330
Total tweets collected so far: 5340
Total tweets collected so far: 5350
Total tweets collected so far: 5360
Total tweets collected so far: 5370
Total tweets collected so far: 5380
Total tweets collected so far: 5390
Total tweets collected so far: 5400
Total tweets collected so far: 5410
Total tweets collected so far: 5420
Total tweets collected so far: 5430
Total tweets collected so far: 5440
Total tweets collected so far: 5450
Total tweets collected so far: 5460
Total tweets collected so far: 5470
Total tweets collected so far: 5480
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 5690
Total tweets collected so far: 5700
Total tweets collected so far: 5710
Total tweets collected so far: 5720
Total tweets collected so far: 5730
Total tweets collected so far: 5730
Total tweets collected so far: 5740
Total tweets collected so far: 5740
Total tweets collected so far: 5750
Total tweets collected so far: 5760
Total tweets collected so far: 5770
Total tweets collected so far: 5780
Total tweets collected so far: 5790
Total tweets collected so far: 5800
Total tweets collected so far: 5810
Total tweets collected so far: 5820
Total tweets collected so far: 5830
Total tweets collected so far: 5840
Total tweets collected so far: 5850
Total tweets collected so far: 5860
Total tweets collected so far: 5870
Total tweets collected so far: 5880
Total tweets collected so far: 5890
Total tweets collected so far: 5900
Total tweets collected so far: 5910
Total tweets collected so far: 5920
Total tweets collected so far: 5930
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 6140
Total tweets collected so far: 6150
Total tweets collected so far: 6160
Total tweets collected so far: 6170
Total tweets collected so far: 6180
Total tweets collected so far: 6190
Total tweets collected so far: 6190
Total tweets collected so far: 6200
Total tweets collected so far: 6210
Total tweets collected so far: 6220
Total tweets collected so far: 6230
Total tweets collected so far: 6240
Total tweets collected so far: 6250
Total tweets collected so far: 6260
Total tweets collected so far: 6270
Total tweets collected so far: 6280
Total tweets collected so far: 6290
Total tweets collected so far: 6290
Total tweets collected so far: 6300
Total tweets collected so far: 6310
Total tweets collected so far: 6320
Total tweets collected so far: 6330
Total tweets collected so far: 6340
Total tweets collected so far: 6350
Total tweets collected so far: 6360
Total tweets collected so far: 6370
Total tweets collected so far: 6380
Total tweets collected so fa

Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]
Waiting for 5 minutes: 100%|██████████████████████████████████████████████████████████| 300/300 [05:00<00:00,  1.00s/s]


Total tweets collected so far: 6590
Total tweets collected so far: 6590
Total tweets collected so far: 6600
Total tweets collected so far: 6610
Total tweets collected so far: 6620
Total tweets collected so far: 6630
Total tweets collected so far: 6640
Total tweets collected so far: 6650
Total tweets collected so far: 6660
Total tweets collected so far: 6670
Total tweets collected so far: 6680
Total tweets collected so far: 6690
Total tweets collected so far: 6700
Total tweets collected so far: 6710
Total tweets collected so far: 6710
Total tweets collected so far: 6720
Total tweets collected so far: 6730
Total tweets collected so far: 6740
Total tweets collected so far: 6750
Total tweets collected so far: 6760
Total tweets collected so far: 6770
Total tweets collected so far: 6780
Total tweets collected so far: 6790
Total tweets collected so far: 6800
Total tweets collected so far: 6810
Total tweets collected so far: 6820
Total tweets collected so far: 6830
Total tweets collected so fa

# INFY

In [4]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day tweet target
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code visible in this chunk)
CSV_FILE = 'INFY.csv'  # Output file for Infosys data
SUMMARY_FILE = 'INFY_Collection_Summary.csv'  # Per-day summary file for Infosys

# Initialize Twitter client
client = Client()
# Cookie sets rotated by switch_cookies() when a rate limit is hit
cookies_files = ['cookies.json', 'cookies2.json', 'cookies3.json', 'cookies4.json']
current_cookie_index = 0
# Load the first cookie set up front; otherwise the client carries no cookies
# until the first switch_cookies() call.
client.load_cookies(cookies_files[current_cookie_index])

# Setup logging.
# force=True replaces any handlers already attached to the root logger;
# without it basicConfig() silently does nothing when logging has already been
# configured in this kernel (re-run cell or another collection cell).
logging.basicConfig(
    filename='infy_tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Per-day collection statistics

def switch_cookies():
    """Advance to the next cookie file in ``cookies_files`` (wrapping around) and load it."""
    global current_cookie_index
    pool_size = len(cookies_files)
    # Move the index first, then load; the index stays advanced even if loading fails.
    current_cookie_index = (current_cookie_index + 1) % pool_size
    active_file = cookies_files[current_cookie_index]
    client.load_cookies(active_file)
    logging.info(f'Switched to {active_file}.')

def get_tweets(query, tweets):
    """Fetch the first or the next page of search results.

    Rate limits are handled here: on ``TooManyRequests`` the next cookie file
    is loaded and the request is retried. Once every cookie file has been
    tried without success, the function waits five minutes and starts over.
    This replaces the earlier recursive retry, whose inner
    ``except TooManyRequests`` could never fire (the recursive call swallowed
    the exception itself), so the wait was unreachable and recursion unbounded.

    Args:
        query: Search query string passed to ``client.search_tweet``.
        tweets: Previous result page, or ``None`` to start a new search.

    Returns:
        The newly fetched page, or ``None`` when the request failed with a
        non-rate-limit error. Returning ``None`` rather than the stale previous
        page keeps callers from processing the same tweets twice; callers
        treat a falsy result as "no more tweets".
    """
    switches_since_wait = 0
    switched = False
    while True:
        try:
            if tweets is None:
                logging.info(f'Getting tweets for query: {query}...')
                page = client.search_tweet(query, product='Top')
            else:
                wait_time = randint(5, 10)
                logging.info(f'Getting next tweets after {wait_time} seconds ...')
                time.sleep(wait_time)
                page = tweets.next()
        except TooManyRequests:
            logging.warning('Rate limit reached. Switching cookies.')
            switch_cookies()
            switched = True
            switches_since_wait += 1
            if switches_since_wait >= len(cookies_files):
                # Every cookie file has been tried since the last pause.
                logging.warning('Rate limit still in place even after switching cookies.')
                logging.warning('Waiting for 5 minutes before retrying.')
                delay_for_minutes(5)
                switches_since_wait = 0
            continue
        except Exception as e:
            logging.error(f'An error occurred: {e}')
            if '403' in str(e):
                logging.error('403 Forbidden error. Check your cookies and API permissions.')
                switch_cookies()
            return None

        if switched:
            logging.info('Successfully resumed after switching cookies.')
        return page

def construct_query(date):
    """Build the search query string for the single day *date*.

    The Infosys terms and the generic market terms are OR-ed inside one
    group; ``since:`` is set to *date* and ``until:`` to the following day.
    """
    day_format = '%Y-%m-%d'
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    terms = ' OR '.join([
        'INFY', 'Infosys', '"Infosys Limited"', '#INFY', '#Infosys',
        'Stock', 'Market', '#StockMarket', '#MarketAnalysis',
    ])
    return f'({terms}) lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=5):
    """Sleep for *minutes* minutes, advancing a tqdm bar once per second."""
    total_seconds = minutes * 60
    progress = tqdm(total=total_seconds, desc=f'Waiting for {minutes} minutes', unit='s', leave=True)
    with progress as rate_pbar:
        # One tick per slept second so the bar doubles as a countdown.
        for _second in range(total_seconds):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to ``MINIMUM_TWEETS`` tweets for *date* and append them to ``CSV_FILE``.

    Pages are requested through ``get_tweets`` until enough tweets are
    gathered or no further results come back. The day's rows are written in
    one batch after the loop, and exactly one ``[date, count]`` entry is added
    to ``daily_stats`` per day, including days that yielded nothing.
    Previously an entry was added per page and zero-tweet days were skipped.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day_label = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []
    seen_ids = set()

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests:
            # Handle rate limit and cookie switch internally
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        new_on_page = 0
        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            # Skip tweets already recorded today. get_tweets can hand back
            # the previous page after a failed pagination request.
            if tweet.id in seen_ids:
                continue
            seen_ids.add(tweet.id)
            new_on_page += 1
            tweet_count += 1
            total_tweets_collected += 1
            daily_tweet_data.append([day_label, tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')
        if new_on_page == 0:
            # A page with nothing new means pagination is not advancing.
            logging.info(f'No new tweets on the last page for {day_label}; stopping.')
            break

    daily_stats.append([day_label, tweet_count])

    # Write daily tweet data to CSV after processing all tweets for the day
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date from which collection should resume.

    Scans ``CSV_FILE`` for its last non-empty data row and returns the day
    after that row's first column. Falls back to ``START_DATE`` when the file
    is missing or contains nothing beyond the header row.

    Returns:
        datetime: First day that still needs to be collected.

    Raises:
        ValueError: If the last row's first column is not a ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE
    last_row = None
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # The first row is the header.
        # Stream the rows instead of materialising the whole file, and skip
        # blank rows (csv.reader yields [] for them, which would break row[0]).
        for row in reader:
            if row:
                last_row = row
    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Create the CSV with its header row the first time this cell runs
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csv_out:
        csv.writer(csv_out).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last date already stored in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, END_DATE inclusive
    one_day = timedelta(days=1)
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + one_day

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write the per-day counts gathered during this run to the summary file
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as summary_out:
        csv.writer(summary_out).writerows([['Date', 'Tweets Collected'], *daily_stats])
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')

Total tweets collected so far: 0
Total tweets collected so far: 10
Total tweets collected so far: 20
Total tweets collected so far: 30
Total tweets collected so far: 40
Total tweets collected so far: 50
Total tweets collected so far: 50
Total tweets collected so far: 60
Total tweets collected so far: 70
Total tweets collected so far: 80
Total tweets collected so far: 90
Total tweets collected so far: 100
Total tweets collected so far: 110
Total tweets collected so far: 120
Total tweets collected so far: 130
Total tweets collected so far: 140
Total tweets collected so far: 140
Total tweets collected so far: 150
Total tweets collected so far: 160
Total tweets collected so far: 160
Total tweets collected so far: 170
Total tweets collected so far: 180
Total tweets collected so far: 190
Total tweets collected so far: 200
Total tweets collected so far: 210
Total tweets collected so far: 210
Total tweets collected so far: 220
Total tweets collected so far: 230
Total tweets collected so far: 2

# ICICI Bank

In [5]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Number of tweets to collect per day before moving on
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code in this cell)
CSV_FILE = 'ICICI.csv'  # Output file for ICICI Bank tweets
SUMMARY_FILE = 'ICICI_Collection_Summary.csv'  # Per-day collection summary for ICICI Bank

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
cookies_files = ['cookies.json', 'cookies2.json', 'cookies3.json', 'cookies4.json']
current_cookie_index = 0
# Start with the first cookie file loaded. Otherwise the client has no
# session cookies until switch_cookies() is triggered by a failed request.
client.load_cookies(cookies_files[current_cookie_index])

# Setup logging
# basicConfig does nothing when the root logger already has handlers (e.g. an
# earlier cell configured it). force=True (Python 3.8+) replaces them so this
# cell's messages go to its own log file.
logging.basicConfig(filename='icici_tweet_collection.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweet count] for the summary file

def switch_cookies():
    """Rotate to the following entry of ``cookies_files`` and load it into the client.

    After the last entry the rotation starts again from the first one.
    """
    global current_cookie_index
    file_count = len(cookies_files)
    current_cookie_index = (current_cookie_index + 1) % file_count
    chosen = cookies_files[current_cookie_index]
    client.load_cookies(chosen)
    logging.info(f'Switched to {chosen}.')

def get_tweets(query, tweets):
    """Fetch the first or the next page of search results, rotating cookies on rate limits.

    The previous version retried by calling itself recursively. That call
    caught ``TooManyRequests`` internally, so the outer fallback (the
    five-minute wait) was unreachable and a persistent rate limit recursed
    without bound. This version retries in a loop and pauses for five minutes
    once every cookie file has been tried.

    Args:
        query: Search query string passed to ``client.search_tweet``.
        tweets: Previous result page, or ``None`` to start a new search.

    Returns:
        The newly fetched page, or ``None`` if a non-rate-limit error
        occurred. The stale previous page is deliberately not returned, so
        the caller cannot re-process tweets it has already stored.
    """
    attempts_on_current_round = 0
    cookies_were_switched = False
    while True:
        try:
            if tweets is None:
                logging.info(f'Getting tweets for query: {query}...')
                result = client.search_tweet(query, product='Top')
            else:
                wait_time = randint(5, 10)
                logging.info(f'Getting next tweets after {wait_time} seconds ...')
                time.sleep(wait_time)
                result = tweets.next()
        except TooManyRequests:
            logging.warning('Rate limit reached. Switching cookies.')
            switch_cookies()
            cookies_were_switched = True
            attempts_on_current_round += 1
            if attempts_on_current_round >= len(cookies_files):
                # All cookie files are rate limited; back off before the next round.
                logging.warning('Rate limit still in place even after switching cookies.')
                logging.warning('Waiting for 5 minutes before retrying.')
                delay_for_minutes(5)
                attempts_on_current_round = 0
            continue
        except Exception as e:
            logging.error(f'An error occurred: {e}')
            if '403' in str(e):
                logging.error('403 Forbidden error. Check your cookies and API permissions.')
                switch_cookies()
            return None

        if cookies_were_switched:
            logging.info('Successfully resumed after switching cookies.')
        return result

def construct_query(date):
    """Return the search query for *date*.

    ICICI Bank terms and generic market terms are OR-ed in one group; the
    ``since:``/``until:`` operators are set to *date* and the day after it.
    """
    company_terms = 'ICICI OR ICICIBank OR "ICICI Bank" OR #ICICIBank OR #ICICI'
    market_terms = 'Stock OR Market OR #StockMarket OR #MarketAnalysis'
    window_start = date.strftime('%Y-%m-%d')
    window_end = (date + timedelta(days=1)).strftime('%Y-%m-%d')
    return '({} OR {}) lang:en until:{} since:{}'.format(
        company_terms, market_terms, window_end, window_start
    )

def delay_for_minutes(minutes=5):
    """Pause execution for *minutes* minutes while showing a tqdm countdown."""
    seconds = minutes * 60
    label = f'Waiting for {minutes} minutes'
    with tqdm(total=seconds, desc=label, unit='s', leave=True) as rate_pbar:
        # Sleep in one-second steps so the bar updates continuously.
        for _tick in range(seconds):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to ``MINIMUM_TWEETS`` tweets for *date* and append them to ``CSV_FILE``.

    Result pages are pulled through ``get_tweets`` until the target count is
    reached or nothing more is returned. Rows are written in a single batch
    after the loop. ``daily_stats`` receives exactly one ``[date, count]``
    entry per day, including days with zero tweets. Earlier it received one
    entry per page and none for empty days.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day_label = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []
    seen_ids = set()

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests:
            # Handle rate limit and cookie switch internally
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        added_from_page = 0
        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            # get_tweets may return the previous page again after a failed
            # pagination call; never record the same tweet twice.
            if tweet.id in seen_ids:
                continue
            seen_ids.add(tweet.id)
            added_from_page += 1
            tweet_count += 1
            total_tweets_collected += 1
            daily_tweet_data.append([day_label, tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')
        if added_from_page == 0:
            # Pagination is not advancing; stop instead of spinning.
            logging.info(f'No new tweets on the last page for {day_label}; stopping.')
            break

    daily_stats.append([day_label, tweet_count])

    # Write daily tweet data to CSV after processing all tweets for the day
    with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')


def get_last_collected_date():
    """Return the date from which collection should resume.

    Finds the last non-empty data row in ``CSV_FILE`` and returns the day
    after the date in its first column. Returns ``START_DATE`` when the file
    does not exist or holds only the header row.

    Returns:
        datetime: First day that still needs to be collected.

    Raises:
        ValueError: If the last row's first column is not a ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE
    last_row = None
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # The first row is the header.
        # Iterate lazily rather than building a list of every row, and
        # ignore blank rows, which csv.reader returns as empty lists.
        for row in reader:
            if row:
                last_row = row
    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Create the CSV with its header row the first time this cell runs
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csv_out:
        csv.writer(csv_out).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last date already stored in the CSV
start_date = get_last_collected_date()

# Number of days still to process (both endpoints included); sizes the progress bar
total_days = (END_DATE - start_date).days + 1

if start_date <= END_DATE:
    # One progress-bar tick per processed day
    with tqdm(total=total_days, desc="Collecting Tweets", unit="day") as pbar:
        one_day = timedelta(days=1)
        current_date = start_date
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date = current_date + one_day
            pbar.update(1)

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write the per-day counts gathered during this run to the summary file
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as summary_out:
        csv.writer(summary_out).writerows([['Date', 'Tweets Collected'], *daily_stats])
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')


Collecting Tweets: 100%|████████████████████████████████████████████████████████████| 730/730 [47:44<00:00,  3.92s/day]

Finished collecting tweets. Total tweets collected: 6640





# HINDUNILVR

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Number of tweets to collect per day before moving on
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code in this cell)
CSV_FILE = 'HINDUNILVR.csv'  # File to store Hindustan Unilever tweets
SUMMARY_FILE = 'HINDUNILVR_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True (Python 3.8+) replaces handlers left by a previously run cell;
# without it basicConfig is silently ignored once the root logger is set up.
logging.basicConfig(filename='tweet_collection.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweet count] for the summary file

def get_tweets(query, tweets):
    """Return the first result page for *query*, or the page following *tweets*.

    When *tweets* is ``None`` a new search is started. Otherwise the function
    sleeps for a random 5-10 seconds and then requests the next page.
    """
    if tweets is not None:
        # Random pause between pagination requests
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query string for the single day *date*.

    Hindustan Unilever terms and generic market terms share one OR group;
    ``since:`` is *date* and ``until:`` is the day after.
    """
    fmt = '%Y-%m-%d'
    keywords = [
        'HUL', 'HINDUNILVR', '"Hindustan Unilever"', '#HUL', '#HINDUNILVR',
        'Stock', 'Market', '#StockMarket', '#MarketAnalysis',
    ]
    until_day = (date + timedelta(days=1)).strftime(fmt)
    since_day = date.strftime(fmt)
    return '(' + ' OR '.join(keywords) + ') lang:en until:' + until_day + ' since:' + since_day

def delay_for_minutes(minutes=15):
    """Delay for the given number of minutes, showing progress with tqdm.

    Args:
        minutes: Length of the delay in minutes. May be fractional; the
            rate-limit handler in this cell passes the float produced by
            ``total_seconds() // 60``. Negative values are treated as zero.
    """
    # range() only accepts integers, so convert to whole seconds first.
    # Previously a float argument raised TypeError here.
    wait_time = max(0, int(round(minutes * 60)))
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to ``MINIMUM_TWEETS`` tweets for *date* and append them to ``CSV_FILE``.

    Changes from the previous version:
      * The rate-limit wait is passed to ``delay_for_minutes`` as a whole
        number of minutes, rounded up. The float from ``wait_time // 60``
        made ``range()`` fail, and flooring could produce a zero-length wait.
      * A missing reset timestamp falls back to the 15-minute wait.
      * Rows are written once per day instead of reopening the file for every
        tweet, so an interrupted day is re-collected in full on the next run.
      * ``daily_stats`` gets exactly one entry per day, including empty days.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day_label = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # NOTE(review): twikit appears to set rate_limit_reset to None when
            # the response carries no reset header; guarded here, please confirm.
            reset_ts = getattr(e, 'rate_limit_reset', None)
            wait_time = 0
            if reset_ts is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_ts)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Round up so the wait never ends before the reset time.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting 15 minutes as a precaution.')
                delay_for_minutes(15)
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            tweet_count += 1
            total_tweets_collected += 1
            daily_tweet_data.append([day_label, tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    daily_stats.append([day_label, tweet_count])

    # Write the whole day in one batch
    if daily_tweet_data:
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date from which collection should resume.

    Scans ``CSV_FILE`` for its last non-empty data row and returns the day
    after that row's first column. Falls back to ``START_DATE`` when the file
    is missing or contains nothing beyond the header row.

    Returns:
        datetime: First day that still needs to be collected.

    Raises:
        ValueError: If the last row's first column is not a ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE
    last_row = None
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # The first row is the header.
        # Stream the rows instead of materialising the whole file, and skip
        # blank rows (csv.reader yields [] for them, which would break row[0]).
        for row in reader:
            if row:
                last_row = row
    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Create the CSV with its header row the first time this cell runs
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csv_out:
        csv.writer(csv_out).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last date already stored in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, END_DATE inclusive
    one_day = timedelta(days=1)
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + one_day

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write the per-day counts gathered during this run to the summary file
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as summary_out:
        csv.writer(summary_out).writerows([['Date', 'Tweets Collected'], *daily_stats])
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')


# ITC

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Number of tweets to collect per day before moving on
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code in this cell)
CSV_FILE = 'ITC.csv'  # File to store ITC tweets
SUMMARY_FILE = 'ITC_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# basicConfig is ignored when the root logger already has handlers (for
# example from a cell run earlier in the same kernel); force=True
# (Python 3.8+) makes this configuration take effect.
logging.basicConfig(filename='tweet_collection.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweet count] for the summary file

def get_tweets(query, tweets):
    """Fetch search results: a fresh search if *tweets* is ``None``, else the next page.

    Pagination requests are preceded by a random 5-10 second sleep.
    """
    starting_new_search = tweets is None
    if starting_new_search:
        logging.info(f'Getting tweets for query: {query}...')
        page = client.search_tweet(query, product='Top')
    else:
        delay_seconds = randint(5, 10)
        logging.info(f'Getting next tweets after {delay_seconds} seconds ...')
        time.sleep(delay_seconds)
        page = tweets.next()
    return page

def construct_query(date):
    """Return the search query for *date*.

    ITC terms and generic market terms are OR-ed in one group; ``since:`` is
    *date* and ``until:`` is the following day.
    """
    template = (
        '(ITC OR #ITC OR "ITC Ltd" OR #ITCLimited OR '
        'Stock OR Market OR #StockMarket OR #MarketAnalysis) '
        'lang:en until:{until} since:{since}'
    )
    day_after = date + timedelta(days=1)
    return template.format(
        until=day_after.strftime('%Y-%m-%d'),
        since=date.strftime('%Y-%m-%d'),
    )

def delay_for_minutes(minutes=15):
    """Delay for the given number of minutes, showing progress with tqdm.

    Args:
        minutes: Length of the delay in minutes. May be fractional; the
            rate-limit handler in this cell passes the float produced by
            ``total_seconds() // 60``. Negative values are treated as zero.
    """
    # range() only accepts integers, so convert to whole seconds first.
    # Previously a float argument raised TypeError here.
    wait_time = max(0, int(round(minutes * 60)))
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to ``MINIMUM_TWEETS`` tweets for *date* and append them to ``CSV_FILE``.

    Changes from the previous version:
      * The rate-limit wait is passed to ``delay_for_minutes`` as a whole
        number of minutes, rounded up. The float from ``wait_time // 60``
        made ``range()`` fail, and flooring could produce a zero-length wait.
      * A missing reset timestamp falls back to the 15-minute wait.
      * Rows are written once per day instead of reopening the file for every
        tweet, so an interrupted day is re-collected in full on the next run.
      * ``daily_stats`` gets exactly one entry per day, including empty days.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day_label = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # NOTE(review): twikit appears to set rate_limit_reset to None when
            # the response carries no reset header; guarded here, please confirm.
            reset_ts = getattr(e, 'rate_limit_reset', None)
            wait_time = 0
            if reset_ts is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_ts)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Round up so the wait never ends before the reset time.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting 15 minutes as a precaution.')
                delay_for_minutes(15)
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            tweet_count += 1
            total_tweets_collected += 1
            daily_tweet_data.append([day_label, tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    daily_stats.append([day_label, tweet_count])

    # Write the whole day in one batch
    if daily_tweet_data:
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date from which collection should resume.

    Finds the last non-empty data row in ``CSV_FILE`` and returns the day
    after the date in its first column. Returns ``START_DATE`` when the file
    does not exist or holds only the header row.

    Returns:
        datetime: First day that still needs to be collected.

    Raises:
        ValueError: If the last row's first column is not a ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE
    last_row = None
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # The first row is the header.
        # Iterate lazily rather than building a list of every row, and
        # ignore blank rows, which csv.reader returns as empty lists.
        for row in reader:
            if row:
                last_row = row
    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Create the CSV with its header row the first time this cell runs
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csv_out:
        csv.writer(csv_out).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last date already stored in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, END_DATE inclusive
    one_day = timedelta(days=1)
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + one_day

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write the per-day counts gathered during this run to the summary file
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as summary_out:
        csv.writer(summary_out).writerows([['Date', 'Tweets Collected'], *daily_stats])
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')


# SBIN

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Number of tweets to collect per day before moving on
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (not referenced by the code in this cell)
CSV_FILE = 'SBIN.csv'  # File to store SBIN tweets
SUMMARY_FILE = 'SBIN_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# Once any cell has configured the root logger, a plain basicConfig call does
# nothing. force=True (Python 3.8+) removes the old handlers first so this
# configuration is actually applied.
logging.basicConfig(filename='tweet_collection.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

# Initialize counters
total_tweets_collected = 0  # Running total across all days in this run
daily_stats = []  # Rows of [date string, tweet count] for the summary file

def get_tweets(query, tweets):
    """Run a new search for *query*, or page forward from *tweets*.

    A ``None`` value for *tweets* starts a new search. Any other value is
    treated as the previous page: the function sleeps a random 5-10 seconds
    and then asks that page for its successor.
    """
    if tweets is None:
        logging.info(f'Getting tweets for query: {query}...')
        return client.search_tweet(query, product='Top')

    nap = randint(5, 10)
    logging.info(f'Getting next tweets after {nap} seconds ...')
    time.sleep(nap)
    return tweets.next()

def construct_query(date):
    """Build the search query string for the single day *date*.

    State Bank of India terms and generic market terms are OR-ed in one
    group; ``since:`` is *date* and ``until:`` is the day after.
    """
    since_str = date.strftime('%Y-%m-%d')
    until_str = (date + timedelta(days=1)).strftime('%Y-%m-%d')
    sbi_terms = ['SBIN', '"State Bank of India"', '#SBI', '#SBIN', '"SBI Bank"']
    market_terms = ['Stock', 'Market', '#StockMarket', '#MarketAnalysis']
    term_group = ' OR '.join(sbi_terms + market_terms)
    return f'({term_group}) lang:en until:{until_str} since:{since_str}'

def delay_for_minutes(minutes=15):
    """Delay for the given number of minutes, showing progress with tqdm.

    Args:
        minutes: Length of the delay in minutes. May be fractional; the
            rate-limit handler in this cell passes the float produced by
            ``total_seconds() // 60``. Negative values are treated as zero.
    """
    # range() only accepts integers, so convert to whole seconds first.
    # Previously a float argument raised TypeError here.
    wait_time = max(0, int(round(minutes * 60)))
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to ``MINIMUM_TWEETS`` tweets for *date* and append them to ``CSV_FILE``.

    Changes from the previous version:
      * The rate-limit wait is passed to ``delay_for_minutes`` as a whole
        number of minutes, rounded up. The float from ``wait_time // 60``
        made ``range()`` fail, and flooring could produce a zero-length wait.
      * A missing reset timestamp falls back to the 15-minute wait.
      * Rows are written once per day instead of reopening the file for every
        tweet, so an interrupted day is re-collected in full on the next run.
      * ``daily_stats`` gets exactly one entry per day, including empty days.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day_label = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None
    daily_tweet_data = []

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # NOTE(review): twikit appears to set rate_limit_reset to None when
            # the response carries no reset header; guarded here, please confirm.
            reset_ts = getattr(e, 'rate_limit_reset', None)
            wait_time = 0
            if reset_ts is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_ts)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Round up so the wait never ends before the reset time.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting 15 minutes as a precaution.')
                delay_for_minutes(15)
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        for tweet in tweets:
            if tweet_count >= MINIMUM_TWEETS:
                break
            tweet_count += 1
            total_tweets_collected += 1
            daily_tweet_data.append([day_label, tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    daily_stats.append([day_label, tweet_count])

    # Write the whole day in one batch
    if daily_tweet_data:
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerows(daily_tweet_data)

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date from which collection should resume.

    Looks up the last non-empty data row of ``CSV_FILE`` and returns the day
    following the date in its first column. ``START_DATE`` is returned when
    the file is absent or has no rows after the header.

    Returns:
        datetime: First day that still needs to be collected.

    Raises:
        ValueError: If the last row's first column is not a ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE
    last_row = None
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # The first row is the header.
        # Read row by row so the whole file is never held in memory; blank
        # rows come back from csv.reader as [] and are skipped.
        for row in reader:
            if row:
                last_row = row
    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Create the CSV with its header row the first time this cell runs
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csv_out:
        csv.writer(csv_out).writerow(
            ['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes']
        )

# Resume from the day after the last date already stored in the CSV
start_date = get_last_collected_date()
if start_date <= END_DATE:
    # Walk the remaining days one at a time, END_DATE inclusive
    one_day = timedelta(days=1)
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date = current_date + one_day

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Write the per-day counts gathered during this run to the summary file
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as summary_out:
        csv.writer(summary_out).writerows([['Date', 'Tweets Collected'], *daily_stats])
else:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
