In [1]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

In [7]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm
from requests.exceptions import ReadTimeout

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'TCS.csv'  # Output file for the collected TCS tweets
SUMMARY_FILE = 'TCS_Collection_Summary.csv'  # Per-day collection summary for TCS

# Initialize Twitter client with the first cookie set
client = Client(timeout=10)  # Set timeout to 10 seconds
current_cookies = 'cookies.json'
client.load_cookies(current_cookies)

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tcs_tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(TCS OR "Tata Consultancy Services" OR #TCS OR #TataConsultancy OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def switch_cookies():
    """Toggle between the two cookie files and load the newly selected one.

    Updates the module-level ``current_cookies`` and reloads the shared
    ``client`` with it.
    """
    global current_cookies
    # Flip to whichever of the two cookie files is not currently active.
    current_cookies = 'cookies2.json' if current_cookies == 'cookies.json' else 'cookies.json'

    logging.info(f'Switching to {current_cookies} due to rate limit...')
    client.load_cookies(current_cookies)

def delay_for_minutes(minutes=5):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the cookie file is switched and the same request is
    retried once. If the retry is rate limited as well, the function sleeps
    until the reset time reported by the first error (or 5 minutes when that
    time is missing or already past) before trying again.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)

            # Switch cookies and retry
            logging.warning('Rate limit reached. Switching cookies.')
            switch_cookies()

            # Immediately retry with new cookies
            try:
                tweets = get_tweets(query, tweets)
            except TooManyRequests:
                logging.warning('Rate limit still in place even after switching cookies.')
                rate_limit_reset = None
                wait_time = 0
                if reset_timestamp is not None:
                    rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                    wait_time = (rate_limit_reset - datetime.now()).total_seconds()
                if wait_time > 0:
                    logging.warning(f'Waiting until {rate_limit_reset}')
                    # Whole minutes, rounded up, as an int: never wake before the reset.
                    delay_for_minutes(int(wait_time // 60) + 1)
                else:
                    logging.warning('Rate limit reset time is unknown or in the past. Waiting for 5 minutes as a precaution.')
                    delay_for_minutes(5)
                continue
            # The retry succeeded: fall through so the fetched page is
            # processed instead of being skipped by the next pagination call.
            logging.info('Successfully resumed after switching cookies.')

        except ReadTimeout:
            # NOTE(review): this is requests' ReadTimeout; confirm the client
            # actually raises it, otherwise timeouts are not caught here.
            logging.error('ReadTimeout occurred. Retrying in a few seconds...')
            time.sleep(5)  # Small delay before retrying
            continue  # Retry the loop to fetch the tweets again

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should start or resume.

    Reads CSV_FILE and looks at the date column of its last non-empty data
    row (the first row is treated as the header).

    Returns:
        datetime: The day after the last stored date, or START_DATE when the
        file is missing or holds no data rows.

    Raises:
        ValueError: If the last date value is not in ``YYYY-MM-DD`` form.

    NOTE(review): resuming on the day after the last row means a day that was
    only partly collected before an interruption is not revisited.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_date = None
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        # Stream the rows and remember only the most recent date; blank rows
        # (e.g. a trailing empty line) are ignored instead of raising IndexError.
        for row in reader:
            if row and row[0]:
                last_date = row[0]

    if last_date is None:
        return START_DATE
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range
    current_date = start_date
    try:
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date += timedelta(days=1)

        print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')
    finally:
        # Save summary to a CSV file. This runs even when the loop is
        # interrupted (e.g. KeyboardInterrupt during a rate-limit wait), so the
        # statistics gathered so far are not lost.
        # When resuming past START_DATE, append to an existing summary rather
        # than overwriting the entries written by the earlier run.
        resuming = start_date > START_DATE and os.path.exists(SUMMARY_FILE)
        with open(SUMMARY_FILE, 'a' if resuming else 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if not resuming:
                writer.writerow(['Date', 'Tweets Collected'])
            writer.writerows(daily_stats)


Total tweets collected so far: 10
Total tweets collected so far: 20
Total tweets collected so far: 30
Total tweets collected so far: 40
Total tweets collected so far: 50
Total tweets collected so far: 60
Total tweets collected so far: 70
Total tweets collected so far: 70
Total tweets collected so far: 80
Total tweets collected so far: 90
Total tweets collected so far: 100
Total tweets collected so far: 110
Total tweets collected so far: 120
Total tweets collected so far: 130


Waiting for 15 minutes:   1%|▌                                                          | 9/900 [00:10<16:33,  1.11s/s]


KeyboardInterrupt: 

# HDFC Bank

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'HDFC_Bank.csv'  # File to store HDFC Bank tweets
SUMMARY_FILE = 'HDFC_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(HDFC OR "HDFC Bank" OR #HDFC OR #HDFCBank OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the function sleeps until the reported reset time
    (or 15 minutes when that time is missing or already past) and retries.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            rate_limit_reset = None
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Whole minutes, rounded up, as an int: never wake before the reset.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting for 15 minutes as a precaution.')
                delay_for_minutes(15)  # 15-minute delay in case of past reset time
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should start or resume.

    Reads CSV_FILE and looks at the date column of its last non-empty data
    row (the first row is treated as the header).

    Returns:
        datetime: The day after the last stored date, or START_DATE when the
        file is missing or holds no data rows.

    Raises:
        ValueError: If the last date value is not in ``YYYY-MM-DD`` form.

    NOTE(review): resuming on the day after the last row means a day that was
    only partly collected before an interruption is not revisited.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_date = None
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        # Stream the rows and remember only the most recent date; blank rows
        # (e.g. a trailing empty line) are ignored instead of raising IndexError.
        for row in reader:
            if row and row[0]:
                last_date = row[0]

    if last_date is None:
        return START_DATE
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range
    current_date = start_date
    try:
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date += timedelta(days=1)

        print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')
    finally:
        # Save summary to a CSV file. This runs even when the loop is
        # interrupted (e.g. KeyboardInterrupt during a rate-limit wait), so the
        # statistics gathered so far are not lost.
        # When resuming past START_DATE, append to an existing summary rather
        # than overwriting the entries written by the earlier run.
        resuming = start_date > START_DATE and os.path.exists(SUMMARY_FILE)
        with open(SUMMARY_FILE, 'a' if resuming else 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if not resuming:
                writer.writerow(['Date', 'Tweets Collected'])
            writer.writerows(daily_stats)

# INFY

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'INFY.csv'  # File to store Infosys (INFY) tweets
SUMMARY_FILE = 'INFY_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(Infosys OR INFY OR "Infosys Ltd" OR #Infosys OR #INFY OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the function sleeps until the reported reset time
    (or 15 minutes when that time is missing or already past) and retries.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            rate_limit_reset = None
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Whole minutes, rounded up, as an int: never wake before the reset.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting for 15 minutes as a precaution.')
                delay_for_minutes(15)  # 15-minute delay in case of past reset time
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should start or resume.

    Reads CSV_FILE and looks at the date column of its last non-empty data
    row (the first row is treated as the header).

    Returns:
        datetime: The day after the last stored date, or START_DATE when the
        file is missing or holds no data rows.

    Raises:
        ValueError: If the last date value is not in ``YYYY-MM-DD`` form.

    NOTE(review): resuming on the day after the last row means a day that was
    only partly collected before an interruption is not revisited.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_date = None
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        # Stream the rows and remember only the most recent date; blank rows
        # (e.g. a trailing empty line) are ignored instead of raising IndexError.
        for row in reader:
            if row and row[0]:
                last_date = row[0]

    if last_date is None:
        return START_DATE
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range
    current_date = start_date
    try:
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date += timedelta(days=1)

        print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')
    finally:
        # Save summary to a CSV file. This runs even when the loop is
        # interrupted (e.g. KeyboardInterrupt during a rate-limit wait), so the
        # statistics gathered so far are not lost.
        # When resuming past START_DATE, append to an existing summary rather
        # than overwriting the entries written by the earlier run.
        resuming = start_date > START_DATE and os.path.exists(SUMMARY_FILE)
        with open(SUMMARY_FILE, 'a' if resuming else 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if not resuming:
                writer.writerow(['Date', 'Tweets Collected'])
            writer.writerows(daily_stats)


# ICICI Bank

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'ICICIBANK.csv'  # File to store ICICI Bank tweets
SUMMARY_FILE = 'ICICIBANK_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(ICICI OR ICICIBank OR "ICICI Bank" OR #ICICIBank OR #ICICI OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the function sleeps until the reported reset time
    (or 15 minutes when that time is missing or already past) and retries.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            rate_limit_reset = None
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Whole minutes, rounded up, as an int: never wake before the reset.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting for 15 minutes as a precaution.')
                delay_for_minutes(15)  # 15-minute delay in case of past reset time
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should start or resume.

    Reads CSV_FILE and looks at the date column of its last non-empty data
    row (the first row is treated as the header).

    Returns:
        datetime: The day after the last stored date, or START_DATE when the
        file is missing or holds no data rows.

    Raises:
        ValueError: If the last date value is not in ``YYYY-MM-DD`` form.

    NOTE(review): resuming on the day after the last row means a day that was
    only partly collected before an interruption is not revisited.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_date = None
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        # Stream the rows and remember only the most recent date; blank rows
        # (e.g. a trailing empty line) are ignored instead of raising IndexError.
        for row in reader:
            if row and row[0]:
                last_date = row[0]

    if last_date is None:
        return START_DATE
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range
    current_date = start_date
    try:
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date += timedelta(days=1)

        print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')
    finally:
        # Save summary to a CSV file. This runs even when the loop is
        # interrupted (e.g. KeyboardInterrupt during a rate-limit wait), so the
        # statistics gathered so far are not lost.
        # When resuming past START_DATE, append to an existing summary rather
        # than overwriting the entries written by the earlier run.
        resuming = start_date > START_DATE and os.path.exists(SUMMARY_FILE)
        with open(SUMMARY_FILE, 'a' if resuming else 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if not resuming:
                writer.writerow(['Date', 'Tweets Collected'])
            writer.writerows(daily_stats)


# HINDUNILVR

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'HINDUNILVR.csv'  # File to store Hindustan Unilever tweets
SUMMARY_FILE = 'HINDUNILVR_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(HUL OR HINDUNILVR OR "Hindustan Unilever" OR #HUL OR #HINDUNILVR OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the function sleeps until the reported reset time
    (or 15 minutes when that time is missing or already past) and retries.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            rate_limit_reset = None
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Whole minutes, rounded up, as an int: never wake before the reset.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting for 15 minutes as a precaution.')
                delay_for_minutes(15)  # 15-minute delay in case of past reset time
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should start or resume.

    Reads CSV_FILE and looks at the date column of its last non-empty data
    row (the first row is treated as the header).

    Returns:
        datetime: The day after the last stored date, or START_DATE when the
        file is missing or holds no data rows.

    Raises:
        ValueError: If the last date value is not in ``YYYY-MM-DD`` form.

    NOTE(review): resuming on the day after the last row means a day that was
    only partly collected before an interruption is not revisited.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_date = None
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        # Stream the rows and remember only the most recent date; blank rows
        # (e.g. a trailing empty line) are ignored instead of raising IndexError.
        for row in reader:
            if row and row[0]:
                last_date = row[0]

    if last_date is None:
        return START_DATE
    return datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range
    current_date = start_date
    try:
        while current_date <= END_DATE:
            collect_tweets_for_day(current_date)
            current_date += timedelta(days=1)

        print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')
    finally:
        # Save summary to a CSV file. This runs even when the loop is
        # interrupted (e.g. KeyboardInterrupt during a rate-limit wait), so the
        # statistics gathered so far are not lost.
        # When resuming past START_DATE, append to an existing summary rather
        # than overwriting the entries written by the earlier run.
        resuming = start_date > START_DATE and os.path.exists(SUMMARY_FILE)
        with open(SUMMARY_FILE, 'a' if resuming else 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if not resuming:
                writer.writerow(['Date', 'Tweets Collected'])
            writer.writerows(daily_stats)


# ITC

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
MINIMUM_TWEETS = 10  # Per-day target: collection for a day stops once this many tweets are stored
START_DATE = datetime(2022, 1, 1)  # Start date of the range
END_DATE = datetime(2023, 12, 31)  # End date of the range (inclusive)
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds (NOTE(review): not referenced anywhere in this cell)
CSV_FILE = 'ITC.csv'  # File to store ITC tweets
SUMMARY_FILE = 'ITC_Collection_Summary.csv'  # File to store collection summary

# Initialize Twitter client
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# force=True replaces handlers already attached to the root logger. Without it,
# basicConfig does nothing when logging was configured earlier in the same
# session, and messages would keep going to the previously configured file.
logging.basicConfig(
    filename='tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Initialize counters
total_tweets_collected = 0  # Running total across all days of this run
daily_stats = []  # [date string, tweet count] entries for the summary file

def get_tweets(query, tweets):
    """Fetch one page of search results.

    Args:
        query: Search string handed to ``client.search_tweet``.
        tweets: ``None`` to start a fresh search, or a previously returned
            result whose ``next()`` supplies the following page.

    Returns:
        Whatever ``client.search_tweet`` or ``tweets.next()`` returns.
    """
    if tweets is not None:
        # Pause a random 5-10 seconds before requesting the following page.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query restricted to the single day ``date``.

    The query is bounded with ``since:<date>`` and ``until:<date + 1 day>``
    and limited to English tweets.
    """
    day_format = "%Y-%m-%d"
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    # NOTE(review): the generic terms (Stock, Market, ...) sit in the same OR
    # group as the company terms, so they are alternatives, not extra filters.
    keywords = ('(ITC OR #ITC OR "ITC Ltd" OR #ITCLimited OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis)')
    return f'{keywords} lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes, showing a tqdm bar that ticks once per second.

    Args:
        minutes: How long to wait. May be an int or a float (callers pass the
            float result of ``total_seconds() // 60``); the resulting number
            of seconds is rounded up, and negative values wait zero seconds.
    """
    requested_seconds = minutes * 60  # Convert minutes to seconds
    # range() and the tqdm total need an int, so convert explicitly instead of
    # passing a float through (range(float) raises TypeError).
    wait_time = max(0, int(requested_seconds))
    if wait_time < requested_seconds:
        wait_time += 1  # round a fractional second up rather than cutting the wait short
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for ``date`` and append them to CSV_FILE.

    Result pages are fetched with ``get_tweets`` until the per-day target is
    reached or a fetch returns nothing. Each tweet becomes one CSV row: date,
    running count for the day, user name, text, creation time, retweet count
    and like count. Exactly one ``[date, count]`` entry is appended to
    ``daily_stats`` per call, and ``total_tweets_collected`` is updated.

    On ``TooManyRequests`` the function sleeps until the reported reset time
    (or 15 minutes when that time is missing or already past) and retries.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    query = construct_query(date)
    day_label = date.strftime('%Y-%m-%d')
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # Be defensive: if the error carries no reset timestamp, fall back
            # to the fixed wait below instead of crashing in fromtimestamp().
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            rate_limit_reset = None
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # Whole minutes, rounded up, as an int: never wake before the reset.
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting for 15 minutes as a precaution.')
                delay_for_minutes(15)  # 15-minute delay in case of past reset time
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day_label}')
            break

        # Open the CSV once per page rather than once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([day_label, tweet_count, tweet.user.name, tweet.text,
                                 tweet.created_at, tweet.retweet_count, tweet.favorite_count])

        logging.info(f'Got {tweet_count} tweets for {day_label}')

    # Record the day exactly once, including days that yielded no tweets.
    daily_stats.append([day_label, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day_label}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day_label}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should resume.

    Reads CSV_FILE and returns the day after the date stored in the first
    column of its last non-empty data row.  START_DATE is returned when the
    file does not exist or contains nothing beyond its first (header) row.

    Returns:
        datetime: The first day that still has to be collected.

    Raises:
        ValueError: If the first column of the last data row is not a
            ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_row = None
    # newline='' lets the csv module handle line endings itself; the rows hold
    # free-form tweet text, which may contain newlines inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # the first row is the header
        # Stream the file and remember only the last usable row instead of
        # loading every row into memory.
        for row in reader:
            if row:  # blank lines parse as [] and must not be indexed
                last_row = row

    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist.
# The column order matches the rows appended by collect_tweets_for_day.
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date: the day after the last date already present in
# CSV_FILE, or START_DATE when nothing has been collected yet.
# NOTE(review): resuming always starts on the day *after* the last stored
# date, so a day that was interrupted part-way through is not topped up.
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range, one day at a time
    # (END_DATE itself is included).
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date += timedelta(days=1)

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Save summary to a CSV file.
    # NOTE(review): the file is opened in 'w' mode, so any previous summary is
    # overwritten; daily_stats presumably only holds entries from this run --
    # confirm if resumed runs should keep earlier rows. The summary is only
    # reached when the loop above finishes without raising.
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweets Collected'])
        writer.writerows(daily_stats)


# SBIN

In [None]:
import csv
import time
import logging
from datetime import datetime, timedelta
from random import randint
from twikit import Client, TooManyRequests
import os
from tqdm import tqdm

# Configuration
# Number of tweets stored per day: collect_tweets_for_day stops paging once
# this many rows have been written for a date (so it acts as a per-day cap).
MINIMUM_TWEETS = 10
START_DATE = datetime(2022, 1, 1)  # First day of the range (used when CSV_FILE has no data yet)
END_DATE = datetime(2023, 12, 31)  # Last day of the range (inclusive)
# NOTE(review): DELAY_BETWEEN_REQUESTS is not referenced by the code in this
# cell; get_tweets sleeps for a random 5-10 seconds instead.
DELAY_BETWEEN_REQUESTS = 1  # Delay between requests in seconds
CSV_FILE = 'SBIN.csv'  # File to store SBIN tweets (appended to, one row per tweet)
SUMMARY_FILE = 'SBIN_Collection_Summary.csv'  # File to store the per-day collection summary

# Initialize Twitter client (twikit) and load the saved session cookies.
# NOTE(review): no timeout is passed here, unlike the Client(timeout=10) used
# in the first cell of this notebook -- confirm whether that is intentional.
client = Client()  # Initialize your Twitter client
client.load_cookies('cookies.json')

# Setup logging
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers. If an earlier cell in the same kernel session already called
# basicConfig (e.g. with 'tcs_tweet_collection.log'), this call is ignored and
# messages keep going to the earlier file.
logging.basicConfig(filename='tweet_collection.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize counters
total_tweets_collected = 0  # running total, incremented by collect_tweets_for_day
daily_stats = []  # [date string, tweet count] entries, written to SUMMARY_FILE at the end

def get_tweets(query, tweets):
    """Fetch the first or the next page of search results.

    Args:
        query: Search string, used only when a new search is started.
        tweets: Result page returned by a previous call, or None to start a
            new search.

    Returns:
        The first page of the 'Top' search for ``query`` when ``tweets`` is
        None; otherwise whatever ``tweets.next()`` returns, requested after a
        random pause of 5 to 10 seconds.
    """
    if tweets is not None:
        # Continue an existing search, pausing first between page requests.
        pause = randint(5, 10)
        logging.info(f'Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    logging.info(f'Getting tweets for query: {query}...')
    return client.search_tweet(query, product='Top')

def construct_query(date):
    """Build the search query for English tweets posted on a single day.

    Args:
        date: ``datetime`` of the day to search.

    Returns:
        str: The keyword group followed by ``lang:en`` and an
        ``until:``/``since:`` pair spanning ``date`` up to the following day.
    """
    day_format = '%Y-%m-%d'
    since = date.strftime(day_format)
    until = (date + timedelta(days=1)).strftime(day_format)
    keywords = ('(SBIN OR "State Bank of India" OR #SBI OR #SBIN OR "SBI Bank" OR '
                'Stock OR Market OR #StockMarket OR #MarketAnalysis) ')
    return f'{keywords}lang:en until:{until} since:{since}'

def delay_for_minutes(minutes=15):
    """Sleep for ``minutes`` minutes while showing a tqdm progress bar.

    Args:
        minutes: Length of the pause in minutes.  May be an int or a float
            (for example the float produced by floor-dividing a
            ``total_seconds()`` value by 60).  The pause is rounded to whole
            seconds; zero or negative values return without sleeping.
    """
    # range() only accepts integers, so normalise the duration to a whole,
    # non-negative number of seconds before building the bar and the loop.
    wait_time = max(0, round(minutes * 60))
    with tqdm(total=wait_time, desc=f'Waiting for {minutes} minutes', unit='s', leave=True) as rate_pbar:
        for _ in range(wait_time):
            time.sleep(1)
            rate_pbar.update(1)

def collect_tweets_for_day(date):
    """Collect up to MINIMUM_TWEETS tweets for one day and append them to CSV_FILE.

    Pages through the search results for ``construct_query(date)`` until
    MINIMUM_TWEETS rows have been written or a page comes back empty.  Every
    stored tweet is appended to CSV_FILE and counted in the module-level
    ``total_tweets_collected``; exactly one ``[date, count]`` entry is added
    to ``daily_stats`` once the day is finished (also when nothing was found).

    When the client raises ``TooManyRequests`` the function sleeps via
    ``delay_for_minutes`` and then retries the same request.

    Args:
        date: ``datetime`` of the day to collect.
    """
    global total_tweets_collected
    day = date.strftime('%Y-%m-%d')
    query = construct_query(date)
    tweet_count = 0
    tweets = None

    while tweet_count < MINIMUM_TWEETS:
        try:
            tweets = get_tweets(query, tweets)
        except TooManyRequests as e:
            # NOTE(review): the reset timestamp is presumably absent (None)
            # when the response carries no rate-limit header -- confirm
            # against the installed twikit version.
            reset_timestamp = getattr(e, 'rate_limit_reset', None)
            wait_time = 0
            if reset_timestamp is not None:
                rate_limit_reset = datetime.fromtimestamp(reset_timestamp)
                wait_time = (rate_limit_reset - datetime.now()).total_seconds()
            if wait_time > 0:
                logging.warning(f'Rate limit reached. Waiting until {rate_limit_reset}')
                # total_seconds() is a float; hand over a whole number of
                # minutes, rounded up so the retry never happens before the
                # reset time (a sub-minute wait must not collapse to zero).
                delay_for_minutes(int(wait_time // 60) + 1)
            else:
                logging.warning('Rate limit reset time is unknown or in the past. Waiting 15 minutes before retrying.')
                delay_for_minutes(15)  # fixed fallback delay
            continue

        if not tweets:
            logging.info(f'No more tweets found for {day}')
            break

        # Open the CSV once per result page instead of once per tweet.
        with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            for tweet in tweets:
                if tweet_count >= MINIMUM_TWEETS:
                    break
                tweet_count += 1
                total_tweets_collected += 1
                writer.writerow([
                    day,
                    tweet_count,
                    tweet.user.name,
                    tweet.text,
                    tweet.created_at,
                    tweet.retweet_count,
                    tweet.favorite_count,
                ])

        logging.info(f'Got {tweet_count} tweets for {day}')

    # Record the day exactly once, after paging is over, so the summary has
    # one row per date rather than one row per result page.
    daily_stats.append([day, tweet_count])

    if tweet_count < MINIMUM_TWEETS:
        logging.warning(f'Collected {tweet_count} tweets for {day}, which is less than the minimum of {MINIMUM_TWEETS}')

    logging.info(f'Done for {day}')
    print(f'Total tweets collected so far: {total_tweets_collected}')

def get_last_collected_date():
    """Return the date at which collection should resume.

    Reads CSV_FILE and returns the day after the date stored in the first
    column of its last non-empty data row.  START_DATE is returned when the
    file does not exist or contains nothing beyond its first (header) row.

    Returns:
        datetime: The first day that still has to be collected.

    Raises:
        ValueError: If the first column of the last data row is not a
            ``YYYY-MM-DD`` date.
    """
    if not os.path.exists(CSV_FILE):
        return START_DATE

    last_row = None
    # newline='' lets the csv module handle line endings itself; the rows hold
    # free-form tweet text, which may contain newlines inside quoted fields.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # the first row is the header
        # Stream the file and remember only the last usable row instead of
        # loading every row into memory.
        for row in reader:
            if row:  # blank lines parse as [] and must not be indexed
                last_row = row

    if last_row is None:
        return START_DATE
    return datetime.strptime(last_row[0], '%Y-%m-%d') + timedelta(days=1)

# Write header to the CSV file if it doesn't exist.
# The column order matches the rows appended by collect_tweets_for_day.
if not os.path.exists(CSV_FILE):
    with open(CSV_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweet Count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])

# Get the starting date: the day after the last date already present in
# CSV_FILE, or START_DATE when nothing has been collected yet.
# NOTE(review): resuming always starts on the day *after* the last stored
# date, so a day that was interrupted part-way through is not topped up.
start_date = get_last_collected_date()
if start_date > END_DATE:
    print(f'All data has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
else:
    # Collect tweets for the specified date range, one day at a time
    # (END_DATE itself is included).
    current_date = start_date
    while current_date <= END_DATE:
        collect_tweets_for_day(current_date)
        current_date += timedelta(days=1)

    print(f'Finished collecting tweets. Total tweets collected: {total_tweets_collected}')

    # Save summary to a CSV file.
    # NOTE(review): the file is opened in 'w' mode and daily_stats starts out
    # empty in this cell, so after a resumed run the summary only covers the
    # days collected in this run and replaces any earlier summary. The summary
    # is only reached when the loop above finishes without raising.
    with open(SUMMARY_FILE, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Date', 'Tweets Collected'])
        writer.writerows(daily_stats)
