In [1]:
import logging
from datetime import datetime, timedelta
from tqdm import tqdm
from scraper import collect_tweets_for_day, get_last_collected_date

In [None]:
import logging
from datetime import datetime, timedelta
from tqdm import tqdm
from scraper import collect_tweets_for_day, get_last_collected_date

# ---------------------------------------------------------------------------
# Collection settings
# ---------------------------------------------------------------------------
# First and last calendar day of the collection window; the collection loop
# treats END_DATE as inclusive.
START_DATE = datetime(year=2022, month=1, day=1)
END_DATE = datetime(year=2023, month=12, day=31)

# Ticker symbols to collect tweets for. Each one carries a ".NS" suffix.
STOCKS = [
    "ITC.NS",
    "SBIN.NS",
    "KOTAKBANK.NS",
    "BHARTIARTL.NS",
    "HCLTECH.NS",
    "LT.NS",
    "AXISBANK.NS",
    "ASIANPAINT.NS",
    "BAJFINANCE.NS",
    "MARUTI.NS",
    "M&M.NS",
    "SUNPHARMA.NS",
    "TITAN.NS",
    "ULTRACEMCO.NS",
    "NESTLEIND.NS",
    "INDUSINDBK.NS",
    "ADANIENT.NS",
    "POWERGRID.NS",
    "NTPC.NS",
    "TATASTEEL.NS",
    "JSWSTEEL.NS",
    "ONGC.NS",
    "BAJAJFINSV.NS",
    "DIVISLAB.NS",
    "TECHM.NS",
    "WIPRO.NS",
    "GRASIM.NS",
    "BRITANNIA.NS",
    "CIPLA.NS",
    "ADANIGREEN.NS",
    "ADANIPORTS.NS",
    "HEROMOTOCO.NS",
    "COALINDIA.NS",
    "BPCL.NS",
    "APOLLOHOSP.NS",
    "TATAMOTORS.NS",
    "HDFCLIFE.NS",
    "DABUR.NS",
    "BAJAJ-AUTO.NS",
    "DRREDDY.NS",
    "SBILIFE.NS",
    "EICHERMOT.NS",
    "HINDALCO.NS",
    "ICICIGI.NS",
]

# Search query template; the "{}" placeholder is filled in per stock.
# NOTE(review): the remaining OR terms are identical for every stock, so the
# per-stock result sets may overlap heavily -- confirm this is intended.
QUERY_BASE_TEMPLATE = '{} OR Stock OR Market OR #StockMarket OR #MarketAnalysis'

# Send INFO-and-above records to a log file, stamped with time and level.
logging.basicConfig(
    filename='stock_tweet_collection.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Collect tweets for every configured stock, one day per call, writing each
# stock's results to its own CSV file. Progress is shown with tqdm and also
# recorded through the logging configuration set up above.
for stock in STOCKS:
    # One CSV per stock, named after the ticker with ".NS" removed.
    CSV_FILE = f'{stock.replace(".NS", "")}.csv'
    QUERY_BASE = QUERY_BASE_TEMPLATE.format(stock)

    # First day still to be collected for this stock.
    # NOTE(review): START_DATE is presumably the fallback used when the CSV
    # has no data yet -- confirm against scraper.get_last_collected_date.
    start_date = get_last_collected_date(CSV_FILE, START_DATE)

    # Guard clause: nothing left to do for this stock.
    if start_date > END_DATE:
        print(f'All data for {stock} has already been collected up to {END_DATE.strftime("%Y-%m-%d")}.')
        logging.info("Skipping %s: already collected up to %s", stock, END_DATE.strftime("%Y-%m-%d"))
        continue

    # Inclusive day count; only meaningful once start_date <= END_DATE holds,
    # which is why it is computed after the guard above.
    total_days = (END_DATE - start_date).days + 1
    logging.info("Collecting tweets for %s starting %s (%d days)", stock, start_date, total_days)

    with tqdm(total=total_days, desc=f"Collecting Tweets for {stock}", unit="day") as pbar:
        current_date = start_date
        try:
            while current_date <= END_DATE:
                collect_tweets_for_day(QUERY_BASE, current_date, CSV_FILE)
                current_date += timedelta(days=1)
                pbar.update(1)
        except Exception:
            # Record where the run stopped, then re-raise so the failure is
            # not hidden and a later run can resume from the CSV contents.
            logging.exception("Collection failed for %s on %s", stock, current_date)
            raise

    print(f'Finished collecting tweets for {stock}.')
    logging.info("Finished collecting tweets for %s", stock)