IMPORT LIBRARIES

INSTALLING SNSCRAPE - ALTERNATIVE FOR TWITTER API BECAUSE OF FEW RESTRICTIONS

In [35]:
from config import TWITTER_CREDS, STOCK_SYMBOL, LOOKBACK_DAYS
import tweepy
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta,time
import os
import snscrape.modules.twitter as sntwitter
from dotenv import load_dotenv

CONFIGURATION

In [26]:
STOCK_SYMBOL = "SBIN.NS"  # SBI NSE symbol
LOOKBACK_DAYS = 30
os.makedirs("data/raw", exist_ok=True)

TWITTER AUTHENTICATION

In [36]:
# Load config
load_dotenv()
TWITTER_API_KEY = os.getenv('TWITTER_API_KEY')
TWITTER_API_SECRET = os.getenv('TWITTER_API_SECRET')
TWITTER_ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
TWITTER_ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')

SEARCH QUERY

In [40]:
start_date = (datetime.now() - timedelta(days=LOOKBACK_DAYS)).strftime('%Y-%m-%d')
end_date = datetime.now().strftime('%Y-%m-%d')
query = f"""({STOCK_SYMBOL} OR SBI OR "State Bank of India" OR #SBI) 
since:{start_date} until:{end_date} -is:retweet -is:reply lang:en"""

print(f"🔄 Collecting tweets for {STOCK_SYMBOL}...")

🔄 Collecting tweets for HDFCBANK.NS...


FETCH TWEETS

===== OPTION 1: TWEEPY (Twitter API) =====

In [None]:

try:
    print("\nAttempting Tweepy (Twitter API)...")
    auth = tweepy.OAuthHandler(TWITTER_API_KEY, TWITTER_API_SECRET)
    auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET)
    api = tweepy.API(auth, wait_on_rate_limit=True)
    
    tweets = []
    for tweet in tweepy.Cursor(api.search_tweets,
                              q=query,
                              lang="en",
                              tweet_mode='extended').items(500):  # Reduced for testing
        tweets.append({
            'date': tweet.created_at,
            'text': tweet.full_text,
            'user': tweet.user.username  # Changed from screen_name to username
        })
    
    # Standardized filename: SBIN.NS_tweets.csv
    tweets_df = pd.DataFrame(tweets)
    tweets_df.to_csv(f"data/raw/{STOCK_SYMBOL}_tweets.csv", index=False)
    print(f"✅ Tweepy: Saved {len(tweets_df)} tweets to {STOCK_SYMBOL}_tweets.csv")

except Exception as e:
    print(f"❌ Tweepy failed: {e}")
    print("Falling back to snscrape...")


Attempting Tweepy (Twitter API)...
❌ Tweepy failed: 403 Forbidden
453 - You currently have access to a subset of X API V2 endpoints and limited v1.1 endpoints (e.g. media post, oauth) only. If you need access to this endpoint, you may need a different access level. You can learn more here: https://developer.x.com/en/portal/product
Falling back to snscrape...


 ===== OPTION 2: SNSCRAPE (NO API) =====

In [None]:

try:
        print("\nTrying snscrape...")
        tweets = []
        for i, tweet in enumerate(
            sntwitter.TwitterSearchScraper(query).get_items()
        ):
            if i >= 500:  # Limit to 500 tweets
                break
            tweets.append({
                'date': tweet.date,
                'text': tweet.content,
                'user': tweet.user.username
            })

        # Standardized filename: SBIN.NS_tweets.csv
        tweets_df = pd.DataFrame(tweets)
        tweets_df.to_csv(f"data/raw/{STOCK_SYMBOL}_tweets.csv", index=False)
        print(f"✅ snscrape: Saved {len(tweets_df)} tweets to {STOCK_SYMBOL}_tweets.csv")

except Exception as e:
        print(f"❌ snscrape failed: {e}")


Trying snscrape...


Errors: blocked (403), blocked (404), blocked (404), blocked (404)




===== OPTION 3: YFINANCE =====

In [44]:
# ===== FETCH STOCK DATA (YFINANCE) =====
try:
    print("\nFetching stock data...")
    stock_df = yf.download(STOCK_SYMBOL, period=f"{LOOKBACK_DAYS}d")
    # Standardized to use _tweets.csv pattern for all files
    stock_df.to_csv(f"data/raw/{STOCK_SYMBOL}_tweets.csv")  # Now using _tweets.csv
    print(f"✅ Stock data saved to {STOCK_SYMBOL}_tweets.csv")
except Exception as e:
    print(f"❌ Yahoo Finance Error: {e}")


Fetching stock data...


[*********************100%***********************]  1 of 1 completed

✅ Stock data saved to HDFCBANK.NS_tweets.csv



