In [3]:
import os
import pickle
import re
import time

import numpy as np
import tensorflow as tf
import torch
import tweepy
from tensorflow import keras
from transformers import BertTokenizer

In [4]:
# Restore the saved BiLSTM model from disk without compiling it (no optimizer is loaded)
MODEL_PATH = "best_bilstm_model.keras"
model = keras.models.load_model(MODEL_PATH, compile=False)

# Compile again with rmsprop and binary cross-entropy
# (optional; only matters if you plan to retrain)
model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [5]:
# Load the pickled embedding lookup that get_bert_embedding() indexes by word.
# CAUTION: pickle.load can execute arbitrary code; only open a file you produced or trust.
EMBEDDINGS_PATH = "bert_embeddings.pkl"
with open(EMBEDDINGS_PATH, "rb") as embeddings_file:
    bert_embeddings = pickle.load(embeddings_file)

In [6]:
# Load the tokenizer (not a model) for the "bert-base-uncased" checkpoint.
# NOTE(review): presumably the same checkpoint used when the embeddings were built — confirm.
# NOTE(review): `tokenizer` is not referenced by any later cell visible here — confirm it is still needed.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [None]:
# Read the API bearer token from the TWITTER_BEARER_TOKEN environment variable so the
# secret is never stored in (or committed with) the notebook. Falls back to an empty
# string when the variable is unset, which matches the previous placeholder value.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN", "")
client = tweepy.Client(bearer_token=BEARER_TOKEN)

In [8]:
# Initialize Tweepy Client
# NOTE: this repeats the client construction from the preceding cell, using the same BEARER_TOKEN.
client = tweepy.Client(bearer_token=BEARER_TOKEN)

In [9]:
def clean_text(text):
    """Normalize raw tweet text for embedding lookup.

    Lowercases the input, deletes @mentions, #hashtags, URLs starting with
    "http", and any remaining character that is not an ASCII letter, digit or
    whitespace, then collapses whitespace runs to a single space and trims the
    ends.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned string (possibly empty).
    """
    # Applied in this order: mentions, hashtags, URLs, then leftover punctuation/symbols.
    removal_patterns = (r'@\w+', r'#\w+', r'http\S+', r'[^a-zA-Z0-9\s]')

    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    # Squeeze the gaps left behind by the removals
    return re.sub(r'\s+', ' ', cleaned).strip()

In [10]:
def get_bert_embedding(text):
    """Build one vector for a sentence by averaging its known word vectors.

    The text is split on whitespace and each word is looked up in the
    module-level `bert_embeddings` table; words missing from the table are
    skipped.

    Args:
        text: The (already cleaned) sentence.

    Returns:
        The element-wise mean of the vectors that were found, or a zero vector
        of length 768 when none of the words is in the table.
    """
    known_vectors = []
    for token in text.split():
        if token in bert_embeddings:
            known_vectors.append(bert_embeddings[token])

    # Nothing recognised: fall back to an all-zero vector
    if len(known_vectors) == 0:
        return np.zeros((768,))

    return np.mean(known_vectors, axis=0)

In [11]:
def _search_recent_with_retry(wait_message, **search_kwargs):
    """Run `client.search_recent_tweets(**search_kwargs)`, waiting out rate limits.

    Each time Tweepy raises `TooManyRequests`, prints `wait_message`, sleeps
    15 minutes and retries this one request only, so the caller keeps whatever
    it has already collected.
    """
    while True:
        try:
            return client.search_recent_tweets(**search_kwargs)
        except tweepy.TooManyRequests:
            print(wait_message)
            time.sleep(900)  # Wait for 15 minutes


def fetch_tweets_and_replies(keyword, num_tweets=10):
    """Fetch recent tweets matching `keyword` together with up to 5 replies each.

    Args:
        keyword: Search query passed to the recent-search endpoint.
        num_tweets: Maximum number of tweets to return. Values above 100 are
            capped at 100 (one page of results); values <= 0 return an empty
            list without calling the API.

    Returns:
        A list of dicts `{"tweet": <cleaned text>, "replies": [<cleaned text>, ...]}`,
        one per tweet, in the order returned by the API. Empty when nothing matched.

    Notes:
        When a request is rate limited, the function blocks for 15 minutes and
        retries that request; it no longer restarts the whole search and
        discards the tweets gathered so far.
    """
    # Per the Tweepy / X API documentation, recent search only accepts
    # max_results between 10 and 100. Smaller requests (including the 5 replies
    # wanted per tweet) are rejected, so ask for a valid page size and trim locally.
    api_min_results, api_max_results = 10, 100
    replies_per_tweet = 5

    if num_tweets <= 0:
        return []

    # Fetch recent tweets
    tweets = _search_recent_with_retry(
        "Rate limit exceeded. Waiting for 15 minutes...",
        query=keyword,
        max_results=min(max(num_tweets, api_min_results), api_max_results),
        tweet_fields=["conversation_id", "author_id"],
    )

    tweets_data = []
    for tweet in (tweets.data or [])[:num_tweets]:
        # Fetch replies for the tweet: the tweet's own id is used as the conversation id
        reply_tweets = _search_recent_with_retry(
            "Rate limit exceeded while fetching replies. Waiting for 15 minutes...",
            query=f"conversation_id:{tweet.id} -is:retweet",
            max_results=api_min_results,
        )
        replies = [
            clean_text(reply.text)
            for reply in (reply_tweets.data or [])[:replies_per_tweet]
        ]
        tweets_data.append({"tweet": clean_text(tweet.text), "replies": replies})

    return tweets_data

In [12]:
def classify_comments(comments, positive_threshold=0.6, negative_threshold=0.4):
    """Score comments with the loaded model and report the sentiment split.

    Each comment is cleaned with `clean_text`, turned into a vector with
    `get_bert_embedding`, and the stacked vectors are passed to `model.predict`.
    A prediction at or above `positive_threshold` counts as positive, at or
    below `negative_threshold` as negative, anything in between as neutral.

    Args:
        comments: Iterable of comment strings.
        positive_threshold: Lowest score counted as positive (default 0.6).
        negative_threshold: Highest score counted as negative (default 0.4).

    Returns:
        Dict with keys "positive", "negative" and "neutral" mapping to the
        percentage of comments in each class, rounded to 2 decimals. All three
        are 0.0 when `comments` is empty.
    """
    sentiment_counts = {"positive": 0, "negative": 0, "neutral": 0}

    comments = list(comments)
    if not comments:
        # Nothing to score: skip the model call and avoid dividing by zero below.
        return {label: 0.0 for label in sentiment_counts}

    processed_comments = [clean_text(comment) for comment in comments]
    # One row per comment.
    # NOTE(review): confirm (n_comments, vector_dim) is the input shape the loaded model expects.
    embeddings = np.array([get_bert_embedding(comment) for comment in processed_comments])

    predictions = model.predict(embeddings)  # Get model predictions

    for pred in predictions:
        if pred >= positive_threshold:  # Positive sentiment
            sentiment_counts["positive"] += 1
        elif pred <= negative_threshold:  # Negative sentiment
            sentiment_counts["negative"] += 1
        else:  # Neutral sentiment
            sentiment_counts["neutral"] += 1

    total = sum(sentiment_counts.values())
    if total == 0:
        # The model returned no predictions; report an empty split instead of crashing.
        return {label: 0.0 for label in sentiment_counts}

    return {label: round((count / total) * 100, 2) for label, count in sentiment_counts.items()}

In [None]:
# Fetch tweets for one keyword and report the sentiment split of tweets + replies
keyword = "AI technology"  # Change keyword
tweets = fetch_tweets_and_replies(keyword)

if not tweets:
    print("No tweets found.")
else:
    # Flatten the results: each tweet's text followed by its replies, in fetch order
    all_comments = [
        text
        for tweet in tweets
        for text in (tweet["tweet"], *tweet["replies"])
    ]

    if not all_comments:
        print("No comments to analyze.")
    else:
        sentiment_result = classify_comments(all_comments)

        # Display Sentiment Analysis Result
        print("\nSentiment Breakdown:")
        for sentiment, percentage in sentiment_result.items():
            print(f"{sentiment.capitalize()}: {percentage}%")

Rate limit exceeded. Waiting for 15 minutes...
