# Global Tweet Sentiment Analysis - Mood of the World { Free API Version }

# Install Required Packages

In [None]:
!pip install tweepy textblob plotly pandas numpy wordcloud matplotlib seaborn python-dotenv scikit-learn --quiet
!python -m textblob.download_corpora

# Import Libraries

In [None]:
import tweepy
import pandas as pd
import numpy as np
from textblob import TextBlob
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as pyo
from datetime import datetime, timedelta
import re
import json
from collections import Counter
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
pyo.init_notebook_mode(connected=True)

# Twitter API Configuration
## Note: obtain your API credentials from https://developer.twitter.com/


In [None]:
class TwitterConfig:
    """Twitter API credentials and free-tier rate-limit settings.

    Each credential is taken from an environment variable when that variable
    is set to a non-empty value; otherwise the ``YOUR_...`` placeholder is
    kept so the notebook still runs (and falls back to sample data) without
    any configuration.

    Environment variables consulted:
        TWITTER_API_KEY, TWITTER_API_SECRET, TWITTER_ACCESS_TOKEN,
        TWITTER_ACCESS_TOKEN_SECRET, TWITTER_BEARER_TOKEN
    """

    def __init__(self):
        # Local import keeps this cell runnable on its own.
        import os

        def _credential(env_name, placeholder):
            # ``or`` also covers a variable that is set but empty.
            return os.environ.get(env_name) or placeholder

        # Prefer environment variables over editing secrets into the source.
        self.API_KEY = _credential("TWITTER_API_KEY", "YOUR_API_KEY")
        self.API_SECRET = _credential("TWITTER_API_SECRET", "YOUR_API_SECRET")
        self.ACCESS_TOKEN = _credential("TWITTER_ACCESS_TOKEN", "YOUR_ACCESS_TOKEN")
        self.ACCESS_TOKEN_SECRET = _credential(
            "TWITTER_ACCESS_TOKEN_SECRET", "YOUR_ACCESS_TOKEN_SECRET"
        )
        self.BEARER_TOKEN = _credential("TWITTER_BEARER_TOKEN", "YOUR_BEARER_TOKEN")

        # Rate limit settings
        self.FREE_TIER_MONTHLY_LIMIT = 100
        self.DAILY_BUDGET = 10
        self.BATCH_SIZE = 25
        self.REQUEST_DELAY = 1.5  # passed to time.sleep() between API requests

    def get_api_v1(self):
        """Return a ``tweepy.API`` (v1.1) client built from the OAuth 1.0a credentials."""
        auth = tweepy.OAuthHandler(self.API_KEY, self.API_SECRET)
        auth.set_access_token(self.ACCESS_TOKEN, self.ACCESS_TOKEN_SECRET)
        return tweepy.API(auth, wait_on_rate_limit=True)

    def get_api_v2(self):
        """Return a ``tweepy.Client`` (v2) built from the bearer token and OAuth credentials."""
        return tweepy.Client(
            bearer_token=self.BEARER_TOKEN,
            consumer_key=self.API_KEY,
            consumer_secret=self.API_SECRET,
            access_token=self.ACCESS_TOKEN,
            access_token_secret=self.ACCESS_TOKEN_SECRET,
            wait_on_rate_limit=True,
        )


# Shared configuration instance used by the cells below.
config = TwitterConfig()

# Data Collection Via API or Sample Data for Demonstration

In [None]:
class GlobalTweetCollector:
    """Collect tweets (live or synthetic) and annotate them with sentiment.

    Live tweets come from a tweepy v2 client passed as ``api_v2``; synthetic
    tweets are produced by :meth:`generate_realistic_sample_data`. Both paths
    return a ``pandas.DataFrame`` with the same core columns.
    """

    # Keyword lists for detect_emotion(); matched against whole words.
    _EMOTION_KEYWORDS = {
        'joy': ['happy', 'excited', 'great', 'wonderful', 'amazing', 'love', 'perfect'],
        'sadness': ['sad', 'depressed', 'down', 'upset', 'disappointed', 'lonely'],
        'anger': ['angry', 'mad', 'furious', 'annoyed', 'frustrated', 'hate'],
        'fear': ['scared', 'afraid', 'worried', 'anxious', 'nervous', 'panic'],
        'surprise': ['surprised', 'shocked', 'unexpected', 'wow', 'omg'],
        'gratitude': ['grateful', 'thankful', 'blessed', 'appreciate', 'lucky'],
    }

    # Base texts for the synthetic tweets, grouped by intended sentiment.
    _TWEET_TEMPLATES = {
        'positive': [
            "Just had the most amazing coffee this morning! ☀️",
            "Feeling incredibly grateful for my family today 💕",
            "Beautiful sunset tonight, life is good! 🌅",
            "Finally finished that project I've been working on! 🎉",
            "Met up with old friends today, such a perfect day! 😊",
            "Got some great news today, feeling so blessed! ✨",
            "Nothing beats a good workout to start the day! 💪",
            "Weekend vibes are hitting different today! 🌟",
            "Sometimes the little things make me the happiest 💝",
            "Accomplished so much today, feeling proud! 🏆",
        ],
        'negative': [
            "Having one of those days where nothing goes right 😔",
            "Really struggling to find motivation lately",
            "Work has been incredibly stressful this week",
            "Feeling overwhelmed with everything going on",
            "Just need a break from all this chaos",
            "When will things start looking up? 😞",
            "Another sleepless night ahead of me",
            "Can't seem to catch a break these days",
            "Feeling disconnected from everyone lately",
            "This weather is really getting me down",
        ],
        'neutral': [
            "Just another typical Monday morning",
            "Regular day at the office, nothing special",
            "Weather looks okay for the weekend",
            "Going through my usual routine today",
            "Standard Tuesday, same as always",
            "Just checking in, hope everyone's well",
            "Another day, another dollar as they say",
            "Normal evening at home tonight",
            "Regular coffee meeting this afternoon",
            "Just going with the flow today",
        ],
    }

    # Suffixes appended to a template to vary the synthetic text.
    _TEXT_VARIATIONS = {
        'positive': ['!', ' 🙂', ' Today was good.', ' Feeling blessed.'],
        'negative': ['...', ' 😕', ' Not my best day.', ' Hope tomorrow is better.'],
        'neutral': ['.', ' Just saying.', " That's life.", ' Moving on.'],
    }

    # Baseline probability of each sentiment class before time-of-day adjustment.
    _BASE_DISTRIBUTION = {'positive': 0.45, 'negative': 0.25, 'neutral': 0.30}

    # Poisson means for (likes, retweets) per sentiment class.
    _ENGAGEMENT_RATES = {
        'positive': (8, 2),
        'negative': (3, 1),
        'neutral': (4, 1),
    }

    def __init__(self, api_v1=None, api_v2=None):
        """Store the optional tweepy clients and start with no collected tweets."""
        self.api_v1 = api_v1
        self.api_v2 = api_v2
        self.tweets_data = []

    def clean_tweet(self, text):
        """Return ``text`` without URLs, @mentions, #hashtags and symbols.

        Word characters, whitespace and the emoji ranges listed in the last
        pattern are kept. Whitespace is collapsed *after* symbols are removed
        so that dropping a symbol between two spaces cannot leave a double
        space behind.
        """
        text = re.sub(r'http\S+|www\S+', '', text)
        text = re.sub(r'@\w+|#\w+', '', text)
        text = re.sub(r'[^\w\s\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def get_enhanced_sentiment(self, text):
        """Classify ``text`` with TextBlob and return a dict of sentiment fields.

        Polarity above 0.2 is 'Positive', below -0.2 is 'Negative', anything
        else 'Neutral'. The returned dict has the keys ``sentiment``,
        ``polarity``, ``subjectivity``, ``confidence``, ``emotion`` and
        ``word_count``. If the analysis raises, an all-neutral dict with zeroed
        scores is returned instead.
        """
        try:
            scores = TextBlob(text).sentiment
            polarity = scores.polarity
            strength = abs(polarity)

            if polarity > 0.2:
                label, confidence = 'Positive', strength
            elif polarity < -0.2:
                label, confidence = 'Negative', strength
            else:
                # The closer polarity is to zero, the more confident "Neutral" is.
                label, confidence = 'Neutral', 1 - strength

            return {
                'sentiment': label,
                'polarity': polarity,
                'subjectivity': scores.subjectivity,
                'confidence': confidence,
                'emotion': self.detect_emotion(text.lower()),
                'word_count': len(text.split()),
            }
        except Exception:
            # Deliberate best-effort: one unanalysable tweet must not abort a
            # whole collection run, so fall back to a neutral record.
            return {
                'sentiment': 'Neutral',
                'polarity': 0.0,
                'subjectivity': 0.0,
                'confidence': 0.0,
                'emotion': 'neutral',
                'word_count': 0,
            }

    def detect_emotion(self, text):
        """Return the emotion whose keywords occur most often in ``text``.

        Keywords are matched case-insensitively against whole words, so
        "whatever" does not count as "hate" and "made" does not count as
        "mad". Each keyword counts at most once. Ties go to the emotion listed
        first in the keyword table; 'neutral' is returned when nothing matches.
        """
        words = set(re.findall(r'\w+', text.lower()))

        emotion_scores = {}
        for emotion, keywords in self._EMOTION_KEYWORDS.items():
            hits = sum(1 for keyword in keywords if keyword in words)
            if hits > 0:
                emotion_scores[emotion] = hits

        if not emotion_scores:
            return 'neutral'
        return max(emotion_scores, key=emotion_scores.get)

    def collect_tweets_with_api(self, keywords=None, count=50):
        """Search recent tweets for each keyword and return them as a DataFrame.

        Args:
            keywords: Phrases to search for; a default list of mood phrases is
                used when ``None``. An empty list performs no search.
            count: Stop once ``self.tweets_data`` holds this many tweets.

        Returns:
            A DataFrame built from ``self.tweets_data`` (which this method
            appends to), or an empty DataFrame when no v2 client is configured.

        Errors raised while processing a keyword are printed and the next
        keyword is tried; an error mentioning "rate limit" stops the loop.
        """
        import time

        if not self.api_v2:
            print("Twitter API not configured")
            return pd.DataFrame()

        if keywords is None:
            keywords = ["feeling good", "having a great day", "feeling down", "stressed out", "grateful today"]

        if not keywords:
            # Avoid dividing by zero below; there is nothing to search for.
            print("No keywords supplied - nothing to collect")
            return pd.DataFrame(self.tweets_data)

        print(f"Collecting tweets using Twitter API...")
        print(f"Target: {count} tweets across {len(keywords)} keywords")

        tweets_per_keyword = max(1, count // len(keywords))
        # The recent-search endpoint only accepts max_results in 10..100;
        # smaller values are rejected, so clamp the per-request size.
        max_results = max(10, min(tweets_per_keyword, 100))

        for i, keyword in enumerate(keywords):
            if len(self.tweets_data) >= count:
                break

            try:
                print(f"Searching: '{keyword}' ({i+1}/{len(keywords)})")

                tweets = self.api_v2.search_recent_tweets(
                    query=f'"{keyword}" -is:retweet lang:en',
                    tweet_fields=['created_at', 'author_id', 'public_metrics', 'context_annotations'],
                    max_results=max_results,
                )

                for tweet in tweets.data or []:
                    if len(self.tweets_data) >= count:
                        break

                    cleaned_text = self.clean_tweet(tweet.text)
                    # Skip tweets with too little text left after cleaning.
                    if len(cleaned_text) <= 15:
                        continue

                    metrics = tweet.public_metrics or {}
                    self.tweets_data.append({
                        'id': tweet.id,
                        'text': cleaned_text,
                        'original_text': tweet.text,
                        'created_at': tweet.created_at,
                        'author_id': tweet.author_id,
                        'keyword': keyword,
                        'source': 'twitter_api',
                        **self.get_enhanced_sentiment(cleaned_text),
                        'retweet_count': metrics.get('retweet_count', 0),
                        'like_count': metrics.get('like_count', 0),
                    })

                # Pause between requests; uses the module-level ``config``.
                time.sleep(config.REQUEST_DELAY)

            except Exception as e:
                print(f"Error with '{keyword}': {str(e)}")
                if "rate limit" in str(e).lower():
                    print("Rate limit reached - switching to sample data")
                    break

        print(f"Collected {len(self.tweets_data)} real tweets")
        return pd.DataFrame(self.tweets_data)

    @staticmethod
    def _positivity_boost(hour, day_of_week):
        """Return the adjustment to the positive-sentiment probability.

        Mornings (7-10) and evenings (18-21) raise it, late night (23:00
        through 02:59, wrapping past midnight) lowers it, and weekends
        (``day_of_week`` >= 5) raise it when no hour window applies.
        """
        if 7 <= hour <= 10:
            return 0.15
        if 18 <= hour <= 21:
            return 0.10
        # The window wraps midnight, so it cannot be written as a chained
        # comparison (``23 <= hour <= 2`` is never true).
        if hour >= 23 or hour <= 2:
            return -0.20
        if day_of_week >= 5:
            return 0.12
        return 0

    def generate_realistic_sample_data(self, size=1000):
        """Generate ``size`` synthetic tweets spread evenly over the last 7 days.

        The sentiment class of each tweet is drawn at random with
        probabilities shifted by time of day / weekend, its text is a template
        plus a random suffix, and like/retweet counts are Poisson draws whose
        mean depends on the class. Each text is passed through
        :meth:`get_enhanced_sentiment`.

        Returns:
            A DataFrame with one row per generated tweet.
        """
        print(f" Generating {size} realistic sample tweets...")

        classes = ['positive', 'negative', 'neutral']
        base = self._BASE_DISTRIBUTION
        end_date = datetime.now()
        start_date = end_date - timedelta(days=7)
        timestamps = pd.date_range(start=start_date, end=end_date, periods=size)

        sample_data = []
        for i, timestamp in enumerate(timestamps):
            pos_boost = self._positivity_boost(timestamp.hour, timestamp.weekday())

            # Clamp so positive never dominates completely and negative never
            # vanishes; neutral takes whatever probability mass is left.
            adjusted_pos = min(0.7, base['positive'] + pos_boost)
            adjusted_neg = max(0.1, base['negative'] - pos_boost / 2)
            adjusted_neu = 1 - adjusted_pos - adjusted_neg

            sentiment_choice = np.random.choice(
                classes,
                p=[adjusted_pos, adjusted_neg, adjusted_neu],
            )

            template = np.random.choice(self._TWEET_TEMPLATES[sentiment_choice])
            text = template + np.random.choice(self._TEXT_VARIATIONS[sentiment_choice])

            sentiment_data = self.get_enhanced_sentiment(text)

            like_rate, retweet_rate = self._ENGAGEMENT_RATES[sentiment_choice]
            likes = np.random.poisson(like_rate)
            retweets = np.random.poisson(retweet_rate)

            sample_data.append({
                'id': f'sample_{i}',
                'text': text,
                'original_text': text,
                'created_at': timestamp,
                'author_id': f'user_{i % 100}',
                'keyword': np.random.choice(['mood', 'feeling', 'day', 'life', 'today']),
                'source': 'sample_data',
                **sentiment_data,
                'like_count': likes,
                'retweet_count': retweets,
            })

        print(f"Generated {len(sample_data)} realistic sample tweets")
        return pd.DataFrame(sample_data)

# Initialize APIs and Collect Data

In [None]:
def initialize_data_collection():
    """Build the analysis dataset from live tweets (if possible) plus sample data.

    Tries to connect with the credentials in the module-level ``config`` and
    collect up to ``config.DAILY_BUDGET`` real tweets. Any failure on that
    path is printed and ignored. 1200 synthetic tweets are always added, and
    the combined frame gets the derived columns ``hour``, ``day_of_week``,
    ``date`` and ``is_weekend``.

    Returns:
        tuple: ``(df_final, collector)`` - the combined DataFrame and the
        ``GlobalTweetCollector`` used to build it.
    """
    collector = GlobalTweetCollector()
    # Collect the pieces first and concatenate once, rather than growing an
    # empty seed DataFrame.
    frames = []

    try:
        print("Attempting Twitter API connection...")
        api_v1 = config.get_api_v1()
        api_v2 = config.get_api_v2()

        me = api_v1.verify_credentials()
        if me:
            print(f"Connected as: @{me.screen_name}")
            collector.api_v1 = api_v1
            collector.api_v2 = api_v2

            df_real = collector.collect_tweets_with_api(count=config.DAILY_BUDGET)
            if len(df_real) > 0:
                frames.append(df_real)
                print(f"Added {len(df_real)} real tweets")

    except Exception as e:
        # Best-effort: without working credentials the notebook still runs on
        # sample data.
        print(f"Twitter API issue: {str(e)[:100]}...")
        print("Continuing with sample data only")

    print("\nGenerating comprehensive sample dataset...")
    frames.append(collector.generate_realistic_sample_data(1200))
    df_final = pd.concat(frames, ignore_index=True)

    # Sample timestamps are timezone-naive, while API timestamps are expected
    # to be timezone-aware (NOTE(review): confirm against tweepy); mixing the
    # two makes a plain to_datetime() raise. Parse everything as UTC, then
    # drop the timezone so the column stays naive.
    df_final['created_at'] = (
        pd.to_datetime(df_final['created_at'], utc=True).dt.tz_convert(None)
    )
    df_final['hour'] = df_final['created_at'].dt.hour
    df_final['day_of_week'] = df_final['created_at'].dt.day_name()
    df_final['date'] = df_final['created_at'].dt.date
    df_final['is_weekend'] = df_final['created_at'].dt.weekday >= 5

    print(f"\n Final dataset ready!")
    print(f"Total tweets: {len(df_final):,}")
    print(f"Real tweets: {len(df_final[df_final['source'] == 'twitter_api']):,}")
    print(f"Sample tweets: {len(df_final[df_final['source'] == 'sample_data']):,}")
    print(f"Date range: {df_final['created_at'].min().date()} to {df_final['created_at'].max().date()}")

    return df_final, collector

# Run the collection pipeline and keep the combined DataFrame and the
# collector instance as module-level names.
df_tweets, collector = initialize_data_collection()