# Twitter Mood Detection System - Experimentation Notebook

This notebook provides a playground for experimenting with the Twitter Mood Detection System.

## Features:
- Tweet collection and preprocessing
- Multiple sentiment analysis methods
- Data visualization and analysis
- Model comparison and evaluation


In [1]:
# Import required libraries (standard library, then third-party)
import sys
import os
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Make the project's src directory importable.
# The path is resolved to an absolute location once, so a later change of the
# working directory cannot break imports, and it is only added when missing so
# re-running this cell does not pile up duplicate sys.path entries.
SRC_DIR = os.path.abspath(os.path.join('..', 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Project-local modules (resolved through SRC_DIR)
from config import Config
from twitter_collector import TwitterCollector
from data_preprocessor import DataPreprocessor
from sentiment_analyzer import SentimentAnalyzer
from analyzer import MoodAnalyzer

print("✅ All modules imported successfully!")


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Configuration check: validate settings before any API call is attempted.
try:
    Config.validate_config()
except ValueError as e:
    # A ValueError from validate_config is reported rather than re-raised so
    # the remaining cells of the notebook stay readable.
    print(f"❌ Configuration error: {e}")
    print("Please set your TWITTER_BEARER_TOKEN in the .env file")
else:
    # Kept outside the try body: UnicodeEncodeError is a ValueError subclass,
    # so a failure to print the status line must not be mistaken for a
    # configuration problem.
    print("✅ Configuration valid")


## Example: Tweet Collection and Analysis

Let's collect some tweets and analyze their sentiment:


In [None]:
# Build the pipeline components shared by the cells below
collector = TwitterCollector()
preprocessor = DataPreprocessor()
sentiment_analyzer = SentimentAnalyzer()
mood_analyzer = MoodAnalyzer()

# Mood keywords to search for
keywords = [
    "happy",
    "sad",
    "excited",
    "angry",
]
print(f"Keywords: {keywords}")

# Fetch a small per-keyword sample; this is only meant for experimentation
print("Collecting tweets...")
tweets_df = collector.collect_mood_tweets(
    keywords,
    max_tweets_per_keyword=25,
)
print(f"Collected {len(tweets_df)} tweets")

# Preview the first rows, or explain why there is nothing to show
if tweets_df.empty:
    print("No tweets collected. Check your API credentials or try different keywords.")
else:
    print("\nSample tweets:")
    display(tweets_df[['text', 'keyword']].head())


In [None]:
# Preprocess tweets
# NOTE(review): tweets_df is rebound to the preprocessed frame, so re-running
# this cell feeds already-preprocessed data back into the preprocessor.
if tweets_df.empty:
    print("No tweets to preprocess")
else:
    print("Preprocessing tweets...")
    tweets_df = preprocessor.preprocess_tweets(tweets_df)
    print(f"Preprocessed {len(tweets_df)} tweets")

    # The example below reads the first row, so it is only shown when the
    # preprocessor actually returned rows; iloc[0] on an empty frame raises
    # IndexError.
    if tweets_df.empty:
        print("No tweets left after preprocessing")
    else:
        # Show the text at each stage for one tweet; the cleaned_text and
        # lemmatized_text columns are expected to come from preprocess_tweets.
        print("\nPreprocessing example:")
        sample = tweets_df.iloc[0]
        print(f"Original: {sample['text']}")
        print(f"Cleaned: {sample['cleaned_text']}")
        print(f"Lemmatized: {sample['lemmatized_text']}")


In [None]:
# Sentiment Analysis
if tweets_df.empty:
    print("No tweets to analyze")
else:
    print("Analyzing sentiment with multiple methods...")

    # Analyze with all methods. The analyzer only receives a plain list of
    # texts, so its result cannot carry tweets_df's index.
    sentiment_results = sentiment_analyzer.analyze_sentiment_batch(
        tweets_df['lemmatized_text'].tolist(),
        method='all'
    )

    # The merge below is positional (row i of the result belongs to row i of
    # tweets_df), which is only meaningful when both have the same length.
    if len(sentiment_results) != len(tweets_df):
        raise ValueError(
            f"Sentiment results have {len(sentiment_results)} rows "
            f"but there are {len(tweets_df)} tweets"
        )

    # Merge results by position. Assigning the Series directly would align on
    # index labels and silently produce NaN or mismatched rows whenever
    # tweets_df does not have a default 0..n-1 index.
    for col in sentiment_results.columns:
        if col != 'text':
            tweets_df[col] = sentiment_results[col].to_numpy()

    print("Sentiment analysis complete!")

    # Show sample results, limited to the columns that actually exist so an
    # unavailable method does not raise KeyError here.
    print("\nSample sentiment results:")
    preview_cols = ['text', 'vader_sentiment', 'textblob_sentiment', 'transformer_sentiment']
    display(tweets_df[[c for c in preview_cols if c in tweets_df.columns]].head())


In [None]:
# Visualization and Analysis
# Draws a 2x2 dashboard (distribution pie, sentiment by keyword, method
# comparison, top words) based on the VADER labels, then prints a summary.
if not tweets_df.empty and 'vader_sentiment' in tweets_df.columns:
    print("Creating visualizations...")

    # Sentiment distribution
    distribution = mood_analyzer.calculate_sentiment_distribution(tweets_df, 'vader_sentiment')
    print("\nSentiment Distribution:")
    print(distribution)

    # Create plots; name each panel so the code below reads by purpose
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    pie_ax, keyword_ax = axes[0]
    methods_ax, words_ax = axes[1]

    # Pie chart
    distribution.plot.pie(ax=pie_ax, autopct='%1.1f%%')
    pie_ax.set_title('Sentiment Distribution')

    # Bar chart by keyword
    if 'keyword' in tweets_df.columns:
        keyword_sentiment = (
            tweets_df.groupby(['keyword', 'vader_sentiment']).size().unstack(fill_value=0)
        )
        keyword_sentiment.plot.bar(ax=keyword_ax)
        keyword_ax.set_title('Sentiment by Keyword')
        keyword_ax.tick_params(axis='x', rotation=45)
    else:
        # Hide the panel instead of leaving an empty axes frame in the figure
        keyword_ax.set_visible(False)

    # Method comparison
    # NOTE(review): the substring match also selects any other column whose
    # name contains 'sentiment' (for example a numeric score column, if the
    # analyzer emits one) — confirm against the analyzer's column names.
    sentiment_cols = [col for col in tweets_df.columns if 'sentiment' in col and col != 'vader_sentiment']
    if sentiment_cols:
        method_cols = ['vader_sentiment'] + sentiment_cols
        # One value_counts Series per method, placed side by side; labels a
        # method never produced show up as NaN and are simply not drawn.
        comparison_df = pd.concat(
            [tweets_df[col].value_counts() for col in method_cols],
            axis=1,
            keys=method_cols,
        )
        comparison_df.plot.bar(ax=methods_ax)
        methods_ax.set_title('Method Comparison')
        methods_ax.tick_params(axis='x', rotation=45)
    else:
        methods_ax.set_visible(False)

    # Word frequency
    word_freq = preprocessor.get_word_frequency(tweets_df)
    if len(word_freq) > 0:
        word_freq.head(20).plot.bar(ax=words_ax)
        words_ax.set_title('Top 20 Words')
        words_ax.tick_params(axis='x', rotation=45)
    else:
        # Plotting an empty result would raise; drop the panel instead
        words_ax.set_visible(False)

    plt.tight_layout()
    plt.show()

    # Generate report
    report = mood_analyzer.generate_summary_report(tweets_df, 'vader_sentiment')
    basic_metrics = report['basic_metrics']
    print("\n📊 Summary Report:")
    print(f"Total tweets: {basic_metrics['total_tweets']}")
    print(f"Dominant sentiment: {basic_metrics['dominant_sentiment']}")
    print(f"Sentiment percentages: {basic_metrics['sentiment_percentages']}")
else:
    print("No sentiment data available for visualization")
