<a href="https://colab.research.google.com/github/Rishpraveen/Tamil-English-Sentiment-Analysis-using-Llama-3-Zero-Shot-/blob/main/Tamil_yt_comment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


Tamil YouTube Comment Sentiment Analysis - Fixed Version

### Cell 1: Install Required Libraries

In [None]:
!pip install --upgrade transformers accelerate torch bitsandbytes flash-attn --no-build-isolation
!pip install google-api-python-client pandas matplotlib seaborn wordcloud ipywidgets
!pip install huggingface_hub
!pip install torchvision==0.18.1  # Explicitly install a compatible torchvision version

### Cell 2: Import Libraries

In [None]:
!pip install flash-attn --no-build-isolation

In [None]:
import os
import re
import time
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from wordcloud import WordCloud
import warnings
warnings.filterwarnings('ignore')

# Transformers and PyTorch
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)

# YouTube API
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from urllib.parse import urlparse, parse_qs

# Hugging Face Hub & Colab
from huggingface_hub import login
from google.colab import userdata

# Interactive Dashboard
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output


### Cell 3: Configuration and Setup

In [None]:
class Config:
    """Central configuration for the sentiment analysis notebook.

    Every setting is a class attribute read as ``Config.NAME``; the class is
    not meant to be instantiated.
    """

    # Hugging Face model id loaded by SentimentAnalyzer.
    MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

    # Generation settings forwarded to the text-generation pipeline.
    MAX_NEW_TOKENS = 5
    TEMPERATURE = 0.1
    TOP_P = 0.5

    # Comment-fetching limits; COMMENTS_PER_PAGE caps ``maxResults`` per request.
    MAX_COMMENTS_TO_FETCH = 100
    COMMENTS_PER_PAGE = 50

    # Labels the classifier is allowed to return.
    SENTIMENT_LABELS = {'Positive', 'Negative', 'Neutral'}

    # Display colour for each label, plus two non-label states.
    SENTIMENT_COLORS = {
        "Positive": "#4CAF50",
        "Negative": "#F44336",
        "Neutral": "#2196F3",
        "Unknown": "#9E9E9E",
        "Error": "#FFC107",
    }

    # Generic plotting defaults: figure size in inches and an ordered colour
    # list (same order as SENTIMENT_COLORS). Plotting code in this notebook
    # reads ``Config.FIGSIZE`` and ``Config.COLORS``, which were previously
    # undefined and raised AttributeError.
    FIGSIZE = (15, 10)
    COLORS = list(SENTIMENT_COLORS.values())

def check_gpu():
    """Print which compute device is available and return it as a ``torch.device``.

    Returns:
        ``torch.device("cuda")`` when CUDA is available, otherwise
        ``torch.device("cpu")``.
    """
    # Handle the CPU-only case first so the GPU path reads straight through.
    if not torch.cuda.is_available():
        print("⚠️ GPU not available, using CPU.")
        return torch.device("cpu")

    gpu_name = torch.cuda.get_device_name(0)
    print(f"✅ GPU Available: {gpu_name}")
    return torch.device("cuda")

device = check_gpu()

### Cell 4: Authentication Setup

In [None]:
def setup_authentication():
    """Log in to Hugging Face and load the YouTube API key from Colab secrets.

    Both steps are best-effort: a failure is printed and does not raise.

    Returns:
        The YouTube API key string, or ``None`` when it is missing or could
        not be read.
    """
    try:
        hf_token = userdata.get('HF_TOKEN')
        if not hf_token:
            # Do not pass an empty token on to login(); report the real cause.
            raise ValueError("HF_TOKEN not found in Colab secrets.")
        print("🔑 Authenticating with Hugging Face...")
        login(token=hf_token)
        print("✅ Hugging Face login successful.")
    except Exception as e:
        print(f"❌ Hugging Face login failed: {e}")

    try:
        api_key = userdata.get('YOUTUBE_API_KEY')
        if not api_key:
            raise ValueError("YOUTUBE_API_KEY not found in Colab secrets.")
        print("✅ YouTube API Key loaded successfully.")
        return api_key
    except Exception as e:
        print(f"❌ Error retrieving YouTube API key: {e}")
        return None

# Keep only a boolean: a bare ``setup_authentication()`` as the last line of a
# notebook cell would echo the returned API key into the saved cell output.
YOUTUBE_API_KEY_LOADED = setup_authentication() is not None


### Cell 5: Model Loading and Setup

In [None]:
class SentimentAnalyzer:
    """Sentiment classifier for Tamil / Tanglish comments built on a Llama 3 chat model.

    The model is loaded eagerly in ``__init__``. ``classify_sentiment`` asks the
    model for a one-word label and falls back to a keyword/emoji heuristic when
    the model output is unusable or generation raises.
    """

    # Tamil keywords are matched as plain substrings, so suffixed forms such as
    # 'அருமையாக' still match the stem 'அருமை'.
    _POSITIVE_TAMIL = (
        'நன்றாக', 'அற்புதம்', 'அருமை', 'சூப்பர்', 'கலக்கல்', 'பிரமாதம்', 'சந்தோஷம்',
        'பிடித்திருக்கிறது', 'சிறந்த', 'வெற்றி',
    )
    _NEGATIVE_TAMIL = (
        'மோசம்', 'சரியில்லை', 'சுமார்', 'கேவலம்', 'குப்பை', 'வேஸ்ட்', 'நேரம் வீண்',
        'ஏமாற்றம்', 'சோகம்', 'கோபம்',
    )
    _POSITIVE_LATIN = (
        'good', 'great', 'excellent', 'amazing', 'super', 'awesome', 'love',
        'fantastic', 'best', 'nice', 'wow',
    )
    _NEGATIVE_LATIN = (
        'bad', 'terrible', 'awful', 'waste', 'disappointing', 'hate', 'worst',
        'boring', 'poor', 'sad',
    )
    # Latin keywords must start a word: "loved" / "superrr" still match, but
    # "ambassador" no longer counts as "sad" and "glove" no longer as "love".
    _POSITIVE_LATIN_RE = re.compile(r"\b(?:" + "|".join(_POSITIVE_LATIN) + r")")
    _NEGATIVE_LATIN_RE = re.compile(r"\b(?:" + "|".join(_NEGATIVE_LATIN) + r")")

    # "\u2764" is the bare heavy-black-heart; counting it also covers the
    # emoji-presentation form (heart followed by U+FE0F).
    _POSITIVE_EMOJI = ('👍', '\u2764', '😍', '🔥')
    _NEGATIVE_EMOJI = ('👎', '😠', '😡')

    _LABELS = ("Positive", "Negative", "Neutral")

    _SYSTEM_PROMPT = """You are an expert at analyzing Tamil and Tamil-English (Tanglish) YouTube comments.
Classify the sentiment as exactly one word: Positive, Negative, or Neutral.
- YOUTUBE CONTEXT: Comments are informal, use mixed language, emojis (👍❤️🔥=positive, 👎😠=negative), and ALL CAPS.
- TAMIL/TANGLISH PATTERNS: "Super", "Arumai", "Nalla iruku" = Positive. "Mosam", "Waste", "Sariyilla" = Negative. "Paravalla", "Okay" = Neutral.
- YOUR TASK: Focus on the overall emotional tone. Respond with ONLY ONE WORD: Positive, Negative, or Neutral."""

    def __init__(self, model_id=None):
        """Create the analyzer and load the model immediately.

        Args:
            model_id: Hugging Face model id. ``None`` (the default) resolves to
                ``Config.MODEL_ID`` at call time rather than at class-definition
                time.

        Raises:
            Exception: whatever ``_load_model`` raises when loading fails.
        """
        self.model_id = Config.MODEL_ID if model_id is None else model_id
        self.tokenizer = None
        self.model = None
        self.text_generator = None
        self._load_model()

    def _load_model(self):
        """Load tokenizer, model and text-generation pipeline.

        Raises:
            Exception: any loading error is printed and re-raised so the
                notebook stops with the root cause visible.
        """
        import importlib.util

        try:
            print(f"🔄 Loading model: {self.model_id}")
            cuda_available = torch.cuda.is_available()

            # NOTE(review): bitsandbytes 4-bit loading generally needs a CUDA
            # device, so quantization is only requested when one is present.
            quantization_config = None
            if cuda_available:
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.bfloat16
                )
            else:
                print("⚠️ No CUDA GPU detected; loading the model without 4-bit quantization.")

            # The package is never imported in this notebook, so the old
            # ``'flash_attn' in globals()`` test was always False. Check that it
            # is installed instead, and only on GPUs with compute capability
            # >= 8.0 (FlashAttention-2 does not run on older architectures).
            flash_attn_available = (
                cuda_available
                and importlib.util.find_spec("flash_attn") is not None
                and torch.cuda.get_device_capability(0)[0] >= 8
            )
            attn_implementation = "flash_attention_2" if flash_attn_available else "sdpa"
            if flash_attn_available:
                print("⚡ Flash Attention 2 detected and will be used.")

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_id,
                quantization_config=quantization_config,
                device_map="auto",
                torch_dtype=torch.bfloat16,
                attn_implementation=attn_implementation
            )
            self.text_generator = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )
            print("✅ Model and pipeline loaded successfully!")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            raise  # Re-raise with the original traceback to show the root cause

    def _analyze_sentiment_fallback(self, text):
        """Classify ``text`` with a keyword and emoji count.

        Args:
            text: the comment string.

        Returns:
            "Positive" or "Negative" for whichever side scores higher,
            "Neutral" on a tie (including no hits at all).
        """
        text_lower = text.lower()

        pos_score = (
            sum(text_lower.count(word) for word in self._POSITIVE_TAMIL)
            + len(self._POSITIVE_LATIN_RE.findall(text_lower))
            + sum(text.count(emoji) for emoji in self._POSITIVE_EMOJI)
        )
        neg_score = (
            sum(text_lower.count(word) for word in self._NEGATIVE_TAMIL)
            + len(self._NEGATIVE_LATIN_RE.findall(text_lower))
            + sum(text.count(emoji) for emoji in self._NEGATIVE_EMOJI)
        )

        if pos_score > neg_score:
            return "Positive"
        if neg_score > pos_score:
            return "Negative"
        return "Neutral"

    @staticmethod
    def _parse_label(response):
        """Return the label mentioned first in ``response``, or ``None``.

        Picking the earliest mention (instead of a fixed Positive > Negative >
        Neutral priority) keeps "Negative, not positive" from being read as
        Positive.
        """
        lowered = response.lower()
        hits = [
            (lowered.find(label.lower()), label)
            for label in SentimentAnalyzer._LABELS
            if label.lower() in lowered
        ]
        return min(hits)[1] if hits else None

    def classify_sentiment(self, text):
        """Classify ``text`` as "Positive", "Negative" or "Neutral".

        Args:
            text: the comment string.

        Returns:
            "Neutral" when no pipeline is loaded or ``text`` is empty, blank or
            not a string; otherwise the model's label, or the keyword fallback
            when the model output contains no label or generation raises.
        """
        if not self.text_generator or not isinstance(text, str) or not text.strip():
            return "Neutral"

        messages = [
            {"role": "system", "content": self._SYSTEM_PROMPT},
            {"role": "user", "content": f"Sentiment of this comment: \"{text}\""},
        ]

        try:
            # NOTE: with do_sample=False decoding is greedy, so temperature and
            # top_p have no effect on the output; they are kept for parity with
            # the Config values.
            outputs = self.text_generator(
                messages,
                max_new_tokens=Config.MAX_NEW_TOKENS,
                do_sample=False,
                temperature=Config.TEMPERATURE,
                top_p=Config.TOP_P,
                pad_token_id=self.tokenizer.eos_token_id
            )
            response = outputs[0]['generated_text'][-1]['content'].strip().capitalize()

            if response in Config.SENTIMENT_LABELS:
                return response

            # The reply is not a clean single label: look for one inside it.
            parsed = self._parse_label(response)
            if parsed is not None:
                return parsed
            return self._analyze_sentiment_fallback(text)
        except Exception as exc:
            # Report the first failure so a broken model is not silently
            # replaced by the keyword heuristic for every comment.
            if not getattr(self, "_generation_error_reported", False):
                print(f"⚠️ LLM classification failed ({exc}); using keyword fallback.")
                self._generation_error_reported = True
            return self._analyze_sentiment_fallback(text)

# Module-level analyzer shared by test_model(), analyze_youtube_video() and the
# dashboard. Constructing it loads the model immediately (__init__ calls
# _load_model), so this cell raises if loading fails.
analyzer = SentimentAnalyzer()

### Cell 6: Test the Model

In [None]:
def test_model():
    """Classify a fixed set of sample comments with the global ``analyzer``.

    Each sample and its predicted label is printed as it is processed.

    Returns:
        A DataFrame with one row per sample and the columns "Text" and
        "Sentiment".
    """
    # Trailing comments give the label each sample is intended to receive.
    samples = (
        "இந்த படம் மிகவும் அருமையாக இருந்தது!",  # Positive
        "சேவை மிகவும் மோசம், நான் திருப்தி அடையவில்லை.",  # Negative
        "Super padam! Vera level acting.",  # Code-mixed Positive
        "Waste of time and money. Highly disappointing.",  # Negative
        "அவன் ஒரு சரியான முட்டாள்.",  # Negative
        "வானிலை இன்று சாதாரணமாக உள்ளது.",  # Neutral
        "enna service idhu? very bad experience da.",  # Code-mixed Negative
        "Decent attempt, but could be better.",  # Neutral
        "நல்ல முயற்சி, ஆனால் இன்னும் நன்றாக இருக்கலாம்.",  # Neutral
        "Absolutely fantastic! Best movie ever!",  # Positive
    )

    print("🧪 Testing Sentiment Analysis Model")
    print("=" * 50)

    texts = []
    labels = []
    position = 0
    for sample in samples:
        position += 1
        print(f"\n{position}. Text: \"{sample}\"")
        label = analyzer.classify_sentiment(sample)
        print(f"   Sentiment: {label}")
        texts.append(sample)
        labels.append(label)

    return pd.DataFrame({"Text": texts, "Sentiment": labels})

# Run tests
test_results = test_model()
print("\n📊 Test Results Summary:")
print(test_results['Sentiment'].value_counts())

### Cell 7: YouTube Comment Extraction

In [None]:
class YouTubeCommentExtractor:
    """Fetches video metadata and top-level comments via the YouTube Data API v3."""

    # Hostnames (after dropping a leading "www.") that serve regular video pages.
    _YOUTUBE_HOSTS = ("youtube.com", "m.youtube.com", "music.youtube.com")
    # Path prefixes whose next path segment is the video id.
    _ID_PATH_PREFIXES = ("shorts", "embed", "live", "v")

    def __init__(self, api_key):
        """Build the API client.

        Args:
            api_key: YouTube Data API key.

        Raises:
            ValueError: if ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API Key is missing.")
        self.api_key = api_key
        self.youtube = build('youtube', 'v3', developerKey=api_key)

    def extract_video_id(self, url):
        """Extract the video id from a YouTube URL.

        Handles ``youtu.be/<id>``, ``/watch?v=<id>`` and ``/shorts|embed|live|v/<id>``
        on youtube.com (with or without ``www.``, ``m.`` or ``music.``), ignores
        surrounding whitespace and accepts URLs without a scheme.

        Args:
            url: the URL string typed or pasted by the user.

        Returns:
            The video id, or ``None`` when the URL is not recognised.
        """
        if not url or not isinstance(url, str):
            return None

        candidate = url.strip()
        if "://" not in candidate:
            # urlparse only fills in hostname when a scheme is present.
            candidate = "https://" + candidate

        parsed_url = urlparse(candidate)
        host = (parsed_url.hostname or "").lower()
        if host.startswith("www."):
            host = host[4:]

        segments = [part for part in parsed_url.path.split("/") if part]

        if host == "youtu.be":
            return segments[0] if segments else None

        if host in self._YOUTUBE_HOSTS:
            if parsed_url.path == "/watch":
                return parse_qs(parsed_url.query).get("v", [None])[0]
            if len(segments) >= 2 and segments[0] in self._ID_PATH_PREFIXES:
                return segments[1]
        return None

    def get_video_info(self, video_id):
        """Fetch title, channel and formatted view/comment counts for a video.

        Args:
            video_id: the YouTube video id.

        Returns:
            A dict with the keys "title", "channel", "view_count" and
            "comment_count" (counts are strings with thousands separators), or
            ``None`` when the video is not found or the request fails.
        """
        try:
            request = self.youtube.videos().list(part="snippet,statistics", id=video_id)
            response = request.execute()
            items = response.get("items")
            if not items:
                return None
            video = items[0]
            stats = video.get("statistics", {})
            return {
                "title": video["snippet"]["title"],
                "channel": video["snippet"]["channelTitle"],
                "view_count": f'{int(stats.get("viewCount", 0)):,}',
                "comment_count": f'{int(stats.get("commentCount", 0)):,}'
            }
        except Exception as e:
            # Best-effort: missing metadata must not abort the analysis.
            print(f"⚠️ Could not fetch video info: {e}")
            return None

    def fetch_comments(self, video_id, max_comments):
        """Yield up to ``max_comments`` top-level comment texts, page by page.

        Whitespace runs inside a comment are collapsed to single spaces and
        comments that end up empty are skipped.

        Args:
            video_id: the YouTube video id.
            max_comments: maximum number of comments to yield.

        Yields:
            Comment text strings.

        Raises:
            HttpError: API errors are propagated to the caller; any other
                exception is printed and ends the iteration.
        """
        next_page_token = None
        comments_fetched = 0
        while comments_fetched < max_comments:
            try:
                request = self.youtube.commentThreads().list(
                    part="snippet",
                    videoId=video_id,
                    maxResults=min(Config.COMMENTS_PER_PAGE, max_comments - comments_fetched),
                    textFormat="plainText",
                    pageToken=next_page_token
                )
                response = request.execute()

                for item in response.get("items", []):
                    if comments_fetched >= max_comments:
                        break
                    comment_data = item["snippet"]["topLevelComment"]["snippet"]
                    comment_text = re.sub(r'\s+', ' ', comment_data["textDisplay"]).strip()
                    if comment_text:
                        yield comment_text
                        comments_fetched += 1

                next_page_token = response.get("nextPageToken")
                if not next_page_token:
                    break
                time.sleep(0.2)  # short pause between page requests
            except HttpError:
                raise  # handled by the caller's analysis loop
            except Exception as e:
                print(f"An unexpected error occurred during comment fetching: {e}")
                break

### Cell 8: Sentiment Analysis Pipeline

In [None]:
def _fetch_comment_records(extractor, video_id, max_comments):
    """Return up to ``max_comments`` top-level comments with their metadata.

    Pages through ``commentThreads.list`` using the extractor's API client and
    returns a list of dicts with the keys 'author', 'comment', 'published_at',
    'like_count' and 'reply_count'. Comments that are empty after whitespace
    normalisation are skipped.
    """
    records = []
    page_token = None
    while len(records) < max_comments:
        response = extractor.youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(Config.COMMENTS_PER_PAGE, max_comments - len(records)),
            textFormat="plainText",
            pageToken=page_token,
        ).execute()

        for item in response.get("items", []):
            if len(records) >= max_comments:
                break
            thread = item["snippet"]
            snippet = thread["topLevelComment"]["snippet"]
            text = re.sub(r'\s+', ' ', snippet.get("textDisplay", "")).strip()
            if not text:
                continue
            records.append({
                'author': snippet.get("authorDisplayName", "Unknown"),
                'comment': text,
                'published_at': snippet.get("publishedAt"),
                'like_count': int(snippet.get("likeCount", 0)),
                'reply_count': int(thread.get("totalReplyCount", 0)),
            })

        page_token = response.get("nextPageToken")
        if not page_token:
            break
        time.sleep(0.2)  # short pause between page requests
    return records


def analyze_youtube_video(video_url, max_comments=100):
    """Fetch a video's comments, classify each one and return the results.

    Args:
        video_url: YouTube video URL.
        max_comments: maximum number of top-level comments to analyze.

    Returns:
        ``(df, video_info)`` on success, where ``df`` has the columns Author,
        Comment, Published_At, Likes, Replies, Sentiment and Comment_Length and
        ``video_info`` is the dict from ``get_video_info`` (or ``None``).
        Returns ``None`` when the API key is missing, the URL cannot be parsed,
        the API request fails, or no comments are found.
    """
    # Read the key from Colab secrets directly; the previously called
    # get_youtube_api_key() is not defined anywhere in this notebook.
    try:
        api_key = userdata.get('YOUTUBE_API_KEY')
    except Exception as exc:
        print(f"❌ Error retrieving YouTube API key: {exc}")
        api_key = None
    if not api_key:
        print("❌ YouTube API key not found")
        return None

    # Initialize extractor
    extractor = YouTubeCommentExtractor(api_key)

    # Extract video ID
    video_id = extractor.extract_video_id(video_url)
    if not video_id:
        print("❌ Could not extract video ID")
        return None

    # Get video info
    video_info = extractor.get_video_info(video_id)
    if video_info:
        print(f"📺 Video: {video_info['title']}")
        print(f"📺 Channel: {video_info['channel']}")
        print(f"👁️  Views: {video_info['view_count']}")
        print(f"💬 Comments: {video_info['comment_count']}")

    # Extract comments with metadata. The extractor's fetch_comments() yields
    # text only, and the extract_comments() method called here before does not
    # exist on the class.
    try:
        comments = _fetch_comment_records(extractor, video_id, max_comments)
    except HttpError as exc:
        print(f"❌ YouTube API error while fetching comments: {exc}")
        return None

    if not comments:
        print("❌ No comments found")
        return None

    # Analyze sentiment
    print(f"\n🔄 Analyzing sentiment for {len(comments)} comments...")

    results = []
    for i, comment in enumerate(comments, 1):
        print(f"Analyzing comment {i}/{len(comments)}: {comment['comment'][:50]}...")

        sentiment = analyzer.classify_sentiment(comment['comment'])

        results.append({
            'Author': comment['author'],
            'Comment': comment['comment'],
            'Published_At': comment['published_at'],
            'Likes': comment['like_count'],
            'Replies': comment['reply_count'],
            'Sentiment': sentiment,
            'Comment_Length': len(comment['comment'])
        })

    df = pd.DataFrame(results)
    return df, video_info

### Cell 9: Visualization Functions

In [None]:
class SentimentVisualizer:
    """Plots and text reports for a DataFrame of classified comments.

    The DataFrame needs a 'Sentiment' column. Individual methods additionally
    read 'Comment', 'Likes', 'Comment_Length' and 'Published_At'.
    """

    DEFAULT_FIGSIZE = (15, 10)
    FALLBACK_COLOR = "#9E9E9E"

    def __init__(self, df):
        """Store the DataFrame and resolve colours and figure size.

        Args:
            df: DataFrame of analysis results.
        """
        self.df = df
        # Map label -> colour so each bar or slice gets the colour of its own
        # sentiment. Config.COLORS / Config.FIGSIZE may not exist, so look the
        # settings up defensively instead of raising AttributeError.
        self.colors = dict(getattr(Config, "SENTIMENT_COLORS", {}))
        self.figsize = getattr(Config, "FIGSIZE", self.DEFAULT_FIGSIZE)

    def _colors_for(self, labels):
        """Return one colour per label, in order, using the fallback for unknown labels."""
        return [self.colors.get(label, self.FALLBACK_COLOR) for label in labels]

    def plot_sentiment_distribution(self):
        """Show the sentiment counts as a bar chart and a pie chart side by side."""
        sentiment_counts = self.df['Sentiment'].value_counts()
        if sentiment_counts.empty:
            print("⚠️ No data to plot.")
            return
        palette = self._colors_for(sentiment_counts.index)

        plt.figure(figsize=self.figsize)

        plt.subplot(1, 2, 1)
        sentiment_counts.plot(kind='bar', color=palette)
        plt.title('Sentiment Distribution')
        plt.xlabel('Sentiment')
        plt.ylabel('Count')
        plt.xticks(rotation=45)

        plt.subplot(1, 2, 2)
        plt.pie(sentiment_counts.values, labels=sentiment_counts.index,
                autopct='%1.1f%%', colors=palette)
        plt.title('Sentiment Distribution (Pie Chart)')

        plt.tight_layout()
        plt.show()

    def plot_sentiment_over_time(self):
        """Plot the number of comments per day for each sentiment.

        Timestamps are parsed into a local Series, so the DataFrame passed to
        the constructor is not modified. Rows whose 'Published_At' cannot be
        parsed are left out.
        """
        if 'Published_At' not in self.df.columns:
            print("⚠️ 'Published_At' column not available; skipping time plot.")
            return

        timestamps = pd.to_datetime(self.df['Published_At'], errors='coerce')
        valid = timestamps.notna()
        if not valid.any():
            print("⚠️ No valid timestamps; skipping time plot.")
            return

        # Group by calendar date and sentiment.
        daily_sentiment = (
            self.df.loc[valid]
            .groupby([timestamps[valid].dt.date, 'Sentiment'])
            .size()
            .unstack(fill_value=0)
        )

        # DataFrame.plot opens its own figure, so no separate plt.figure() call
        # is made here (it would only leave an empty figure behind).
        daily_sentiment.plot(kind='line', marker='o', figsize=(12, 6),
                             color=self._colors_for(daily_sentiment.columns))
        plt.title('Sentiment Trends Over Time')
        plt.xlabel('Date')
        plt.ylabel('Number of Comments')
        plt.legend(title='Sentiment')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def plot_engagement_analysis(self):
        """Show likes and comment length per sentiment as box plots and mean bars."""
        missing = [col for col in ('Likes', 'Comment_Length') if col not in self.df.columns]
        if missing or self.df.empty:
            print(f"⚠️ Engagement plots skipped (missing columns: {missing}, rows: {len(self.df)}).")
            return

        plt.figure(figsize=self.figsize)

        plt.subplot(2, 2, 1)
        self.df.boxplot(column='Likes', by='Sentiment', ax=plt.gca())
        plt.title('Likes by Sentiment')
        plt.suptitle('')  # drop the automatic "Boxplot grouped by ..." title

        plt.subplot(2, 2, 2)
        self.df.boxplot(column='Comment_Length', by='Sentiment', ax=plt.gca())
        plt.title('Comment Length by Sentiment')
        plt.suptitle('')

        plt.subplot(2, 2, 3)
        avg_likes = self.df.groupby('Sentiment')['Likes'].mean()
        avg_likes.plot(kind='bar', color=self._colors_for(avg_likes.index))
        plt.title('Average Likes by Sentiment')
        plt.xticks(rotation=45)

        plt.subplot(2, 2, 4)
        avg_length = self.df.groupby('Sentiment')['Comment_Length'].mean()
        avg_length.plot(kind='bar', color=self._colors_for(avg_length.index))
        plt.title('Average Comment Length by Sentiment')
        plt.xticks(rotation=45)

        plt.tight_layout()
        plt.show()

    def generate_word_cloud(self):
        """Draw one word cloud per sentiment present in the data.

        NOTE(review): no ``font_path`` is passed to WordCloud; Tamil glyphs
        presumably need a Tamil-capable font to render — confirm.
        """
        sentiments = list(self.df['Sentiment'].unique())
        if not sentiments:
            print("⚠️ No data for word clouds.")
            return

        # squeeze=False always returns a 2-D array of axes, also for one panel.
        fig, axes = plt.subplots(1, len(sentiments), figsize=(15, 5), squeeze=False)

        for ax, sentiment in zip(axes[0], sentiments):
            comments = self.df[self.df['Sentiment'] == sentiment]['Comment']
            text = ' '.join(str(comment) for comment in comments)

            ax.set_title(f'{sentiment} Comments')
            ax.axis('off')
            try:
                wordcloud = WordCloud(width=400, height=200,
                                      background_color='white',
                                      colormap='viridis').generate(text)
            except ValueError:
                # Raised when the text contains no usable words.
                ax.text(0.5, 0.5, 'No words to display', ha='center', va='center')
                continue
            ax.imshow(wordcloud, interpolation='bilinear')

        plt.tight_layout()
        plt.show()

    def generate_summary_report(self, video_info=None):
        """Print a text summary of the analysis.

        Args:
            video_info: optional dict with 'title', 'channel' and 'view_count';
                printed as a header when given.
        """
        total_comments = len(self.df)
        sentiment_counts = self.df['Sentiment'].value_counts()

        print("📊 SENTIMENT ANALYSIS REPORT")
        print("=" * 50)

        if video_info:
            print(f"📺 Video: {video_info['title']}")
            print(f"📺 Channel: {video_info['channel']}")
            print(f"👁️  Views: {video_info['view_count']}")
            print()

        print(f"💬 Total Comments Analyzed: {total_comments}")
        print(f"📅 Analysis Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print()

        print("📈 Sentiment Distribution:")
        for sentiment, count in sentiment_counts.items():
            percentage = (count / total_comments) * 100
            print(f"   {sentiment}: {count} ({percentage:.1f}%)")

        print()
        print("🔍 Key Insights:")

        # Most liked comment by sentiment
        for sentiment in sentiment_counts.index:
            most_liked = self.df[self.df['Sentiment'] == sentiment].nlargest(1, 'Likes')
            if not most_liked.empty:
                top = most_liked.iloc[0]
                comment = str(top['Comment'])
                # Only mark the excerpt with an ellipsis when it was actually cut.
                excerpt = comment[:100] + '...' if len(comment) > 100 else comment
                print(f"   Most liked {sentiment.lower()} comment ({top['Likes']} likes):")
                print(f"   \"{excerpt}\"")

        # Average engagement
        avg_likes = self.df.groupby('Sentiment')['Likes'].mean()
        print(f"\n📊 Average Likes by Sentiment:")
        for sentiment, avg in avg_likes.items():
            print(f"   {sentiment}: {avg:.1f} likes")

In [None]:
### Cell 10: Main Execution

In [None]:
def main():
    """Prompt for a video URL and comment limit, then run the full analysis.

    Shows the distribution, engagement and word-cloud plots, prints the summary
    report and writes the results to a timestamped CSV file.

    Returns:
        ``(df, video_info)`` on success, ``None`` when the analysis fails.
    """
    default_limit = 100

    video_url = input("Enter YouTube video URL: ").strip()
    raw_limit = input("Enter maximum number of comments to analyze (default 100): ").strip()

    # A blank, non-numeric or non-positive answer falls back to the default
    # instead of crashing with ValueError.
    try:
        max_comments = int(raw_limit) if raw_limit else default_limit
    except ValueError:
        print(f"⚠️ '{raw_limit}' is not a whole number; using {default_limit}.")
        max_comments = default_limit
    if max_comments < 1:
        print(f"⚠️ Comment limit must be at least 1; using {default_limit}.")
        max_comments = default_limit

    print(f"\n🚀 Starting sentiment analysis for: {video_url}")
    print(f"📊 Maximum comments to analyze: {max_comments}")

    # Analyze video
    result = analyze_youtube_video(video_url, max_comments)

    if result is None:
        print("❌ Analysis failed")
        return

    df, video_info = result

    # Create visualizer
    visualizer = SentimentVisualizer(df)

    # Generate visualizations
    print("\n📊 Generating visualizations...")
    visualizer.plot_sentiment_distribution()
    visualizer.plot_engagement_analysis()
    visualizer.generate_word_cloud()

    # Generate summary report
    visualizer.generate_summary_report(video_info)

    # Save results
    output_file = f"sentiment_analysis_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(output_file, index=False)
    print(f"\n💾 Results saved to: {output_file}")

    return df, video_info

### Cell 11: Interactive Analysis

In [None]:
def run_interactive_dashboard():
    """Build and display the interactive sentiment analysis dashboard.

    Creates the URL field, comment-count slider, Start/Stop/Clear buttons and an
    output area, wires the button handlers and displays everything. The analysis
    itself is done by ``run_analysis_logic``.
    """

    # --- Widget Creation ---
    url_input = widgets.Text(
        value='https://www.youtube.com/watch?v=R4aT6v_L-3E', # Example Tamil video
        description='Video URL:',
        layout=widgets.Layout(width='600px'),
        style={'description_width': '100px'}
    )
    max_comments_slider = widgets.IntSlider(
        value=50, min=10, max=500, step=10, description='Max Comments:',
        layout=widgets.Layout(width='400px'), style={'description_width': '100px'}
    )
    analyze_button = widgets.Button(description='Start Analysis', button_style='success', icon='play')
    stop_button = widgets.Button(description='Stop Analysis', button_style='danger', icon='stop')
    clear_button = widgets.Button(description='Clear', button_style='info', icon='refresh')
    output_area = widgets.Output()

    # Shared run flag, passed to run_analysis_logic, which checks it between
    # comments.
    # NOTE(review): the analysis runs synchronously inside the click handler
    # (no thread is started), so Stop/Clear clicks are presumably only handled
    # after the run has finished — confirm and move to a worker if needed.
    analysis_globals = {'is_running': False}

    def start_analysis_clicked(b):
        """Event handler for the start button."""
        if analysis_globals['is_running']:
            with output_area:
                print("Analysis is already in progress.")
            return

        analysis_globals['is_running'] = True
        output_area.clear_output()

        try:
            with output_area:
                run_analysis_logic(
                    url_input.value,
                    max_comments_slider.value,
                    analysis_globals
                )
        finally:
            # Always release the flag; otherwise an exception escaping
            # run_analysis_logic would block every later run with
            # "already in progress".
            analysis_globals['is_running'] = False

    def stop_analysis_clicked(b):
        """Event handler for the stop button."""
        if analysis_globals['is_running']:
            analysis_globals['is_running'] = False
            with output_area:
                display(HTML("<div style='color: #e65100; font-weight: bold;'>🛑 Analysis stopping...</div>"))

    def clear_clicked(b):
        """Clears the output area and resets the run flag."""
        output_area.clear_output()
        analysis_globals['is_running'] = False

    analyze_button.on_click(start_analysis_clicked)
    stop_button.on_click(stop_analysis_clicked)
    clear_button.on_click(clear_clicked)

    # --- UI Display ---
    intro_html = """
    <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 15px; text-align: center;'>
        <h1 style='font-size: 28px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);'>🎬 Tamil YouTube Sentiment Analyzer</h1>
        <p style='font-size: 16px; opacity: 0.9;'>Enter a video URL to discover the public sentiment!</p>
    </div>
    """
    display(HTML(intro_html))
    display(widgets.VBox([
        url_input,
        max_comments_slider,
        widgets.HBox([analyze_button, stop_button, clear_button]),
        output_area
    ]))


def run_analysis_logic(video_url, max_comments, analysis_globals):
    """Fetch, classify and display the comments of one video.

    Args:
        video_url: YouTube video URL entered by the user.
        max_comments: maximum number of comments to analyze.
        analysis_globals: dict holding the 'is_running' flag. The loop stops
            when the flag turns False, and the flag is always reset to False
            when this function returns.
    """
    from html import escape  # text from YouTube is untrusted; escape before embedding

    try:
        # Read the key inside the try block so a failing secret lookup is
        # reported and the run flag is still reset in ``finally``.
        api_key = userdata.get('YOUTUBE_API_KEY')
        if not api_key:
            display(HTML("<div style='color: red; font-weight: bold;'>Error: YouTube API Key is not configured.</div>"))
            return

        extractor = YouTubeCommentExtractor(api_key)
        video_id = extractor.extract_video_id(video_url)
        if not video_id:
            display(HTML(f"<div style='color: red; font-weight: bold;'>Error: Could not extract video ID from '{escape(str(video_url))}'.</div>"))
            return

        video_info = extractor.get_video_info(video_id)
        if video_info:
            info_html = f"""<div style='background: #f0f4f8; border-left: 5px solid #1976d2; padding: 15px; margin: 10px 0; border-radius: 5px;'>
                <p><strong>Video:</strong> {escape(str(video_info['title']))}</p>
                <p><strong>Channel:</strong> {escape(str(video_info['channel']))} | <strong>Views:</strong> {escape(str(video_info['view_count']))}</p>
            </div>"""
            display(HTML(info_html))

        results = []
        start_time = time.time()

        display(HTML(f"<h4>🔄 Analyzing up to {max_comments} comments...</h4>"))

        for comment_text in extractor.fetch_comments(video_id, max_comments):
            if not analysis_globals['is_running']:
                display(HTML("<h4>Analysis stopped by user.</h4>"))
                break

            sentiment = analyzer.classify_sentiment(comment_text)
            results.append({'Comment': comment_text, 'Sentiment': sentiment})

            # Real-time update
            color = Config.SENTIMENT_COLORS.get(sentiment, '#9E9E9E')
            display_text = comment_text[:120] + '...' if len(comment_text) > 120 else comment_text
            progress_html = f"""<div style='border-left: 4px solid {color}; padding: 8px; margin: 4px 0; background: #fafafa;'>
                <span style='color: {color}; font-weight: bold;'>{escape(str(sentiment))}</span>: <em>"{escape(display_text)}"</em>
            </div>"""
            display(HTML(progress_html))

        total_time = time.time() - start_time
        display_final_results(results, video_info, total_time)

    except HttpError as e:
        # The error body is normally JSON, but fall back to a generic message
        # when it is missing or cannot be parsed.
        message = 'Unknown error.'
        try:
            raw = e.content.decode("utf-8") if isinstance(e.content, bytes) else str(e.content)
            parsed = json.loads(raw)
            if isinstance(parsed, dict):
                error_details = parsed.get("error", {})
                if isinstance(error_details, dict):
                    message = error_details.get('message', message)
        except (ValueError, AttributeError):
            pass
        status = getattr(getattr(e, "resp", None), "status", "unknown")
        error_html = f"""<div style='color: red; border: 1px solid red; padding: 10px; margin: 10px 0;'>
            <strong>YouTube API Error ({status}):</strong> {escape(str(message))}
            <p>This often means your daily API quota is exceeded or the API key is invalid/restricted.</p>
        </div>"""
        display(HTML(error_html))
    except Exception as e:
        display(HTML(f"<div style='color: red; font-weight: bold;'>An unexpected error occurred: {escape(str(e))}</div>"))
    finally:
        analysis_globals['is_running'] = False


def display_final_results(results, video_info, total_time):
    """Render the final charts, HTML summary and CSV export.

    Args:
        results: list of dicts with the keys 'Comment' and 'Sentiment'.
        video_info: video metadata dict or ``None`` (not used by this function).
        total_time: elapsed analysis time in seconds.
    """
    from html import escape  # comment text is untrusted; escape before embedding

    if not results:
        display(HTML("<h4>No comments were analyzed.</h4>"))
        return

    df = pd.DataFrame(results)
    sentiment_counts = df['Sentiment'].value_counts()

    # --- Visualization ---
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    fig.suptitle('Sentiment Analysis Dashboard', fontsize=20, fontweight='bold')

    # Pie Chart
    colors = [Config.SENTIMENT_COLORS.get(s, '#9E9E9E') for s in sentiment_counts.index]
    ax1.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%',
            startangle=90, colors=colors, wedgeprops={'edgecolor': 'white', 'linewidth': 2})
    ax1.set_title('Sentiment Distribution', fontsize=16)

    # Bar Chart
    sns.barplot(x=sentiment_counts.index, y=sentiment_counts.values, palette=colors, ax=ax2, hue=sentiment_counts.index, legend=False)
    ax2.set_title('Sentiment Counts', fontsize=16)
    ax2.set_ylabel('Number of Comments')
    ax2.set_xlabel('Sentiment')
    for container in ax2.containers:
        ax2.bar_label(container)

    # Leave room at the top for the figure-level title.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

    # --- HTML Summary ---
    total_comments = len(df)
    parts = [f"""
    <div style='background: #e3f2fd; border: 1px solid #90caf9; padding: 20px; margin-top: 20px; border-radius: 10px;'>
        <h3>📊 Analysis Summary</h3>
        <p><strong>Total Comments Analyzed:</strong> {total_comments}</p>
        <p><strong>Total Time Taken:</strong> {total_time:.2f} seconds</p>
        <p><strong>Average Time per Comment:</strong> {total_time/total_comments:.2f} seconds</p>
    </div>
    <div style='margin-top:20px'>
        <h3>💬 Comment Examples</h3>
    """]

    # Up to three example comments per label.
    for sentiment in ['Positive', 'Negative', 'Neutral']:
        examples = df[df['Sentiment'] == sentiment].head(3)
        if examples.empty:
            continue
        color = Config.SENTIMENT_COLORS[sentiment]
        parts.append(f"<h4 style='color:{color};'>{sentiment} Comments</h4>")
        for comment in examples['Comment']:
            parts.append(
                f"<div style='border-left: 3px solid {color}; padding-left: 10px; margin-bottom: 8px;'>"
                f"<em>{escape(str(comment))}</em></div>"
            )

    parts.append("</div>")
    display(HTML("".join(parts)))

    # Save results to a file
    output_file = f"sentiment_analysis_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(output_file, index=False)
    display(HTML(f"<p>💾 Results saved to <strong>{output_file}</strong></p>"))


# --- Main Execution ---
# Build and display the dashboard widgets as soon as this cell runs.
run_interactive_dashboard()

### Cell 12: Batch Processing Functions

In [None]:
def batch_analyze_videos(video_urls, max_comments_per_video=50):
    """Analyze several videos and combine the results.

    Args:
        video_urls: iterable of YouTube URLs; a single URL string is also
            accepted and treated as a one-item list.
        max_comments_per_video: comment limit passed on for each video.

    Returns:
        A combined DataFrame with the extra columns 'Video_URL', 'Video_Title'
        and 'Video_Channel', or ``None`` when no video could be analyzed. The
        combined results are also written to a timestamped CSV file.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(video_urls, str):
        video_urls = [video_urls]
    video_urls = list(video_urls)

    all_results = []

    for i, url in enumerate(video_urls, 1):
        print(f"\n🔄 Processing video {i}/{len(video_urls)}: {url}")

        result = analyze_youtube_video(url, max_comments_per_video)
        if result:
            df, video_info = result
            df['Video_URL'] = url
            df['Video_Title'] = video_info['title'] if video_info else 'Unknown'
            df['Video_Channel'] = video_info['channel'] if video_info else 'Unknown'
            all_results.append(df)

        if i < len(video_urls):
            time.sleep(2)  # Rate limiting between videos; not needed after the last one

    if not all_results:
        return None

    combined_df = pd.concat(all_results, ignore_index=True)

    print(f"\n📊 Batch Analysis Complete!")
    # Count only the videos that actually produced results, not every URL tried.
    print(f"Total videos analyzed: {len(all_results)}")
    print(f"Total comments analyzed: {len(combined_df)}")

    # Save combined results
    output_file = f"batch_sentiment_analysis_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
    combined_df.to_csv(output_file, index=False)
    print(f"💾 Results saved to: {output_file}")

    # Generate summary
    print("\n📈 Overall Sentiment Distribution:")
    print(combined_df['Sentiment'].value_counts())

    # Per-video summary
    print("\n📺 Per-Video Summary:")
    video_summary = combined_df.groupby('Video_Title')['Sentiment'].value_counts().unstack(fill_value=0)
    print(video_summary)

    return combined_df

# Example usage:
# video_urls = [
#     "https://youtu.be/VIDEO_ID_1",
#     "https://youtu.be/VIDEO_ID_2",
#     "https://youtu.be/VIDEO_ID_3"
# ]
# batch_results = batch_analyze_videos(video_urls)

# Ready banner listing the entry points defined in this notebook. The
# interactive entry point is run_interactive_dashboard(); no function named
# interactive_analysis() is defined here.
print("✅ Tamil-English Sentiment Analysis Tool Ready!")
print("📋 Available functions:")
print("   - main(): Run complete analysis")
print("   - run_interactive_dashboard(): Interactive mode")
print("   - batch_analyze_videos(): Batch processing")
print("   - test_model(): Test with sample texts")