In [None]:
import os
import sys
from youtube_transcript_api import YouTubeTranscriptApi
from googletrans import Translator
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import googleapiclient.discovery
import pandas as pd

# One-time setup shared by every analysis below: the VADER lexicon backs the
# sentiment analyzer, and a single translator instance is reused for all calls.
nltk.download("vader_lexicon")
translator = Translator()
sia = SentimentIntensityAnalyzer()

# Output locations for the transcript dump and the metadata table.
output_file = "transcript_results.txt"
metadata_file = "video_metadata.csv"

# YouTube Data API setup
# The key is read from the YOUTUBE_API_KEY environment variable so that the
# credential never has to be written into this file. It defaults to an empty
# string when the variable is not set.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

def get_youtube_service():
    """Build and return a YouTube Data API client.

    The client is created from the module-level service name, API version and
    developer key. If anything goes wrong while building it, the error is
    printed and the process exits with status 1.
    """
    try:
        client = googleapiclient.discovery.build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=DEVELOPER_KEY,
        )
    except Exception as err:
        print(f"Error initializing YouTube API: {err}")
        sys.exit(1)
    else:
        return client

def fetch_video_metadata(video_ids):
    """
    Retrieve detailed metadata for given YouTube video IDs

    Surrounding whitespace is stripped from every ID and blank IDs are
    dropped. When no usable ID remains, an empty dict is returned without
    contacting the API.

    Args:
        video_ids (list): List of YouTube video IDs

    Returns:
        dict: Dictionary of video metadata keyed by the video ID reported by
            the API. Each value holds title, channel_title, published_at,
            description, tags, view_count, like_count, comment_count,
            duration, privacy_status and, when comments could be fetched,
            a 'comments' list. On an API error the message is printed and
            whatever was collected so far is returned.
    """
    video_metadata = {}

    # IDs typed as "a, b" arrive with stray spaces, and an ID carrying
    # whitespace never matches a video.
    clean_ids = [vid.strip() for vid in video_ids if vid and vid.strip()]
    if not clean_ids:
        return video_metadata

    youtube = get_youtube_service()

    try:
        # Retrieve video details
        request = youtube.videos().list(
            part="snippet,statistics,contentDetails,status",
            id=",".join(clean_ids)
        )
        response = request.execute()

        for video in response.get('items', []):
            snippet = video.get('snippet', {})
            statistics = video.get('statistics', {})
            content_details = video.get('contentDetails', {})
            status = video.get('status', {})
            video_metadata[video['id']] = {
                'title': snippet.get('title', 'N/A'),
                'channel_title': snippet.get('channelTitle', 'N/A'),
                'published_at': snippet.get('publishedAt', 'N/A'),
                'description': snippet.get('description', 'N/A'),
                'tags': snippet.get('tags', []),
                'view_count': int(statistics.get('viewCount', 0)),
                'like_count': int(statistics.get('likeCount', 0)),
                'comment_count': int(statistics.get('commentCount', 0)),
                'duration': content_details.get('duration', 'N/A'),
                'privacy_status': status.get('privacyStatus', 'N/A')
            }

        # Comments are requested only for videos the API actually returned,
        # so an unknown ID can no longer raise KeyError here and abort the
        # remaining videos.
        comments_metadata = fetch_video_comments(youtube, list(video_metadata))
        for video_id, comments in comments_metadata.items():
            if video_id in video_metadata:
                video_metadata[video_id]['comments'] = comments

    except Exception as e:
        print(f"Error fetching video metadata: {e}")

    return video_metadata

def fetch_video_comments(youtube, video_ids, max_comments_per_video=10):
    """
    Collect top-level comments for each of the given videos

    One commentThreads request (ordered by relevance) is issued per video.
    If a request or the parsing of its response fails, the error is printed
    and that video is mapped to an empty list.

    Args:
        youtube (obj): YouTube API service object
        video_ids (list): List of YouTube video IDs
        max_comments_per_video (int): Value passed as maxResults for each video

    Returns:
        dict: Maps each video ID to a list of dicts with the keys
            'text', 'author', 'like_count' and 'published_at'
    """
    results = {}

    for vid in video_ids:
        try:
            response = youtube.commentThreads().list(
                part="snippet",
                videoId=vid,
                maxResults=max_comments_per_video,
                order="relevance",
            ).execute()

            collected = []
            for thread in response.get('items', []):
                # Every field of interest lives on the top-level comment snippet.
                top = thread['snippet']['topLevelComment']['snippet']
                collected.append({
                    'text': top['textDisplay'],
                    'author': top['authorDisplayName'],
                    'like_count': top.get('likeCount', 0),
                    'published_at': top['publishedAt'],
                })
            results[vid] = collected

        except Exception as err:
            print(f"Error fetching comments for video {vid}: {err}")
            results[vid] = []

    return results

def save_metadata_to_file(video_metadata):
    """
    Write one CSV row per video to the module-level metadata file

    The 'comments' and 'tags' entries are left out of the table; every other
    metadata field becomes a column next to 'video_id'.

    Args:
        video_metadata (dict): Dictionary of video metadata
    """
    skipped_fields = ('comments', 'tags')

    rows = []
    for vid, details in video_metadata.items():
        record = {'video_id': vid}
        record.update(
            (field, value)
            for field, value in details.items()
            if field not in skipped_fields
        )
        rows.append(record)

    table = pd.DataFrame(rows)
    table.to_csv(metadata_file, index=False, encoding='utf-8')
    print(f"Metadata saved to {metadata_file}")

def analyze_video_metrics(video_metadata):
    """
    Provide basic analysis of video metrics

    Prints the total and per-video average of views, likes and comments.
    When there is no metadata, a short notice is printed instead of dividing
    by zero.

    Args:
        video_metadata (dict): Dictionary of video metadata; each value must
            provide 'view_count', 'like_count' and 'comment_count'
    """
    print("\nVideo Metrics Analysis:")

    video_count = len(video_metadata)
    if video_count == 0:
        # Without this guard the averages below raise ZeroDivisionError.
        print("No video metadata available.")
        return

    # Aggregate metrics
    records = video_metadata.values()
    total_views = sum(metadata['view_count'] for metadata in records)
    total_likes = sum(metadata['like_count'] for metadata in records)
    total_comments = sum(metadata['comment_count'] for metadata in records)

    # Calculate averages
    avg_views = total_views / video_count
    avg_likes = total_likes / video_count
    avg_comments = total_comments / video_count

    print(f"Total Videos: {video_count}")
    print(f"Total Views: {total_views:,}")
    print(f"Total Likes: {total_likes:,}")
    print(f"Total Comments: {total_comments:,}")
    print(f"\nAverage Views per Video: {avg_views:,.2f}")
    print(f"Average Likes per Video: {avg_likes:,.2f}")
    print(f"Average Comments per Video: {avg_comments:,.2f}")

def fetch_and_translate_transcripts(video_ids, target_language):
    """
    Fetch a transcript for each video and translate it line by line

    A transcript in the target language is preferred; otherwise a French
    transcript is used. Lines are translated from the language of the
    transcript that was actually found, and a transcript already in the
    target language is passed through untranslated.

    Args:
        video_ids (list): List of YouTube video IDs; surrounding whitespace
            is stripped and blank entries are skipped
        target_language (str): Language code to translate into

    Returns:
        dict: Maps each processed video ID to a list of dicts with the keys
            'time' (start offset formatted with two decimals), 'original'
            and 'translated'. Videos without a usable transcript, or whose
            processing raised, are reported and omitted.
    """
    transcripts = {}
    for raw_id in video_ids:
        video_id = raw_id.strip()
        if not video_id:
            continue
        try:
            # Fetch transcript
            available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
            language_codes = {t.language_code for t in available_transcripts}
            if target_language in language_codes:
                transcript = available_transcripts.find_transcript([target_language])
            elif "fr" in language_codes:
                # find_transcript accepts manual and generated transcripts,
                # matching the membership test above. Looking only for a
                # generated one fails when the French transcript is manual.
                transcript = available_transcripts.find_transcript(["fr"])
            else:
                print(f"No suitable transcript available for video ID: {video_id}")
                continue

            source_language = transcript.language_code
            needs_translation = source_language != target_language

            # Fetch and translate
            translated_data = []
            for entry in transcript.fetch():
                text = entry["text"]
                if needs_translation:
                    translated = translator.translate(
                        text, src=source_language, dest=target_language
                    ).text
                else:
                    translated = text
                translated_data.append({
                    "time": f"{entry['start']:.2f}",
                    "original": text,
                    "translated": translated,
                })
            transcripts[video_id] = translated_data
        except Exception as e:
            print(f"Error processing video ID {video_id}: {e}")
    return transcripts

def save_transcripts_to_file(transcripts):
    """
    Write every transcript as a plain-text table to the module-level output file

    Each video gets a 'Video ID' line, a column header, a dashed rule, one row
    per transcript entry and a closing rule of '=' characters.

    Args:
        transcripts (dict): Maps video IDs to lists of entries with the keys
            'time', 'original' and 'translated'
    """
    column_header = f"{'Time (s)':<10} | {'Original':<40} | {'Translated'}"
    thin_rule = "-" * 80
    thick_rule = "=" * 80

    with open(output_file, "w", encoding="utf-8") as handle:
        for vid, rows in transcripts.items():
            print(f"Video ID: {vid}", file=handle)
            print(column_header, file=handle)
            print(thin_rule, file=handle)
            for row in rows:
                print(
                    f"{row['time']:<10} | {row['original']:<40} | {row['translated']}",
                    file=handle,
                )
            # A blank line precedes the closing rule of each video section.
            print("\n" + thick_rule, file=handle)

def analyze_sentiment(transcripts):
    """
    Print the mean VADER compound score over all translated transcript lines

    Args:
        transcripts (dict): Maps video IDs to lists of entries with a
            'translated' key
    """
    compound_scores = [
        sia.polarity_scores(line["translated"])["compound"]
        for lines in transcripts.values()
        for line in lines
    ]
    if compound_scores:
        result = sum(compound_scores) / len(compound_scores)
    else:
        result = "No data available."
    print("Average Sentiment Score:", result)

def extract_keywords(transcripts):
    """
    Print the ten most frequent whitespace-separated words of the translations

    Args:
        transcripts (dict): Maps video IDs to lists of entries with a
            'translated' key
    """
    tally = Counter()
    for lines in transcripts.values():
        for line in lines:
            tally.update(line["translated"].split())

    print("Top 10 Keywords:")
    for token, occurrences in tally.most_common(10):
        print(f"{token}: {occurrences}")

def generate_wordcloud(transcripts):
    """
    Display a word cloud built from all translated transcript text

    When there is no text at all (for example no transcript could be
    fetched), a notice is printed and nothing is plotted.

    Args:
        transcripts (dict): Maps video IDs to lists of entries with a
            'translated' key
    """
    all_text = " ".join(entry["translated"] for data in transcripts.values() for entry in data)
    if not all_text.strip():
        # WordCloud.generate refuses input without any words.
        print("No transcript text available for a word cloud.")
        return

    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(all_text)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

def summarize_transcripts(transcripts):
    """
    Print a summary of all translated transcript text

    The summarization pipeline is created inside the error handler so that a
    model loading failure is reported like any other summarization error.
    Input longer than the model accepts is truncated rather than rejected.

    Args:
        transcripts (dict): Maps video IDs to lists of entries with a
            'translated' key
    """
    all_text = " ".join(entry["translated"] for data in transcripts.values() for entry in data)
    if not all_text.strip():
        print("No transcript text available to summarize.")
        return

    try:
        summarizer = pipeline("summarization")
        summary = summarizer(
            all_text,
            max_length=130,
            min_length=30,
            do_sample=False,
            # A whole transcript usually exceeds the model's input limit;
            # without truncation the call fails for such input.
            truncation=True,
        )
        print("Summary:\n", summary[0]['summary_text'])
    except Exception as e:
        print("Unable to summarize:", e)

def model_topics(transcripts, n_topics=3):
    """
    Fit an LDA topic model on the transcripts and print each topic's top words

    Each video's translated text is one document. For every topic the ten
    highest-weighted words are printed, strongest first.

    Args:
        transcripts (dict): Maps video IDs to lists of entries with a
            'translated' key
        n_topics (int): Number of topics to extract
    """
    all_text = [" ".join(entry["translated"] for entry in data) for data in transcripts.values()]
    if not any(text.strip() for text in all_text):
        print("No transcript text available for topic modeling.")
        return

    vectorizer = CountVectorizer(stop_words="english")
    try:
        doc_term_matrix = vectorizer.fit_transform(all_text)
    except ValueError as e:
        # Raised when nothing is left after stop-word removal.
        print(f"Unable to build a vocabulary from the transcripts: {e}")
        return

    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(doc_term_matrix)

    # Looked up once; the vocabulary does not change between topics.
    feature_names = vectorizer.get_feature_names_out()
    for i, topic in enumerate(lda.components_):
        print(f"Topic {i + 1}:")
        # argsort is ascending, so reverse the tail to list the strongest word first.
        top_indices = topic.argsort()[-10:][::-1]
        print(" ".join(feature_names[index] for index in top_indices))

def pacing_analysis(transcripts):
    """
    Print, per video, the mean gap between consecutive transcript start times

    Args:
        transcripts (dict): Maps video IDs to lists of entries whose 'time'
            value can be converted with float()
    """
    for vid, rows in transcripts.items():
        starts = [float(row["time"]) for row in rows]
        # Pair every start time with the one that follows it.
        gaps = [later - earlier for earlier, later in zip(starts, starts[1:])]
        print(f"Video ID: {vid}")
        if not gaps:
            print("No intervals available.")
            continue
        print(f"Average Interval: {sum(gaps) / len(gaps):.2f} seconds")

def interactive_menu():
    """
    Run the interactive analysis session (options 1-8)

    Prompts for comma-separated YouTube video IDs and a target language code,
    fetches and saves the translated transcripts and the video metadata, then
    loops over a numbered menu until the exit option is chosen.

    Blank IDs and surrounding whitespace are discarded, an unusable topic
    count falls back to the default of 3, and an exception raised by an
    analysis is reported without ending the session.

    NOTE(review): this file defines interactive_menu again further down; the
    last definition is the one in effect once the whole file has run.
    """
    raw_ids = input("Enter YouTube video IDs separated by commas: ")
    # "id1, id2" must not produce " id2": an ID with a stray space is not found.
    video_ids = [part.strip() for part in raw_ids.split(",") if part.strip()]
    if not video_ids:
        print("No video IDs provided.")
        return
    target_language = input("Enter the target language code (e.g., 'en' for English): ").strip()

    # Fetch transcripts
    transcripts = fetch_and_translate_transcripts(video_ids, target_language)
    save_transcripts_to_file(transcripts)

    # Fetch video metadata
    video_metadata = fetch_video_metadata(video_ids)
    save_metadata_to_file(video_metadata)

    def ask_topic_count():
        """Return the requested topic count, or 3 for blank or unusable input."""
        answer = input("Enter the number of topics to model (default 3): ").strip()
        if not answer:
            return 3
        try:
            count = int(answer)
        except ValueError:
            count = 0
        if count < 1:
            print("Invalid number of topics; using the default of 3.")
            return 3
        return count

    exit_choice = "8"
    menu = (
        ("1", "Sentiment Analysis", lambda: analyze_sentiment(transcripts)),
        ("2", "Keyword Extraction", lambda: extract_keywords(transcripts)),
        ("3", "Generate WordCloud", lambda: generate_wordcloud(transcripts)),
        ("4", "Summarize Transcripts", lambda: summarize_transcripts(transcripts)),
        ("5", "Topic Modeling", lambda: model_topics(transcripts, ask_topic_count())),
        ("6", "Pacing Analysis", lambda: pacing_analysis(transcripts)),
        ("7", "Video Metrics Analysis", lambda: analyze_video_metrics(video_metadata)),
    )
    actions = {key: action for key, _, action in menu}

    while True:
        print("\nAnalysis Options:")
        for key, label, _ in menu:
            print(f"{key}. {label}")
        print(f"{exit_choice}. Exit")
        choice = input("Select an option (1-8): ").strip()

        if choice == exit_choice:
            print("Exiting...")
            break
        action = actions.get(choice)
        if action is None:
            print("Invalid choice. Please try again.")
            continue
        try:
            action()
        except Exception as e:
            # Keep the session alive; one failing analysis should not discard
            # the transcripts and metadata that were already fetched.
            print(f"Analysis failed: {e}")
def advanced_metadata_analysis(video_metadata):
    """
    Perform advanced analysis on video metadata

    Prints the channel distribution, publication date range and monthly
    distribution, the top five videos by views, likes and comments, the
    average like and comment rates, the ten most common tags and the privacy
    status distribution.

    When there is no metadata, a notice is printed and nothing else happens.
    Videos with zero views are left out of the rate averages instead of
    contributing an infinite rate.

    Args:
        video_metadata (dict): Dictionary of video metadata; each value must
            provide 'title', 'channel_title', 'published_at', 'view_count',
            'like_count', 'comment_count' and 'privacy_status', and may
            provide 'tags'
    """
    from datetime import datetime

    print("\n--- Advanced Metadata Analysis ---")

    if not video_metadata:
        # Without rows the engagement table has no columns and the
        # column lookups below raise KeyError.
        print("No video metadata available.")
        return

    records = list(video_metadata.values())

    # Channel Analysis
    channel_counts = Counter(metadata['channel_title'] for metadata in records)
    print("\nChannel Distribution:")
    for channel, count in channel_counts.most_common():
        print(f"{channel}: {count} video(s)")

    # Temporal Analysis
    # fromisoformat does not accept a trailing 'Z' on every Python version,
    # so it is rewritten as an explicit UTC offset first.
    published_dates = [
        datetime.fromisoformat(metadata['published_at'].replace('Z', '+00:00'))
        for metadata in records
        if metadata['published_at'] != 'N/A'
    ]

    if published_dates:
        df_dates = pd.DataFrame({'published_at': published_dates})
        print("\nPublished Date Analysis:")
        print("Earliest Video:", df_dates['published_at'].min())
        print("Latest Video:", df_dates['published_at'].max())

        # Monthly distribution
        df_dates['month'] = df_dates['published_at'].dt.to_period('M')
        monthly_dist = df_dates['month'].value_counts().sort_index()
        print("\nMonthly Video Distribution:")
        for month, count in monthly_dist.items():
            print(f"{month}: {count} video(s)")

    # Engagement Analysis
    engagement_metrics = pd.DataFrame.from_records([
        {
            'video_id': video_id,
            'title': metadata['title'],
            'views': metadata['view_count'],
            'likes': metadata['like_count'],
            'comments': metadata['comment_count']
        }
        for video_id, metadata in video_metadata.items()
    ])

    print("\nEngagement Metrics:")
    for column, heading in (('views', 'Views'), ('likes', 'Likes'), ('comments', 'Comments')):
        prefix = "" if column == 'views' else "\n"
        print(f"{prefix}Top 5 Videos by {heading}:")
        print(engagement_metrics.nlargest(5, column)[['title', column]])

    # Engagement Ratio Analysis
    # Zero views become NaN so the rate is undefined (and skipped by mean)
    # rather than infinite.
    views = engagement_metrics['views'].where(engagement_metrics['views'] > 0)
    engagement_metrics['like_rate'] = engagement_metrics['likes'] / views * 100
    engagement_metrics['comment_rate'] = engagement_metrics['comments'] / views * 100

    avg_like_rate = engagement_metrics['like_rate'].mean()
    avg_comment_rate = engagement_metrics['comment_rate'].mean()
    like_text = "N/A" if pd.isna(avg_like_rate) else f"{avg_like_rate:.2f}%"
    comment_text = "N/A" if pd.isna(avg_comment_rate) else f"{avg_comment_rate:.2f}%"

    print("\nAverage Engagement Rates:")
    print(f"Average Like Rate: {like_text}")
    print(f"Average Comment Rate: {comment_text}")

    # Tag Analysis
    tag_counts = Counter(
        tag for metadata in records for tag in metadata.get('tags', [])
    )
    print("\nTop 10 Tags:")
    for tag, count in tag_counts.most_common(10):
        print(f"{tag}: {count}")

    # Privacy Status Distribution
    privacy_status = Counter(metadata['privacy_status'] for metadata in records)
    print("\nPrivacy Status Distribution:")
    for status, count in privacy_status.items():
        print(f"{status}: {count} video(s)")

def comment_sentiment_analysis(video_metadata):
    """
    Score the stored comments of every video with VADER and print the results

    For each video that has comments, the number of comments, their mean
    compound score and the counts of positive, neutral and negative comments
    are printed. An overall mean across all videos follows when any comment
    was scored.

    Args:
        video_metadata (dict): Dictionary of video metadata; comments are read
            from each value's 'comments' list
    """
    print("\n--- Comment Sentiment Analysis ---")

    # A fresh analyzer is created for this call; if it cannot be built the
    # analysis is abandoned after reporting the error.
    try:
        analyzer = SentimentIntensityAnalyzer()
    except Exception as err:
        print(f"Error initializing sentiment analyzer: {err}")
        return

    pooled_scores = []

    for details in video_metadata.values():
        comments = details.get('comments', [])
        if not comments:
            continue

        print(f"\nAnalysis for Video: {details['title']}")
        scores = [
            analyzer.polarity_scores(item['text'])['compound']
            for item in comments
        ]
        if not scores:
            continue

        mean_score = sum(scores) / len(scores)
        pooled_scores.extend(scores)

        positives = sum(1 for value in scores if value > 0)
        neutrals = sum(1 for value in scores if value == 0)
        negatives = sum(1 for value in scores if value < 0)

        print(f"Number of comments analyzed: {len(comments)}")
        print(f"Average Comment Sentiment: {mean_score:.2f}")
        print("Sentiment Categories:")
        print(f"Positive Comments: {positives}")
        print(f"Neutral Comments: {neutrals}")
        print(f"Negative Comments: {negatives}")

    if pooled_scores:
        overall = sum(pooled_scores) / len(pooled_scores)
        print(f"\nOverall Average Sentiment Across All Videos: {overall:.2f}")

# Redefinition of interactive_menu that adds the advanced metadata and comment sentiment options
def interactive_menu():
    """
    Run the interactive analysis session (options 1-10)

    Prompts for comma-separated YouTube video IDs and a target language code,
    fetches and saves the translated transcripts and the video metadata, then
    loops over a numbered menu until the exit option is chosen.

    Blank IDs and surrounding whitespace are discarded, an unusable topic
    count falls back to the default of 3, and an exception raised by an
    analysis is reported without ending the session.

    NOTE(review): this file defines interactive_menu again further down; the
    last definition is the one in effect once the whole file has run.
    """
    raw_ids = input("Enter YouTube video IDs separated by commas: ")
    # "id1, id2" must not produce " id2": an ID with a stray space is not found.
    video_ids = [part.strip() for part in raw_ids.split(",") if part.strip()]
    if not video_ids:
        print("No video IDs provided.")
        return
    target_language = input("Enter the target language code (e.g., 'en' for English): ").strip()

    # Fetch transcripts
    transcripts = fetch_and_translate_transcripts(video_ids, target_language)
    save_transcripts_to_file(transcripts)

    # Fetch video metadata
    video_metadata = fetch_video_metadata(video_ids)
    save_metadata_to_file(video_metadata)

    def ask_topic_count():
        """Return the requested topic count, or 3 for blank or unusable input."""
        answer = input("Enter the number of topics to model (default 3): ").strip()
        if not answer:
            return 3
        try:
            count = int(answer)
        except ValueError:
            count = 0
        if count < 1:
            print("Invalid number of topics; using the default of 3.")
            return 3
        return count

    exit_choice = "10"
    menu = (
        ("1", "Sentiment Analysis", lambda: analyze_sentiment(transcripts)),
        ("2", "Keyword Extraction", lambda: extract_keywords(transcripts)),
        ("3", "Generate WordCloud", lambda: generate_wordcloud(transcripts)),
        ("4", "Summarize Transcripts", lambda: summarize_transcripts(transcripts)),
        ("5", "Topic Modeling", lambda: model_topics(transcripts, ask_topic_count())),
        ("6", "Pacing Analysis", lambda: pacing_analysis(transcripts)),
        ("7", "Basic Video Metrics Analysis", lambda: analyze_video_metrics(video_metadata)),
        ("8", "Advanced Metadata Analysis", lambda: advanced_metadata_analysis(video_metadata)),
        ("9", "Comment Sentiment Analysis", lambda: comment_sentiment_analysis(video_metadata)),
    )
    actions = {key: action for key, _, action in menu}

    while True:
        print("\nAnalysis Options:")
        for key, label, _ in menu:
            print(f"{key}. {label}")
        print(f"{exit_choice}. Exit")
        choice = input("Select an option (1-10): ").strip()

        if choice == exit_choice:
            print("Exiting...")
            break
        action = actions.get(choice)
        if action is None:
            print("Invalid choice. Please try again.")
            continue
        try:
            action()
        except Exception as e:
            # Keep the session alive; one failing analysis should not discard
            # the transcripts and metadata that were already fetched.
            print(f"Analysis failed: {e}")
import os
import sys
from youtube_transcript_api import YouTubeTranscriptApi
from googletrans import Translator
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import googleapiclient.discovery
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# One-time setup shared by every analysis below: the VADER lexicon backs the
# sentiment analyzer, and a single translator instance is reused for all calls.
nltk.download("vader_lexicon")
translator = Translator()
sia = SentimentIntensityAnalyzer()

# Output locations for the transcript dump and the metadata table.
output_file = "transcript_results.txt"
metadata_file = "video_metadata.csv"

# YouTube Data API setup
# The key is read from the YOUTUBE_API_KEY environment variable; credentials
# must never be written into this file. It defaults to an empty string when
# the variable is not set.
# NOTE(review): any key that was previously stored in this file should be
# treated as exposed and rotated.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# The functions defined earlier in this file are reused unchanged by the code below.

def visualize_engagement_metrics(video_metadata):
    """
    Create visualizations for video engagement metrics

    Draws three bar charts in one figure: views per video, likes per video,
    and like/comment rates (as a percentage of views) per video.

    When there is no metadata, a notice is printed and nothing is plotted.
    Videos with zero views get an undefined (NaN) rate instead of an
    infinite one.

    Args:
        video_metadata (dict): Dictionary of video metadata; each value must
            provide 'title', 'view_count', 'like_count' and 'comment_count'
    """
    if not video_metadata:
        # An empty record list yields a frame without columns, and the
        # column lookups below would raise KeyError.
        print("No video metadata available to visualize.")
        return

    # Prepare data
    df = pd.DataFrame.from_records([
        {
            'video_id': video_id,
            'title': metadata['title'],
            'views': metadata['view_count'],
            'likes': metadata['like_count'],
            'comments': metadata['comment_count']
        }
        for video_id, metadata in video_metadata.items()
    ])

    # Zero views become NaN so the division cannot produce infinity.
    views = df['views'].where(df['views'] > 0)
    df['like_rate'] = df['likes'] / views * 100
    df['comment_rate'] = df['comments'] / views * 100

    # Set up the plots
    plt.figure(figsize=(15, 10))

    # Views Comparison
    plt.subplot(2, 2, 1)
    df.sort_values('views', ascending=False).plot(
        x='title',
        y='views',
        kind='bar',
        ax=plt.gca(),
        rot=45,
        title='Video Views Comparison'
    )

    # Likes Comparison
    plt.subplot(2, 2, 2)
    df.sort_values('likes', ascending=False).plot(
        x='title',
        y='likes',
        kind='bar',
        ax=plt.gca(),
        rot=45,
        title='Video Likes Comparison'
    )

    # Engagement Rate
    plt.subplot(2, 2, 3)
    # Long format: one row per (video, rate type) so the two rates can be
    # drawn side by side with hue.
    engagement_rates = df[['title', 'like_rate', 'comment_rate']].melt(
        id_vars='title',
        var_name='Rate Type',
        value_name='Percentage'
    )

    sns.barplot(
        x='title',
        y='Percentage',
        hue='Rate Type',
        data=engagement_rates
    )
    plt.title('Engagement Rates')
    plt.xticks(rotation=45)

    # One layout pass after every subplot exists.
    plt.tight_layout()
    plt.show()

def compare_video_topics(transcripts, n_topics=3):
    """
    Compare topics across multiple videos

    Fits an LDA model on the TF-IDF matrix of the transcripts (one document
    per video), prints the ten strongest words of each topic, then prints and
    plots the cosine similarity between the topic vectors.

    When there is no transcript text, or no vocabulary can be built from it,
    a notice is printed and nothing is modeled.

    Args:
        transcripts (dict): Dictionary of video transcripts
        n_topics (int): Number of topics to extract
    """
    # Prepare texts
    all_texts = [" ".join(entry["translated"] for entry in data) for data in transcripts.values()]
    if not any(text.strip() for text in all_texts):
        print("No transcript text available for topic comparison.")
        return

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(all_texts)
    except ValueError as e:
        # Raised when nothing is left after stop-word removal.
        print(f"Unable to build a vocabulary from the transcripts: {e}")
        return

    # Topic Modeling
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(tfidf_matrix)

    # Looked up once; the vocabulary does not change between topics.
    feature_names = vectorizer.get_feature_names_out()

    print("\nTopic Comparisons Across Videos:")
    for i, topic in enumerate(lda.components_):
        print(f"\nTopic {i + 1}:")
        # Get top words for each topic, strongest first
        top_words_indices = topic.argsort()[-10:][::-1]
        print(" ".join(feature_names[idx] for idx in top_words_indices))

    # Compute topic similarity
    topic_vectors = lda.components_
    similarity_matrix = cosine_similarity(topic_vectors)

    print("\nTopic Similarity Matrix:")
    print(similarity_matrix)

    # Visualize topic similarity
    topic_labels = [f'Topic {i + 1}' for i in range(len(topic_vectors))]
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        similarity_matrix,
        annot=True,
        cmap='YlGnBu',
        xticklabels=topic_labels,
        yticklabels=topic_labels
    )
    plt.title('Topic Similarity Heatmap')
    plt.tight_layout()
    plt.show()

def analyze_video_similarities(transcripts):
    """
    Analyze similarities between video transcripts

    Builds a TF-IDF vector per video from its translated text, then prints
    and plots the pairwise cosine similarity between the videos.

    When there is no transcript text, or no vocabulary can be built from it,
    a notice is printed and nothing is computed.

    Args:
        transcripts (dict): Dictionary of video transcripts
    """
    # Prepare texts
    video_ids = list(transcripts.keys())
    all_texts = [" ".join(entry["translated"] for entry in data) for data in transcripts.values()]
    if not any(text.strip() for text in all_texts):
        print("No transcript text available for similarity analysis.")
        return

    # Use TF-IDF Vectorizer
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(all_texts)
    except ValueError as e:
        # Raised when nothing is left after stop-word removal.
        print(f"Unable to build a vocabulary from the transcripts: {e}")
        return

    # Compute cosine similarity
    similarity_matrix = cosine_similarity(tfidf_matrix)

    # Label rows and columns with the video IDs for readable output
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=video_ids,
        columns=video_ids
    )

    print("\nVideo Transcript Similarity Matrix:")
    print(similarity_df)

    # Visualize similarity
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        similarity_df,
        annot=True,
        cmap='YlGnBu',
        xticklabels=video_ids,
        yticklabels=video_ids
    )
    plt.title('Video Transcript Similarity Heatmap')
    plt.tight_layout()
    plt.show()

def cross_video_keyword_analysis(transcripts):
    """
    Perform cross-video keyword analysis

    Prints the ten highest TF-IDF words of every video, followed by the ten
    words with the highest mean TF-IDF score across all videos.

    When there is no transcript text, or no vocabulary can be built from it,
    a notice is printed and nothing is computed.

    Args:
        transcripts (dict): Dictionary of video transcripts
    """
    # Prepare texts for each video
    video_texts = {
        video_id: " ".join(entry["translated"] for entry in data)
        for video_id, data in transcripts.items()
    }
    if not any(text.strip() for text in video_texts.values()):
        print("No transcript text available for keyword analysis.")
        return

    # Create TF-IDF Vectorizer
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(list(video_texts.values()))
    except ValueError as e:
        # Raised when nothing is left after stop-word removal.
        print(f"Unable to build a vocabulary from the transcripts: {e}")
        return

    # Get feature names (words)
    feature_names = vectorizer.get_feature_names_out()

    # Densify once and reuse the array for the per-video and global rankings.
    tfidf_dense = tfidf_matrix.toarray()

    # Analyze top keywords for each video
    print("\nTop Keywords per Video:")
    for video_id, scores in zip(video_texts, tfidf_dense):
        # Get top 10 keywords, strongest first
        top_tfidf_indices = scores.argsort()[-10:][::-1]
        top_keywords = [feature_names[idx] for idx in top_tfidf_indices]
        print(f"\nVideo {video_id}:")
        print(", ".join(top_keywords))

    # Common keywords across videos
    global_keyword_scores = tfidf_dense.mean(axis=0)

    # Get top global keywords
    top_global_keyword_indices = global_keyword_scores.argsort()[-10:][::-1]
    top_global_keywords = [feature_names[idx] for idx in top_global_keyword_indices]

    print("\nTop Global Keywords:")
    print(", ".join(top_global_keywords))

def interactive_menu():
    """
    Run the interactive analysis session (options 1-14)

    Prompts for comma-separated YouTube video IDs and a target language code,
    fetches and saves the translated transcripts and the video metadata, then
    loops over a numbered menu until the exit option is chosen.

    Blank IDs and surrounding whitespace are discarded, an unusable topic
    count falls back to the default of 3, and an exception raised by an
    analysis is reported without ending the session.
    """
    raw_ids = input("Enter YouTube video IDs separated by commas: ")
    # "id1, id2" must not produce " id2": an ID with a stray space is not found.
    video_ids = [part.strip() for part in raw_ids.split(",") if part.strip()]
    if not video_ids:
        print("No video IDs provided.")
        return
    target_language = input("Enter the target language code (e.g., 'en' for English): ").strip()

    # Fetch transcripts
    transcripts = fetch_and_translate_transcripts(video_ids, target_language)
    save_transcripts_to_file(transcripts)

    # Fetch video metadata
    video_metadata = fetch_video_metadata(video_ids)
    save_metadata_to_file(video_metadata)

    def ask_topic_count():
        """Return the requested topic count, or 3 for blank or unusable input."""
        answer = input("Enter the number of topics to model (default 3): ").strip()
        if not answer:
            return 3
        try:
            count = int(answer)
        except ValueError:
            count = 0
        if count < 1:
            print("Invalid number of topics; using the default of 3.")
            return 3
        return count

    exit_choice = "14"
    menu = (
        ("1", "Sentiment Analysis", lambda: analyze_sentiment(transcripts)),
        ("2", "Keyword Extraction", lambda: extract_keywords(transcripts)),
        ("3", "Generate WordCloud", lambda: generate_wordcloud(transcripts)),
        ("4", "Summarize Transcripts", lambda: summarize_transcripts(transcripts)),
        ("5", "Topic Modeling", lambda: model_topics(transcripts, ask_topic_count())),
        ("6", "Pacing Analysis", lambda: pacing_analysis(transcripts)),
        ("7", "Basic Video Metrics Analysis", lambda: analyze_video_metrics(video_metadata)),
        ("8", "Advanced Metadata Analysis", lambda: advanced_metadata_analysis(video_metadata)),
        ("9", "Comment Sentiment Analysis", lambda: comment_sentiment_analysis(video_metadata)),
        ("10", "Visualize Engagement Metrics", lambda: visualize_engagement_metrics(video_metadata)),
        ("11", "Compare Video Topics", lambda: compare_video_topics(transcripts, ask_topic_count())),
        ("12", "Analyze Video Similarities", lambda: analyze_video_similarities(transcripts)),
        ("13", "Cross-Video Keyword Analysis", lambda: cross_video_keyword_analysis(transcripts)),
    )
    actions = {key: action for key, _, action in menu}

    while True:
        print("\nAnalysis Options:")
        for key, label, _ in menu:
            print(f"{key}. {label}")
        print(f"{exit_choice}. Exit")
        choice = input("Select an option (1-14): ").strip()

        if choice == exit_choice:
            print("Exiting...")
            break
        action = actions.get(choice)
        if action is None:
            print("Invalid choice. Please try again.")
            continue
        try:
            action()
        except Exception as e:
            # Keep the session alive; one failing analysis should not discard
            # the transcripts and metadata that were already fetched.
            print(f"Analysis failed: {e}")

# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interactive_menu()

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\desai\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\desai\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


KeyboardInterrupt: 