In [None]:
!pip install langdetect

# PRINTS ONLY COMMENT AND SENTIMENT

In [None]:
import re
import json
import numpy as np
import pandas as pd
from googleapiclient.discovery import build
from langdetect import detect
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from sklearn.cluster import KMeans

# Initialize YouTube API
# The key is read from the environment instead of being hard-coded so that no
# credential is stored in the notebook source.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY")  # YouTube Data API v3 key
if not API_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API v3 key."
    )
youtube = build("youtube", "v3", developerKey=API_KEY)

# Initialize models
sentiment_pipeline = pipeline("sentiment-analysis")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def extract_video_id(url):
    """Return the 11-character video ID found in a YouTube URL.

    The pattern recognises ``youtube.com`` links (``watch?v=``, ``/v/``,
    ``/e/``, ``/embed/`` and nested-path forms) as well as ``youtu.be`` short
    links. ``None`` is returned when no ID can be located.
    """
    video_id_regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^\"&?\/\s]{11})"
    found = re.search(video_id_regex, url)
    if not found:
        return None
    return found.group(1)

def get_comments(video_id):
    """Collect the text of every top-level comment on a video.

    Pages through ``commentThreads`` (100 threads per request, plain-text
    format) using the module-level ``youtube`` client and returns the comment
    bodies as a list, in the order the API returned them.
    """
    collected = []
    page_request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page.get("items", [])
        )
        # The loop stops once list_next() hands back a falsy value.
        page_request = youtube.commentThreads().list_next(page_request, page)
    return collected

def detect_language(comment):
    """Detect the language of a comment.

    Args:
        comment: Text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns for the text, or ``"unknown"``
        when detection raises an exception.
    """
    try:
        return detect(comment)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return "unknown"

def clean_comment(comment):
    """Normalize a comment for keyword matching while keeping timestamps.

    Links are removed, then every character that is not a word character or
    whitespace is stripped. A colon that sits between two digits is kept so
    that timestamps such as ``1:23`` survive cleaning and can still be found
    by ``extract_timestamps``. The result is trimmed and lower-cased.

    Args:
        comment: Raw comment text.

    Returns:
        The cleaned, lower-cased comment.
    """
    comment = re.sub(r"http\S+", "", comment)  # Remove links
    # Remove special characters; a colon is dropped only when it is not
    # flanked by a digit on both sides.
    comment = re.sub(r"(?<!\d):|:(?!\d)|[^\w\s:]", "", comment)
    return comment.strip().lower()

def analyze_sentiment(comment):
    """Classify the sentiment of a comment.

    Args:
        comment: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by
        ``sentiment_pipeline``.
    """
    # truncation=True asks the tokenizer to cut over-long comments down to the
    # model's maximum input length instead of letting the model fail on them.
    result = sentiment_pipeline(comment, truncation=True)[0]
    return result["label"], result["score"]

def identify_issue_comments(comments):
    """Return the comments that contain at least one issue keyword.

    Matching is a plain, case-sensitive substring test against each comment
    as given; the result keeps the input order.
    """
    issue_keywords = ("issue", "problem", "error", "bug", "glitch", "fault", "mistake", "trouble")
    return [
        text
        for text in comments
        if any(word in text for word in issue_keywords)
    ]

def extract_timestamps(comment):
    """Extract timestamps from a comment.

    Recognises ``M:SS`` / ``MM:SS`` and, with an optional trailing seconds
    part, ``H:MM:SS`` / ``HH:MM:SS``.

    Args:
        comment: Text to scan.

    Returns:
        A list of the matched timestamp strings in order of appearance.
    """
    # The seconds part is a non-capturing group so findall() returns the whole
    # match; without it "1:23:45" would be cut short to "1:23".
    return re.findall(r"\b\d{1,2}:\d{2}(?::\d{2})?\b", comment)

def recommend_fixes(issue_comments):
    """Map each issue comment to one recommendation string.

    The first rule whose keyword occurs in the comment wins (audio, then
    video, then sync); comments matching no rule get a generic suggestion.
    The returned list is parallel to ``issue_comments``.
    """
    rules = (
        (("audio", "sound"), "Check the audio levels and ensure there are no issues with the sound quality."),
        (("video", "visual"), "Review the video quality and ensure there are no visual glitches."),
        (("sync",), "Ensure that the audio and video are properly synchronized."),
    )
    fallback = "Review the mentioned issue and consider appropriate fixes."
    return [
        next(
            (advice for words, advice in rules if any(word in text for word in words)),
            fallback,
        )
        for text in issue_comments
    ]

def main():
    """Ask for a video URL and print every issue comment with its analysis.

    For each comment flagged by ``identify_issue_comments`` the comment text,
    any timestamps, the sentiment label with its confidence and a
    recommendation are printed. The function returns early with a message
    when the URL is invalid, no comments exist, or nothing is flagged.
    """
    url = input("Enter the YouTube video URL: ")
    vid = extract_video_id(url)
    if not vid:
        print("Invalid YouTube URL.")
        return

    print("Fetching comments...")
    raw_comments = get_comments(vid)
    if not raw_comments:
        print("No comments found.")
        return

    print("Processing comments...")
    cleaned = list(map(clean_comment, raw_comments))
    flagged = identify_issue_comments(cleaned)
    if not flagged:
        print("No issue-related comments found.")
        return

    print("\nIdentified Issues and Recommendations:")
    for text in flagged:
        stamps = extract_timestamps(text)
        label, confidence = analyze_sentiment(text)
        advice = recommend_fixes([text])[0]

        print(f"\nComment: {text}")
        if stamps:
            print(f"Timestamps: {', '.join(stamps)}")
        print(f"Sentiment: {label} (Confidence: {confidence:.2f})")
        print(f"Recommendation: {advice}")


# Run the interactive flow only when executed as a script.
if __name__ == "__main__":
    main()


# NO SUMMARY AND NO TIMESTAMPS BUT WORKS

In [None]:
import re
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from googleapiclient.discovery import build
from langdetect import detect
from transformers import pipeline

# Initialize YouTube API
# The key is read from the environment instead of being hard-coded so that no
# credential is stored in the notebook source.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY")  # YouTube Data API v3 key
if not API_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API v3 key."
    )
youtube = build("youtube", "v3", developerKey=API_KEY)

# Initialize models
sentiment_pipeline = pipeline("sentiment-analysis")

def extract_video_id(url):
    """Return the 11-character video ID found in a YouTube URL.

    The pattern recognises ``youtube.com`` links (``watch?v=``, ``/v/``,
    ``/e/``, ``/embed/`` and nested-path forms) as well as ``youtu.be`` short
    links. ``None`` is returned when no ID can be located.
    """
    video_id_regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^\"&?\/\s]{11})"
    found = re.search(video_id_regex, url)
    if not found:
        return None
    return found.group(1)

def get_comments(video_id):
    """Collect the text of every top-level comment on a video.

    Pages through ``commentThreads`` (100 threads per request, plain-text
    format) using the module-level ``youtube`` client and returns the comment
    bodies as a list, in the order the API returned them.
    """
    collected = []
    page_request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page.get("items", [])
        )
        # The loop stops once list_next() hands back a falsy value.
        page_request = youtube.commentThreads().list_next(page_request, page)
    return collected

def detect_language(comment):
    """Detect the language of a comment.

    Args:
        comment: Text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns for the text, or ``"unknown"``
        when detection raises an exception.
    """
    try:
        return detect(comment)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return "unknown"

def clean_comment(comment):
    """Normalize a comment for keyword matching while keeping timestamps.

    Links are removed, then every character that is not a word character or
    whitespace is stripped. A colon that sits between two digits is kept so
    that timestamps such as ``1:23`` survive cleaning and can still be found
    by ``extract_timestamps``. The result is trimmed and lower-cased.

    Args:
        comment: Raw comment text.

    Returns:
        The cleaned, lower-cased comment.
    """
    comment = re.sub(r"http\S+", "", comment)  # Remove links
    # Remove special characters; a colon is dropped only when it is not
    # flanked by a digit on both sides.
    comment = re.sub(r"(?<!\d):|:(?!\d)|[^\w\s:]", "", comment)
    return comment.strip().lower()

def analyze_sentiment(comment):
    """Classify the sentiment of a comment.

    Args:
        comment: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by
        ``sentiment_pipeline``.
    """
    # truncation=True asks the tokenizer to cut over-long comments down to the
    # model's maximum input length instead of letting the model fail on them.
    result = sentiment_pipeline(comment, truncation=True)[0]
    return result["label"], result["score"]

def identify_issue_comments(comments):
    """Select comments that mention an issue keyword and read as strongly negative.

    A comment is kept when it contains any keyword (substring match) and
    ``analyze_sentiment`` labels it ``NEGATIVE`` with a score above 0.75.
    Sentiment is only computed for comments that contain a keyword.
    """
    issue_keywords = (
        "issue", "problem", "error", "bug", "glitch", "fault", "mistake",
        "trouble", "not working", "bad", "broken",
    )
    flagged = []
    for text in comments:
        if not any(word in text for word in issue_keywords):
            continue
        label, confidence = analyze_sentiment(text)
        if label == "NEGATIVE" and confidence > 0.75:
            flagged.append(text)
    return flagged

def extract_timestamps(comment):
    """Extract timestamps from a comment.

    Recognises ``M:SS`` / ``MM:SS`` and, with an optional trailing seconds
    part, ``H:MM:SS`` / ``HH:MM:SS``.

    Args:
        comment: Text to scan.

    Returns:
        A list of the matched timestamp strings in order of appearance.
    """
    # The seconds part is a non-capturing group so findall() returns the whole
    # match; without it "1:23:45" would be cut short to "1:23".
    return re.findall(r"\b\d{1,2}:\d{2}(?::\d{2})?\b", comment)

def categorize_issues(issue_comments):
    """Group issue comments by category and gather the timestamps they mention.

    Each comment is filed under the first category whose keyword it contains
    (audio, then video, then synchronization); anything else goes to
    "Other Issues". A category only gets a timestamp entry when one of its
    comments actually contains a timestamp.

    Returns:
        ``(categorized_issues, timestamps_summary)``, both
        ``defaultdict(list)`` objects keyed by category name.
    """
    category_rules = (
        ("Audio Issues", ("audio", "sound")),
        ("Video Issues", ("video", "visual", "quality")),
        ("Synchronization Issues", ("sync", "delay")),
    )
    grouped = defaultdict(list)
    stamps_by_category = defaultdict(list)

    for text in issue_comments:
        found = extract_timestamps(text)
        category = next(
            (name for name, words in category_rules if any(word in text for word in words)),
            "Other Issues",
        )
        grouped[category].append(text)
        if found:
            stamps_by_category[category].extend(found)

    return grouped, stamps_by_category

def recommend_fixes(category):
    """Look up the recommendation text for an issue category.

    Unknown categories receive a generic "investigate further" message.
    """
    advice_by_category = dict((
        ("Audio Issues", "Check the audio levels, remove background noise, and verify microphone quality."),
        ("Video Issues", "Review video resolution, color grading, and any potential artifacts or glitches."),
        ("Synchronization Issues", "Ensure proper audio-video sync during editing and encoding."),
        ("Other Issues", "Review the mentioned issues and consider appropriate fixes."),
    ))
    try:
        return advice_by_category[category]
    except KeyError:
        return "Investigate the issue further for a solution."

def main():
    """Ask for a video URL and report negative issue comments by category.

    For every category the comment count, up to five sample comments, the
    recommended fix and the timestamps mentioned are printed. Timestamps are
    de-duplicated and listed chronologically so the output is stable between
    runs. The function returns early with a message when the URL is invalid,
    no comments exist, or nothing is flagged.
    """

    def timestamp_sort_key(timestamp):
        """Order colon-separated timestamps by their offset in seconds."""
        parts = timestamp.split(":")
        if all(part.isdecimal() for part in parts):
            seconds = 0
            for part in parts:
                seconds = seconds * 60 + int(part)
            return (0, seconds, timestamp)
        # Anything that is not purely digits and colons sorts last.
        return (1, 0, timestamp)

    video_url = input("Enter the YouTube video URL: ")
    video_id = extract_video_id(video_url)
    if not video_id:
        print("Invalid YouTube URL.")
        return

    print("🔹 Fetching comments...")
    comments = get_comments(video_id)
    if not comments:
        print("No comments found.")
        return

    print("🔹 Processing comments...")
    clean_comments = [clean_comment(c) for c in comments]
    issue_comments = identify_issue_comments(clean_comments)

    if not issue_comments:
        print("✅ No issue-related negative comments found.")
        return

    categorized_issues, timestamps_summary = categorize_issues(issue_comments)

    print("\n🚨 Identified Issues and Recommendations 🚨")
    # A separate loop name keeps the fetched ``comments`` list from being shadowed.
    for category, category_comments in categorized_issues.items():
        print(f"\n🔸 **{category} ({len(category_comments)} comments)**")
        for comment in category_comments[:5]:  # Display a few sample comments
            print(f"  - {comment}")
        print(f"  ➤ Recommended Fix: {recommend_fixes(category)}")

        # .get() avoids creating an empty entry for categories without timestamps.
        category_timestamps = timestamps_summary.get(category)
        if category_timestamps:
            ordered = sorted(set(category_timestamps), key=timestamp_sort_key)
            print(f"  ⏰ Issue Found At: {', '.join(ordered)}")

    print("\n✅ Analysis Complete!")


# Run the interactive flow only when executed as a script.
if __name__ == "__main__":
    main()


# WORKS WITH TIMESTAMP BUT NOT IN PROPER FORMAT

In [None]:
import re
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from googleapiclient.discovery import build
from langdetect import detect
from transformers import pipeline

# Initialize YouTube API
# The key is read from the environment instead of being hard-coded so that no
# credential is stored in the notebook source.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY")  # YouTube Data API v3 key
if not API_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API v3 key."
    )
youtube = build("youtube", "v3", developerKey=API_KEY)

# Initialize sentiment analysis model
sentiment_pipeline = pipeline("sentiment-analysis")

def extract_video_id(url):
    """Return the 11-character video ID found in a YouTube URL.

    The pattern recognises ``youtube.com`` links (``watch?v=``, ``/v/``,
    ``/e/``, ``/embed/`` and nested-path forms) as well as ``youtu.be`` short
    links. ``None`` is returned when no ID can be located.
    """
    video_id_regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^\"&?\/\s]{11})"
    found = re.search(video_id_regex, url)
    if not found:
        return None
    return found.group(1)

def get_comments(video_id):
    """Collect the text of every top-level comment on a video.

    Pages through ``commentThreads`` (100 threads per request, plain-text
    format) using the module-level ``youtube`` client and returns the comment
    bodies as a list, in the order the API returned them.
    """
    collected = []
    page_request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page.get("items", [])
        )
        # The loop stops once list_next() hands back a falsy value.
        page_request = youtube.commentThreads().list_next(page_request, page)
    return collected

def detect_language(comment):
    """Detect the language of a comment.

    Args:
        comment: Text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns for the text, or ``"unknown"``
        when detection raises an exception.
    """
    try:
        return detect(comment)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return "unknown"

def clean_comment(comment):
    """Strip links and punctuation from a comment and lower-case it.

    Everything that is neither a word character nor whitespace is removed,
    which includes the colons inside timestamps (``1:23`` becomes ``123``).
    """
    without_links = re.sub(r"http\S+", "", comment)
    words_only = re.sub(r"[^\w\s]", "", without_links)
    trimmed = words_only.strip()
    return trimmed.lower()

def analyze_sentiment(comment):
    """Classify the sentiment of a comment, truncating over-long input.

    Args:
        comment: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by
        ``sentiment_pipeline``.
    """
    MAX_LENGTH = 512  # Character cap applied before tokenization

    # Slicing bounds the number of characters, not the number of tokens, so
    # the tokenizer is additionally asked to truncate to the model's limit.
    truncated_comment = comment[:MAX_LENGTH]

    result = sentiment_pipeline(truncated_comment, truncation=True)[0]
    return result["label"], result["score"]

def extract_timestamps(comment):
    """Extract timestamps from a comment and return them in colon format.

    Three notations are recognised:

    * colon form: ``12:34`` or ``1:23:45`` (returned as written);
    * compact digits, as left behind when punctuation has been stripped from
      a comment: ``234`` -> ``2:34``, ``1234`` -> ``12:34``,
      ``12345`` -> ``1:23:45``. Numbers whose seconds (or, for five digits,
      minutes) exceed 59 are ignored;
    * unit form: ``1h 23m 45s`` -> ``1:23:45``, ``5m 10s`` -> ``5:10``.

    Args:
        comment: Text to scan.

    Returns:
        A list of unique timestamp strings. Colon-form matches come first,
        then compact-digit matches, then unit-form matches.
    """
    # All groups below are non-capturing or read through finditer(), so the
    # whole match is always available (findall() would return only groups).
    colon_pattern = r"\b\d{1,2}:\d{2}(?::\d{2})?\b"
    compact_pattern = r"(?<!:)\b\d{3,5}\b(?!:)"
    unit_pattern = (
        r"\b(?:(\d+)\s*h(?:\s*(\d+)\s*m)?(?:\s*(\d+)\s*s)?"
        r"|(\d+)\s*m\s*(\d+)\s*s)\b"
    )

    timestamps = []

    def add(value):
        if value not in timestamps:
            timestamps.append(value)

    for match in re.finditer(colon_pattern, comment):
        add(match.group(0))

    for match in re.finditer(compact_pattern, comment):
        digits = match.group(0)
        head, seconds = digits[:-2], digits[-2:]
        if int(seconds) > 59:
            continue
        if len(head) == 3:
            # Five digits: one hour digit followed by two minute digits.
            hours, minutes = head[0], head[1:]
            if int(minutes) > 59:
                continue
            add(f"{hours}:{minutes}:{seconds}")
        else:
            add(f"{head}:{seconds}")

    for match in re.finditer(unit_pattern, comment, flags=re.IGNORECASE):
        hours, minutes, seconds, lone_minutes, lone_seconds = match.groups()
        if hours is not None:
            add(f"{int(hours)}:{int(minutes or 0):02d}:{int(seconds or 0):02d}")
        else:
            add(f"{int(lone_minutes)}:{int(lone_seconds):02d}")

    return timestamps

def identify_issue_comments(comments):
    """Select comments that mention an issue keyword and read as strongly negative.

    A comment is kept when it contains any keyword (substring match) and
    ``analyze_sentiment`` labels it ``NEGATIVE`` with a score above 0.75.
    Sentiment is only computed for comments that contain a keyword.
    """
    issue_keywords = (
        "issue", "problem", "error", "bug", "glitch", "fault", "mistake",
        "trouble", "not working", "bad", "broken", "off",
    )
    flagged = []
    for text in comments:
        if not any(word in text for word in issue_keywords):
            continue
        label, confidence = analyze_sentiment(text)
        if label == "NEGATIVE" and confidence > 0.75:
            flagged.append(text)
    return flagged

def categorize_issues(issue_comments):
    """Group issue comments by category and gather the timestamps they mention.

    Each comment is lower-cased and filed under the first category whose
    keyword it contains (audio, video, synchronization, framing, in that
    order); anything else goes to "Other Issues". Every category that
    receives a comment also gets a timestamp entry, possibly empty.

    Returns:
        ``(categorized_issues, timestamps_summary)``, both
        ``defaultdict(list)`` objects keyed by category name.
    """
    category_rules = (
        ("Audio Issues", ("audio", "sound", "voice", "mic")),
        ("Video Issues", ("video", "visual", "quality", "blurry", "pixelated")),
        ("Synchronization Issues", ("sync", "delay", "out of sync", "lag")),
        ("Framing Issues", ("framing", "cropped", "off-center", "too much space")),
    )
    grouped = defaultdict(list)
    stamps_by_category = defaultdict(list)

    for text in issue_comments:
        found = extract_timestamps(text)
        lowered = text.lower()
        category = next(
            (name for name, words in category_rules if any(word in lowered for word in words)),
            "Other Issues",
        )
        grouped[category].append(text)
        stamps_by_category[category].extend(found)

    return grouped, stamps_by_category

def recommend_fixes(category):
    """Look up the recommendation text for an issue category.

    Unknown categories receive a generic "investigate further" message.
    """
    advice_by_category = dict((
        ("Audio Issues", "Check the audio levels, remove background noise, and verify microphone quality."),
        ("Video Issues", "Review video resolution, color grading, and any potential artifacts or glitches."),
        ("Synchronization Issues", "Ensure proper audio-video sync during editing and encoding."),
        ("Framing Issues", "Adjust camera positioning to maintain proper framing and avoid unnecessary empty space."),
        ("Other Issues", "Investigate the mentioned issues and consider appropriate fixes."),
    ))
    try:
        return advice_by_category[category]
    except KeyError:
        return "Investigate the issue further for a solution."

def main():
    """Ask for a video URL and report negative issue comments by category.

    For every category the comment count, up to five sample comments, the
    recommended fix and the timestamps mentioned are printed, followed by a
    final per-category timestamp summary. Timestamps are de-duplicated and
    listed in a stable order (chronological for colon-separated values).
    The function returns early with a message when the URL is invalid, no
    comments exist, or nothing is flagged.
    """

    def timestamp_sort_key(timestamp):
        """Order colon-separated timestamps by their offset in seconds."""
        parts = timestamp.split(":")
        if all(part.isdecimal() for part in parts):
            seconds = 0
            for part in parts:
                seconds = seconds * 60 + int(part)
            return (0, seconds, timestamp)
        # Anything that is not purely digits and colons sorts last.
        return (1, 0, timestamp)

    video_url = input("Enter the YouTube video URL: ")
    video_id = extract_video_id(video_url)
    if not video_id:
        print("Invalid YouTube URL.")
        return

    print("🔹 Fetching comments...")
    comments = get_comments(video_id)
    if not comments:
        print("No comments found.")
        return

    print("🔹 Processing comments...")
    clean_comments = [clean_comment(c) for c in comments]
    issue_comments = identify_issue_comments(clean_comments)

    if not issue_comments:
        print("✅ No issue-related negative comments found.")
        return

    categorized_issues, timestamps_summary = categorize_issues(issue_comments)

    print("\n🚨 Identified Issues and Recommendations 🚨")
    # A separate loop name keeps the fetched ``comments`` list from being shadowed.
    for category, category_comments in categorized_issues.items():
        print(f"\n🔸 **{category} ({len(category_comments)} comments)**")
        for comment in category_comments[:5]:  # Display a few sample comments
            print(f"  - {comment}")
        print(f"  ➤ Recommended Fix: {recommend_fixes(category)}")

        category_timestamps = timestamps_summary.get(category)
        if category_timestamps:
            ordered = sorted(set(category_timestamps), key=timestamp_sort_key)
            print(f"  ⏰ Issue Found At: {', '.join(ordered)}")

    print("\n📌 **Final Summary of All Issues with Timestamps** 📌")
    final_timestamps = {
        cat: sorted(set(timestamps), key=timestamp_sort_key)
        for cat, timestamps in timestamps_summary.items()
        if timestamps
    }

    if final_timestamps:
        for category, times in final_timestamps.items():
            print(f"🔹 {category}: {', '.join(times)}")
    else:
        print("✅ No specific timestamps mentioned for detected issues.")

    print("\n✅ Analysis Complete!")


# Run the interactive flow only when executed as a script.
if __name__ == "__main__":
    main()


# TO GIVE OVERALL STATS AND SENTIMENT ANALYSIS

In [None]:
import re
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from googleapiclient.discovery import build
from langdetect import detect
from transformers import pipeline

# Initialize YouTube API
# The key is read from the environment instead of being hard-coded so that no
# credential is stored in the notebook source.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY")  # YouTube Data API v3 key
if not API_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API v3 key."
    )
youtube = build("youtube", "v3", developerKey=API_KEY)

# Initialize sentiment analysis model
sentiment_pipeline = pipeline("sentiment-analysis")


def extract_video_id(url):
    """Return the 11-character video ID found in a YouTube URL.

    The pattern recognises ``youtube.com`` links (``watch?v=``, ``/v/``,
    ``/e/``, ``/embed/`` and nested-path forms) as well as ``youtu.be`` short
    links. ``None`` is returned when no ID can be located.
    """
    video_id_regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^\"&?\/\s]{11})"
    found = re.search(video_id_regex, url)
    if not found:
        return None
    return found.group(1)


def get_video_statistics(video_id):
    """Fetch the title, channel, publish date and engagement counts of a video.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys ``title``, ``channel``, ``published_date``
        (first ten characters of ``publishedAt``), ``views``, ``likes`` and
        ``comments``; counts missing from the response default to 0.
        ``None`` is returned when the response contains no items.
    """
    request = youtube.videos().list(
        part="snippet,statistics",
        id=video_id
    )
    response = request.execute()

    # .get() mirrors get_comments(): a response without an "items" key is
    # treated as "not found" instead of raising KeyError.
    items = response.get("items")
    if not items:
        return None

    video_data = items[0]
    snippet = video_data["snippet"]
    statistics = video_data["statistics"]
    return {
        "title": snippet["title"],
        "channel": snippet["channelTitle"],
        "published_date": snippet["publishedAt"][:10],  # Extract YYYY-MM-DD
        "views": int(statistics.get("viewCount", 0)),
        "likes": int(statistics.get("likeCount", 0)),
        "comments": int(statistics.get("commentCount", 0)),
    }


def get_comments(video_id):
    """Collect the text of every top-level comment on a video.

    Pages through ``commentThreads`` (100 threads per request, plain-text
    format) using the module-level ``youtube`` client and returns the comment
    bodies as a list, in the order the API returned them.
    """
    collected = []
    page_request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page.get("items", [])
        )
        # The loop stops once list_next() hands back a falsy value.
        page_request = youtube.commentThreads().list_next(page_request, page)
    return collected


def detect_language(comment):
    """Detect the language of a comment.

    Args:
        comment: Text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns for the text, or ``"unknown"``
        when detection raises an exception.
    """
    try:
        return detect(comment)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return "unknown"


def clean_comment(comment):
    """Remove links from a comment, trim it and lower-case it.

    Punctuation is left untouched, so timestamps such as ``1:23`` keep their
    colons.
    """
    link_free = re.sub(r"http\S+", "", comment)
    trimmed = link_free.strip()
    return trimmed.lower()


def analyze_sentiment(comment):
    """Classify the sentiment of a comment, truncating over-long input.

    Args:
        comment: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by
        ``sentiment_pipeline``.
    """
    MAX_LENGTH = 512  # Character cap applied before tokenization
    # Slicing bounds the number of characters, not the number of tokens, so
    # the tokenizer is additionally asked to truncate to the model's limit.
    truncated_comment = comment[:MAX_LENGTH]
    result = sentiment_pipeline(truncated_comment, truncation=True)[0]
    return result["label"], result["score"]


def extract_timestamps(comment):
    """Collect the distinct ``mm:ss`` / ``hh:mm:ss`` timestamps in a comment.

    The result is a set, so repeated mentions of the same timestamp collapse
    into a single entry.
    """
    timestamp_regex = re.compile(r"\b\d{1,2}:\d{2}(?::\d{2})?\b")
    return {hit.group(0) for hit in timestamp_regex.finditer(comment)}


def identify_issue_comments(comments):
    """Tally sentiment for all comments and pick out the negative issue comments.

    Every comment is scored with ``analyze_sentiment``. Labels other than
    ``POSITIVE`` and ``NEGATIVE`` are counted as neutral. A comment is
    flagged when it contains an issue keyword or a timestamp and is labelled
    ``NEGATIVE`` with a score above 0.60.

    Returns:
        ``(issue_comments, sentiment_scores)`` where ``sentiment_scores`` maps
        ``"positive"``, ``"neutral"`` and ``"negative"`` to counts.
    """
    issue_keywords = (
        "issue", "problem", "error", "bug", "glitch", "fault", "mistake", "trouble",
        "not working", "bad", "broken", "off", "lag", "delay", "freeze", "stutter",
        "audio", "video", "sync", "framing",
    )
    bucket_for_label = {"POSITIVE": "positive", "NEGATIVE": "negative"}
    tally = {"positive": 0, "neutral": 0, "negative": 0}
    flagged = []

    for text in comments:
        label, confidence = analyze_sentiment(text)
        tally[bucket_for_label.get(label, "neutral")] += 1

        mentions_issue = any(word in text for word in issue_keywords) or extract_timestamps(text)
        if mentions_issue and label == "NEGATIVE" and confidence > 0.60:
            flagged.append(text)

    return flagged, tally


def categorize_issues(issue_comments):
    """Group issue comments by category and gather the timestamps they mention.

    Each comment is filed under the first category whose keyword it contains
    (audio, video, synchronization, framing, in that order); anything else
    goes to "Other Issues". Matching is done on the comment as given, without
    changing its case.

    Returns:
        ``(categorized_issues, timestamps_summary)``: a ``defaultdict(list)``
        of comments and a ``defaultdict(set)`` of timestamps, both keyed by
        category name.
    """
    category_rules = (
        ("Audio Issues", ("audio", "sound", "voice", "mic")),
        ("Video Issues", ("video", "visual", "quality", "blurry", "pixelated")),
        ("Synchronization Issues", ("sync", "delay", "out of sync", "lag")),
        ("Framing Issues", ("framing", "cropped", "off-center")),
    )
    grouped = defaultdict(list)
    stamps_by_category = defaultdict(set)

    for text in issue_comments:
        found = extract_timestamps(text)
        category = next(
            (name for name, words in category_rules if any(word in text for word in words)),
            "Other Issues",
        )
        grouped[category].append(text)
        stamps_by_category[category].update(found)

    return grouped, stamps_by_category


def main():
    """Ask for a video URL and print statistics, sentiment and issue comments.

    The video statistics are printed first, followed by the share of
    positive, negative and neutral comments. The negative issue comments are
    then listed by category with up to five samples each and the timestamps
    they mention. The function returns early with a message when the URL is
    invalid, the video data cannot be retrieved, no comments exist, or
    nothing is flagged.
    """

    def timestamp_sort_key(timestamp):
        """Order colon-separated timestamps by their offset in seconds."""
        parts = timestamp.split(":")
        if all(part.isdecimal() for part in parts):
            seconds = 0
            for part in parts:
                seconds = seconds * 60 + int(part)
            return (0, seconds, timestamp)
        # Anything that is not purely digits and colons sorts last.
        return (1, 0, timestamp)

    video_url = input("Enter the YouTube video URL: ")
    video_id = extract_video_id(video_url)
    if not video_id:
        print("Invalid YouTube URL.")
        return

    # Fetch Video Statistics
    stats = get_video_statistics(video_id)
    if not stats:
        print("Failed to retrieve video data.")
        return

    print("\n📊 **Video Statistics**")
    print(f"🎥 Title: {stats['title']}")
    print(f"👤 Channel: {stats['channel']}")
    print(f"📅 Published Date: {stats['published_date']}")
    print(f"👀 Views: {stats['views']:,}")
    print(f"👍 Likes: {stats['likes']:,}")
    print(f"💬 Comments: {stats['comments']:,}")

    # Fetch Comments and Analyze
    print("\n🔹 Fetching comments...")
    comments = get_comments(video_id)
    if not comments:
        print("No comments found.")
        return

    print("🔹 Processing comments...")
    clean_comments = [clean_comment(c) for c in comments]
    issue_comments, sentiment_scores = identify_issue_comments(clean_comments)

    total_comments = sum(sentiment_scores.values())
    if total_comments > 0:
        positivity = (sentiment_scores["positive"] / total_comments) * 100
        negativity = (sentiment_scores["negative"] / total_comments) * 100
        # Taken from the neutral count rather than 100 - positive - negative,
        # which can round to a tiny negative number and print as "-0.00%".
        neutrality = (sentiment_scores["neutral"] / total_comments) * 100
        print("\n📢 **Viewer Sentiment Summary**")
        print(f"🟢 Positive: {positivity:.2f}% | 🔴 Negative: {negativity:.2f}% | ⚪ Neutral: {neutrality:.2f}%")

    if not issue_comments:
        print("✅ No issue-related negative comments found.")
        return

    # Categorize Issues
    categorized_issues, timestamps_summary = categorize_issues(issue_comments)

    print("\n🚨 Identified Issues 🚨")
    for category, category_comments in categorized_issues.items():
        print(f"\n🔸 **{category} ({len(category_comments)} comments)**")
        for comment in category_comments[:5]:  # Display a few sample comments
            print(f"  - {comment}")

        category_timestamps = timestamps_summary.get(category)
        if category_timestamps:
            ordered = sorted(category_timestamps, key=timestamp_sort_key)
            print(f"  ⏰ Issue Found At: {', '.join(ordered)}")


# Run the interactive flow only when executed as a script.
if __name__ == "__main__":
    main()


# WORKS WITH TIMESTAMPS

In [None]:
import re
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from googleapiclient.discovery import build
from langdetect import detect
from transformers import pipeline

# Initialize YouTube API
# The key is read from the environment instead of being hard-coded so that no
# credential is stored in the notebook source.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY")  # YouTube Data API v3 key
if not API_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API v3 key."
    )
youtube = build("youtube", "v3", developerKey=API_KEY)

# Initialize sentiment analysis model
sentiment_pipeline = pipeline("sentiment-analysis")


def extract_video_id(url):
    """Return the 11-character video ID found in a YouTube URL.

    The pattern recognises ``youtube.com`` links (``watch?v=``, ``/v/``,
    ``/e/``, ``/embed/`` and nested-path forms) as well as ``youtu.be`` short
    links. ``None`` is returned when no ID can be located.
    """
    video_id_regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^\"&?\/\s]{11})"
    found = re.search(video_id_regex, url)
    if not found:
        return None
    return found.group(1)


def get_comments(video_id):
    """Collect the text of every top-level comment on a video.

    Pages through ``commentThreads`` (100 threads per request, plain-text
    format) using the module-level ``youtube`` client and returns the comment
    bodies as a list, in the order the API returned them.
    """
    collected = []
    page_request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page.get("items", [])
        )
        # The loop stops once list_next() hands back a falsy value.
        page_request = youtube.commentThreads().list_next(page_request, page)
    return collected


def detect_language(comment):
    """Detect the language of a comment.

    Args:
        comment: Text to classify.

    Returns:
        Whatever ``langdetect.detect`` returns for the text, or ``"unknown"``
        when detection raises an exception.
    """
    try:
        return detect(comment)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return "unknown"


def clean_comment(comment):
    """Remove links from a comment, trim it and lower-case it.

    Punctuation is left untouched, so timestamps such as ``1:23`` keep their
    colons.
    """
    link_free = re.sub(r"http\S+", "", comment)
    trimmed = link_free.strip()
    return trimmed.lower()


def analyze_sentiment(comment):
    """Classify the sentiment of a comment, truncating over-long input.

    Args:
        comment: Text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by
        ``sentiment_pipeline``.
    """
    MAX_LENGTH = 512  # Character cap applied before tokenization

    # Slicing bounds the number of characters, not the number of tokens, so
    # the tokenizer is additionally asked to truncate to the model's limit.
    truncated_comment = comment[:MAX_LENGTH]

    result = sentiment_pipeline(truncated_comment, truncation=True)[0]
    return result["label"], result["score"]


def extract_timestamps(comment):
    """Collect the distinct ``mm:ss`` / ``hh:mm:ss`` timestamps in a comment.

    The result is a set, so repeated mentions of the same timestamp collapse
    into a single entry.
    """
    timestamp_regex = re.compile(r"\b\d{1,2}:\d{2}(?::\d{2})?\b")
    return {hit.group(0) for hit in timestamp_regex.finditer(comment)}


def identify_issue_comments(comments):
    """Select negative comments that mention an issue keyword or a timestamp.

    A comment is considered when it contains any keyword (substring match) or
    ``extract_timestamps`` finds a timestamp in it; it is kept when
    ``analyze_sentiment`` labels it ``NEGATIVE`` with a score above 0.60.
    """
    issue_keywords = (
        "issue", "problem", "error", "bug", "glitch", "fault", "mistake", "trouble",
        "not working", "bad", "broken", "off", "lag", "delay", "freeze", "stutter",
        "audio", "video", "sync", "framing",
    )
    flagged = []
    for text in comments:
        relevant = any(word in text for word in issue_keywords) or extract_timestamps(text)
        if not relevant:
            continue
        label, confidence = analyze_sentiment(text)
        if label == "NEGATIVE" and confidence > 0.60:  # Moderate to strong negative sentiment
            flagged.append(text)
    return flagged


def categorize_issues(issue_comments):
    """Group issue comments by category and gather the timestamps they mention.

    Each comment is lower-cased and filed under the first category whose
    keyword it contains (audio, video, synchronization, framing, in that
    order); anything else goes to "Other Issues".

    Returns:
        ``(categorized_issues, timestamps_summary)``: a ``defaultdict(list)``
        of comments and a ``defaultdict(set)`` of timestamps, both keyed by
        category name.
    """
    category_rules = (
        ("Audio Issues", ("audio", "sound", "voice", "mic")),
        ("Video Issues", ("video", "visual", "quality", "blurry", "pixelated")),
        ("Synchronization Issues", ("sync", "delay", "out of sync", "lag")),
        ("Framing Issues", ("framing", "cropped", "off-center", "too much space")),
    )
    grouped = defaultdict(list)
    stamps_by_category = defaultdict(set)  # Sets avoid duplicate timestamps

    for text in issue_comments:
        found = extract_timestamps(text)
        lowered = text.lower()
        category = next(
            (name for name, words in category_rules if any(word in lowered for word in words)),
            "Other Issues",
        )
        grouped[category].append(text)
        stamps_by_category[category].update(found)

    return grouped, stamps_by_category


def recommend_fixes(category):
    """Look up the recommendation text for an issue category.

    Unknown categories receive a generic "investigate further" message.
    """
    advice_by_category = dict((
        ("Audio Issues", "Check the audio levels, remove background noise, and verify microphone quality."),
        ("Video Issues", "Review video resolution, color grading, and any potential artifacts or glitches."),
        ("Synchronization Issues", "Ensure proper audio-video sync during editing and encoding."),
        ("Framing Issues", "Adjust camera positioning to maintain proper framing and avoid unnecessary empty space."),
        ("Other Issues", "Investigate the mentioned issues and consider appropriate fixes."),
    ))
    try:
        return advice_by_category[category]
    except KeyError:
        return "Investigate the issue further for a solution."


def main():
    """Ask for a video URL and report negative issue comments by category.

    For every category the comment count, up to five sample comments, the
    recommended fix and the timestamps mentioned are printed, followed by a
    final per-category timestamp summary. Timestamps are listed
    chronologically. The function returns early with a message when the URL
    is invalid, no comments exist, or nothing is flagged.
    """

    def timestamp_sort_key(timestamp):
        """Order colon-separated timestamps by their offset in seconds.

        Plain string sorting would place "10:05" before "2:30".
        """
        parts = timestamp.split(":")
        if all(part.isdecimal() for part in parts):
            seconds = 0
            for part in parts:
                seconds = seconds * 60 + int(part)
            return (0, seconds, timestamp)
        # Anything that is not purely digits and colons sorts last.
        return (1, 0, timestamp)

    video_url = input("Enter the YouTube video URL: ")
    video_id = extract_video_id(video_url)
    if not video_id:
        print("Invalid YouTube URL.")
        return

    print("🔹 Fetching comments...")
    comments = get_comments(video_id)
    if not comments:
        print("No comments found.")
        return

    print("🔹 Processing comments...")
    clean_comments = [clean_comment(c) for c in comments]
    issue_comments = identify_issue_comments(clean_comments)

    if not issue_comments:
        print("✅ No issue-related negative comments found.")
        return

    categorized_issues, timestamps_summary = categorize_issues(issue_comments)

    print("\n🚨 Identified Issues and Recommendations 🚨")
    # A separate loop name keeps the fetched ``comments`` list from being shadowed.
    for category, category_comments in categorized_issues.items():
        print(f"\n🔸 **{category} ({len(category_comments)} comments)**")
        for comment in category_comments[:5]:  # Display a few sample comments
            print(f"  - {comment}")
        print(f"  ➤ Recommended Fix: {recommend_fixes(category)}")

        if timestamps_summary[category]:
            formatted_timestamps = ', '.join(
                sorted(timestamps_summary[category], key=timestamp_sort_key)
            )
            print(f"  ⏰ Issue Found At: {formatted_timestamps}")

    print("\n📌 **Final Summary of All Issues with Timestamps** 📌")
    for category, timestamps in timestamps_summary.items():
        if timestamps:
            formatted_timestamps = ', '.join(sorted(timestamps, key=timestamp_sort_key))
            print(f"🔹 {category}: {formatted_timestamps}")
    if not any(timestamps_summary.values()):
        print("✅ No specific timestamps mentioned for detected issues.")


# Run the interactive flow only when executed as a script.
if __name__ == "__main__":
    main()
