In [2]:
from googleapiclient.discovery import build
from textblob import TextBlob
import pandas as pd
import re
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Replace with your YouTube Data API key
API_KEY = 'API KEY'

# Function to extract video ID from URL
def extract_video_id(url):
    match = re.search(r'(?:v=|\/|embed\/|youtu\.be\/|watch\?v=|watch\?v%3D|watch\?v%253D)([a-zA-Z0-9_-]{11})', url)
    if match:
        return match.group(1)
    else:
        raise ValueError("Invalid YouTube URL or video ID could not be extracted")

# Function to fetch comments from a YouTube video using the YouTube API
def get_youtube_comments(video_url, api_key):
    video_id = extract_video_id(video_url)
    youtube = build('youtube', 'v3', developerKey=api_key)

    comments = []
    next_page_token = None
    page_count = 0

    while True:
        print(f"Fetching page {page_count + 1}...")  

        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=100,
            textFormat="plainText",
            pageToken=next_page_token
        )
        response = request.execute()

        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
            comments.append(comment)

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

        page_count += 1

    print(f"Total pages fetched: {page_count}") 
    print(f"Total comments fetched: {len(comments)}")  # Print total number of comments
    return comments

# Function to classify sentiment using TextBlob
def classify_sentiment(comment):
    analysis = TextBlob(comment)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

# Sample labeled comments for evaluation (for demo purposes)
labeled_comments = [
    {"comment": "I love this video!", "label": "Positive"},
    {"comment": "This is boring.", "label": "Negative"},
    {"comment": "It's okay.", "label": "Neutral"},
    {"comment": "Amazing content, keep it up!", "label": "Positive"},
    {"comment": "Terrible editing.", "label": "Negative"}
]

# Function to calculate and print evaluation metrics
def evaluate_model(predictions, true_labels):
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='weighted')
    recall = recall_score(true_labels, predictions, average='weighted')
    f1 = f1_score(true_labels, predictions, average='weighted')

    print("Evaluation Metrics:")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")

# Main function to scrape, classify comments, and print summary
def classify_comments_from_video():
    video_url = input("Enter the YouTube video URL: ")

    try:
        # Fetch all comments
        comments = get_youtube_comments(video_url, API_KEY)

        # Initialize counters and lists
        sentiment_counts = {'Positive': 0, 'Neutral': 0, 'Negative': 0}
        predictions = []
        positive_comments = []
        negative_comments = []

        # Classify each comment and count sentiments
        for comment in comments:
            sentiment = classify_sentiment(comment)
            sentiment_counts[sentiment] += 1
            predictions.append(sentiment)

            if sentiment == 'Positive':
                positive_comments.append(comment)
            elif sentiment == 'Negative':
                negative_comments.append(comment)

            print(f"Comment: {comment}")
            print(f"Sentiment: {sentiment}")
            print("-" * 80)  # Separator for readability

        # Print sentiment summary
        print("Sentiment Summary:")
        print(f"Total Comments: {len(comments)}")
        print(f"Positive: {sentiment_counts['Positive']}")
        print(f"Neutral: {sentiment_counts['Neutral']}")
        print(f"Negative: {sentiment_counts['Negative']}")

        # Save positive comments to CSV
        if positive_comments:
            pd.DataFrame(positive_comments, columns=["Positive Comments"]).to_csv('positive_comments.csv', index=False)
            print("Positive comments saved to positive_comments.csv")

        # Save negative comments to CSV
        if negative_comments:
            pd.DataFrame(negative_comments, columns=["Negative Comments"]).to_csv('negative_comments.csv', index=False)
            print("Negative comments saved to negative_comments.csv")

        # Evaluation (using the labeled comments)
        true_labels = [item['label'] for item in labeled_comments]
        test_predictions = [classify_sentiment(item['comment']) for item in labeled_comments]
        evaluate_model(test_predictions, true_labels)

    except Exception as e:
        print(f"An error occurred: {e}")

# Run the function
classify_comments_from_video()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
--------------------------------------------------------------------------------
Comment: Trump and Shannon sharpe doing all types of fake things to get people attention
Sentiment: Negative
--------------------------------------------------------------------------------
Comment: The left are terrified of this man, they called him racist, but then we seen pictures of him with all the great black leaders, he even won an award for helping black people that was awarded to him by Rosa Parks, even Mike Tyson couldn’t believe they called trump racist, then they called him a rapist but there was absolutely no evidence so it didn’t go anywhere , then a pedophile simply because he loves his daughter so much. they rigged his Election, they then impeach him but that didn’t stop him, they then called him a Russian asset and said he spied on Hillary‘s campaign, so the FBI looked into it and all they found was that Obama spied on Trump‘

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
