# In [10]:

import datetime
import pandas as pd
import matplotlib.pyplot as plt
from googleapiclient.discovery import build
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import schedule
import time
import os

nltk.download('vader_lexicon')

# Initialize NLTK VADER for sentiment analysis
sia = SentimentIntensityAnalyzer()

# YouTube API setup
# The API key is a credential, so it is read from the environment rather than
# being stored in the source file. Set YOUTUBE_API_KEY before running.
# NOTE(review): a key that was previously committed in this file should be
# treated as leaked and rotated in the Google Cloud console.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    # Fail fast with a clear message instead of a confusing API error later.
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; export your YouTube Data API key "
        "before running this script."
    )
youtube = build('youtube', 'v3', developerKey=api_key)

def get_comments(video_id, max_results=100):
    """Fetch the text of top-level comments for a YouTube video.

    Uses the module-level ``youtube`` API client and follows
    ``nextPageToken`` so that more than one page of results can be
    collected when ``max_results`` exceeds the per-request limit.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Maximum number of comments to return. Values of zero
            or less yield an empty list without calling the API.

    Returns:
        A list of comment strings (the ``textDisplay`` of each thread's
        top-level comment), at most ``max_results`` long.

    Any error raised by the API client while executing a request is not
    caught here and propagates to the caller.
    """
    # commentThreads.list accepts maxResults between 1 and 100 per request.
    page_size_limit = 100

    comments = []
    page_token = None

    while len(comments) < max_results:
        params = {
            'part': 'snippet',
            'videoId': video_id,
            'maxResults': min(page_size_limit, max_results - len(comments)),
            'textFormat': 'plainText',
        }
        if page_token:
            params['pageToken'] = page_token

        response = youtube.commentThreads().list(**params).execute()

        for item in response.get('items', []):
            comments.append(
                item['snippet']['topLevelComment']['snippet']['textDisplay']
            )

        # No token means the last page has been reached.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    # Defensive cap in case a page returns more items than requested.
    return comments[:max_results]

def save_comments_to_csv(comments, filename):
    """Write comments to a one-column CSV file and return the table.

    Args:
        comments: Sequence of comment values, one per row.
        filename: Path of the CSV file to write.

    Returns:
        The pandas DataFrame (single column named ``Comment``) that was
        written to ``filename``.
    """
    column_names = ['Comment']
    comment_frame = pd.DataFrame(data=comments, columns=column_names)
    # The row index is left out so the file holds only the comment text.
    comment_frame.to_csv(path_or_buf=filename, index=False)
    return comment_frame

def analyze_sentiments(comments):
    """Score every comment with the module-level analyzer and summarise.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A 5-tuple ``(sentiment_df, average_positive, average_neutral,
        average_negative, average_compound)``. ``sentiment_df`` has the
        columns ``Comment``, ``positive``, ``neutral``, ``negative`` and
        ``Compound``, one row per comment. Each average is the mean of the
        matching column, or ``0`` when there are no comments.
    """
    def mean_or_zero(values):
        # An empty list would divide by zero, so it averages to 0 instead.
        return sum(values) / len(values) if values else 0

    texts = []
    scores = []
    for text in comments:
        scores.append(sia.polarity_scores(text))
        texts.append(text)

    # Key order fixes the column order of the resulting DataFrame.
    results = {
        'Comment': texts,
        'positive': [s['pos'] for s in scores],
        'neutral': [s['neu'] for s in scores],
        'negative': [s['neg'] for s in scores],
        'Compound': [s['compound'] for s in scores],
    }

    # Tabular form for further analysis and plotting.
    sentiment_df = pd.DataFrame(results)

    return (
        sentiment_df,
        mean_or_zero(results['positive']),
        mean_or_zero(results['neutral']),
        mean_or_zero(results['negative']),
        mean_or_zero(results['Compound']),
    )

def plot_sentiments_pie(average_positive, average_neutral, average_negative, output_filename):
    """Save a pie chart of the three average sentiment shares.

    Args:
        average_positive: Size of the "Positive" wedge.
        average_neutral: Size of the "Neutral" wedge.
        average_negative: Size of the "Negative" wedge.
        output_filename: Path the chart image is saved to.

    Returns:
        ``output_filename``, unchanged.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(
        [average_positive, average_neutral, average_negative],
        labels=['Positive', 'Neutral', 'Negative'],
        colors=['green', 'gray', 'red'],
        autopct='%1.1f%%',
        startangle=140,
    )
    # Equal aspect ratio keeps the pie circular.
    ax.axis('equal')
    ax.set_title('Sentiment Composition of YouTube Comments')

    fig.savefig(output_filename)
    # Close the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

    print(f"Pie chart saved as {output_filename}")
    return output_filename

def run_sentiment_analysis(video_id='LLqnJIZqjhw'):
    """Fetch comments for a video, score them, and write the outputs.

    One run writes three timestamped files in the working directory: the
    raw comments CSV, the per-comment sentiment CSV, and a pie chart PNG.
    It also prints a summary and shows a histogram of compound scores.

    Args:
        video_id: ID of the YouTube video to analyse. Defaults to the
            video this script was originally written for, so existing
            zero-argument callers (including the scheduler) still work.

    Returns:
        None.
    """
    comments = get_comments(video_id)

    if not comments:
        # With no comments every average is 0, and all-zero wedge sizes
        # cannot be normalised into a pie chart, so stop before writing
        # empty output files.
        print(f"No comments retrieved for video {video_id}; skipping analysis.")
        return

    # Generate unique filenames with timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f'youtube_comments_{timestamp}.csv'
    pie_chart_filename = f'sentiment_pie_chart_{timestamp}.png'

    # Save comments to CSV
    save_comments_to_csv(comments, csv_filename)

    # Analyze sentiments and save results
    (
        sentiment_df,
        average_positive,
        average_neutral,
        average_negative,
        average_compound,
    ) = analyze_sentiments(comments)
    sentiment_df.to_csv(f'sentiment_analysis_{timestamp}.csv', index=True)

    # Generate and save sentiment pie chart
    plot_sentiments_pie(average_positive, average_neutral, average_negative, pie_chart_filename)

    # Print sentiment analysis summary (the preview is printed once)
    print("Sentiment Analysis Results:")
    print(sentiment_df.head())
    print(f"Average Positive: {average_positive}")
    print(f"Average Neutral: {average_neutral}")
    print(f"Average Negative: {average_negative}")
    print(f"Average Compound: {average_compound}")

    # Show the distribution of compound scores
    plot_sentiments(sentiment_df)
def plot_sentiments(df):
    """Display a histogram of the compound sentiment scores.

    Args:
        df: Table with a ``Compound`` column holding one score per comment.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(df['Compound'], bins=20, color='blue', alpha=0.7)
    ax.set_title('Sentiment Analysis of YouTube Comments')
    ax.set_xlabel('Compound Sentiment Score')
    ax.set_ylabel('Number of Comments')
    plt.show()

if __name__ == "__main__":
    # Run once immediately, then repeat every 15 minutes until interrupted.
    run_sentiment_analysis()
    job = schedule.every(15).minutes.do(run_sentiment_analysis)
    try:
        while True:
            schedule.run_pending()
            # Short sleep keeps the polling loop from spinning the CPU.
            time.sleep(1)
    except KeyboardInterrupt:
        print("Scheduler stopped.")
    finally:
        # Unregister the job so re-running this code in the same session
        # (e.g. a notebook cell) does not stack duplicate jobs.
        schedule.cancel_job(job)



# Output hidden; open in https://colab.research.google.com to view.