In [1]:
# Import necessary libraries and modules
import pandas as pd
import googleapiclient.discovery
from googleapiclient.errors import HttpError

In [2]:
# pip install google-api-python-client  # Library for interacting with Google APIs

### Function to authenticate with the YouTube API using an API key
 

In [2]:
def get_authenticated_service(api_key):
    """Build a YouTube Data API v3 client that authenticates with ``api_key``.

    Args:
        api_key: Developer key passed to the discovery client as ``developerKey``.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.
    """
    youtube_client = googleapiclient.discovery.build(
        "youtube",
        "v3",
        developerKey=api_key,
    )
    return youtube_client



### Function to retrieve comments for a YouTube video


In [3]:
def get_video_comments(service, video_id, max_comments=1000):
    """Collect the text of top-level comments for a single video.

    Pages through ``commentThreads().list`` (ordered by relevance, plain text)
    until ``max_comments`` comments are collected or no further page exists.

    Args:
        service: Client object exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.
        max_comments: Upper bound on the number of comments returned.

    Returns:
        A list of at most ``max_comments`` comment strings (``textDisplay``
        of each thread's top-level comment).  If an ``HttpError`` occurs it is
        printed and whatever was collected so far is returned.
    """
    collected = []
    next_token = None

    while len(collected) < max_comments:
        # Ask for at most 100 per page, and never more than still needed.
        request_args = dict(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
            order="relevance",
            maxResults=min(100, max_comments - len(collected)),
        )
        if next_token:
            request_args["pageToken"] = next_token

        try:
            response = service.commentThreads().list(**request_args).execute()
            collected.extend(
                thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for thread in response.get("items", [])
            )
            next_token = response.get("nextPageToken")
        except HttpError as e:
            # Report the failure and return what has been gathered so far.
            print(f"An error occurred: {e}")
            break

        # No token means the last page has been consumed.
        if not next_token:
            break

    return collected[:max_comments]


### Main function to execute the YouTube comments retrieval and processing

In [5]:
def main():
    """Fetch up to 1000 top-level comments for one video, print and save them.

    The YouTube Data API key is read from the ``YOUTUBE_API_KEY`` environment
    variable instead of being embedded in the notebook.  The key that was
    previously hardcoded here has been published with the notebook and should
    be treated as compromised and revoked.

    Side effects: prints the DataFrame and writes ``comments.csv`` to the
    current working directory.

    Raises:
        RuntimeError: if ``YOUTUBE_API_KEY`` is unset or empty.
    """
    import os  # function-scope import keeps this cell self-contained

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "YOUTUBE_API_KEY is not set; export your YouTube Data API key before running."
        )

    # Authenticate with YouTube API using the API key
    service = get_authenticated_service(api_key)

    # YouTube video ID to fetch comments for.
    # NOTE(review): this was labelled "Lord of the rings Trailer", but the
    # recorded output looks like Avengers: Infinity War comments -- confirm.
    video_id = "x8UAUAuKNcU"

    try:
        # Get up to 1000 comments for the specified video
        comments = get_video_comments(service, video_id, max_comments=1000)

        # Convert comments to a pandas DataFrame
        df = pd.DataFrame({"Comments": comments})

        # Print the DataFrame
        print(df)

        # Save the DataFrame to a CSV file
        csv_file_path = "comments.csv"
        df.to_csv(csv_file_path, index=False)

        # Print a success message with the file path
        print(f"Comments saved to {csv_file_path}")

    except HttpError as e:
        # Handle HTTP errors and print an error message
        print(f"An error occurred: {e}")


In [6]:
# Run the single-video pipeline when this cell executes.  The recorded output
# below shows the guard is satisfied when the cell is run in the notebook.

if __name__ == "__main__":
    main()


                                              Comments
0    “Evacuate the city, engage all defences and ge...
1    Infinity War was such a legendary movie that p...
2    The way MCU built up Thanos for 10 years is on...
3    Infinity War was MCU peak. 5 years and still h...
4    It’s almost 2024 and this trailer still puts a...
..                                                 ...
995                      humans and movies peaked here
996                             It's been five years 😢
997               1:59 is everything they took from us
998  You can't live with your own failures, where d...
999                    And now... Look at the Marvel 🤧

[1000 rows x 1 columns]
Comments saved to comments.csv


In [7]:
import pandas as pd

# Destination workbook for the comments exported above
excel_file_path = "comments.xlsx"

# Read the CSV produced by main() and write the same rows to Excel (no index column)
df = pd.read_csv("comments.csv")
df.to_excel(excel_file_path, index=False)

print(f"Comments saved to {excel_file_path}")


Comments saved to comments.xlsx


In [8]:
# Read the workbook back to confirm the export round-trips
excel_file_path = "comments.xlsx"
df = pd.read_excel(excel_file_path)

# Show a heading line followed by the DataFrame
print("DataFrame from Excel:", df, sep="\n")

DataFrame from Excel:
                                              Comments
0    “Evacuate the city, engage all defences and ge...
1    Infinity War was such a legendary movie that p...
2    The way MCU built up Thanos for 10 years is on...
3    Infinity War was MCU peak. 5 years and still h...
4    It’s almost 2024 and this trailer still puts a...
..                                                 ...
995                      humans and movies peaked here
996                             It's been five years 😢
997               1:59 is everything they took from us
998  You can't live with your own failures, where d...
999                    And now... Look at the Marvel 🤧

[1000 rows x 1 columns]


In [9]:
df.head(2)  # Show the first two rows of the DataFrame

Unnamed: 0,Comments
0,"“Evacuate the city, engage all defences and ge..."
1,Infinity War was such a legendary movie that p...


# Comments from Multiple Videos


In [10]:
# Import necessary libraries and modules
import pandas as pd
import googleapiclient.discovery
from googleapiclient.errors import HttpError

###  Function to authenticate with the YouTube API using an API key


In [11]:
def get_authenticated_service(api_key):
    """Return a YouTube Data API v3 client keyed by ``api_key``.

    Args:
        api_key: Developer key forwarded to the discovery client as ``developerKey``.

    Returns:
        The service object created by ``googleapiclient.discovery.build``.
    """
    build_client = googleapiclient.discovery.build
    return build_client("youtube", "v3", developerKey=api_key)


### Function to retrieve comments for a YouTube video


In [12]:
def get_video_comments(service, video_id, remaining_comments):
    """Collect up to ``remaining_comments`` top-level comment texts for a video.

    Pages through ``commentThreads().list`` (ordered by relevance, plain text).
    The budget is reduced by the number of comments actually received on each
    page rather than by the number requested, so a page that comes back
    shorter than ``maxResults`` no longer causes the function to stop early.

    Args:
        service: Client object exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.
        remaining_comments: Maximum number of comments to return.

    Returns:
        A list of at most ``remaining_comments`` comment strings.  If an
        ``HttpError`` occurs it is printed and the comments gathered so far
        are returned.
    """
    comments = []
    page_token = None

    # Loop until the requested number of comments has been collected
    while remaining_comments > 0:
        kwargs = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
            "order": "relevance",
            # Request at most 100 per page, and never more than still needed
            "maxResults": min(100, remaining_comments),
        }

        if page_token:
            kwargs["pageToken"] = page_token

        try:
            # API request to retrieve one page of comment threads
            results = service.commentThreads().list(**kwargs).execute()
        except HttpError as e:
            # Report the failure and return what was gathered so far
            print(f"An error occurred: {e}")
            break

        page_comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in results.get("items", [])
        ]
        # Defensive cap in case a page carries more items than requested
        page_comments = page_comments[:remaining_comments]
        comments.extend(page_comments)

        # Count what was actually received, not what was asked for
        remaining_comments -= len(page_comments)

        # Stop when there is no further page, or when a page came back empty
        # (guards against looping forever on a run of empty pages)
        page_token = results.get("nextPageToken")
        if not page_token or not page_comments:
            break

    return comments

###  Main function to execute the YouTube comments retrieval and processing for multiple videos


In [13]:
def main():
    """Fetch up to 1000 comments across several videos, print and save them.

    Videos are processed in list order; each call is given the number of
    comments still needed, and processing stops once the total is reached.

    The YouTube Data API key is read from the ``YOUTUBE_API_KEY`` environment
    variable instead of being embedded in the notebook.  The key that was
    previously hardcoded here has been published with the notebook and should
    be treated as compromised and revoked.

    Side effects: prints the DataFrame and writes ``all_comments.csv`` to the
    current working directory.

    Raises:
        RuntimeError: if ``YOUTUBE_API_KEY`` is unset or empty.
    """
    import os  # function-scope import keeps this cell self-contained

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "YOUTUBE_API_KEY is not set; export your YouTube Data API key before running."
        )

    # Authenticate with YouTube API using the API key
    service = get_authenticated_service(api_key)

    # List of YouTube video IDs
    video_ids = ["TcMBFSGVi1c", "eOrNdBpGMv8"]

    total_comments = 1000  # Total number of comments wanted across all videos
    all_comments = []

    # Iterate over each video ID and retrieve comments
    for video_id in video_ids:
        try:
            # Ask only for as many comments as are still needed
            comments = get_video_comments(service, video_id, remaining_comments=total_comments)

            # Extend the list of all comments with comments from the current video
            all_comments.extend(comments)

            # Reduce the outstanding budget by what this video contributed
            total_comments -= len(comments)

            # Exit the loop once the budget is used up
            if total_comments <= 0:
                break

        except HttpError as e:
            # Handle HTTP errors and print an error message
            print(f"An error occurred for video ID {video_id}: {e}")

    # Convert all comments to a pandas DataFrame
    df = pd.DataFrame({"Comments": all_comments})

    # Print the DataFrame
    print(df)

    # Save the DataFrame to a CSV file
    csv_file_path = "all_comments.csv"
    df.to_csv(csv_file_path, index=False)

    # Print a success message with the file path
    print(f"Comments saved to {csv_file_path}")

In [14]:

# Run the multi-video pipeline when this cell executes (the recorded output
# below shows the guard is satisfied when the cell is run in the notebook)
if __name__ == "__main__":
    # Fetch, print and save the comments
    main()


                                              Comments
0    The hype for this movie was truly a once in a ...
1    The hype for Infinity War and Endgame was unre...
2    Today is the 4th year anniversary of endgame's...
3    1:40 The way they synced up her gunshots with ...
4    This movie really lived up to it's title. It w...
..                                                 ...
995  Everyone forgot this movie.Because, this movie...
996         Avengers  All Series My Favourite All Time
997                   mcu should have ended right here
998                                 Whatever it takes!
999  whatever it takes - It will take the best supe...

[1000 rows x 1 columns]
Comments saved to all_comments.csv


In [16]:
pip install textblob


Defaulting to user installation because normal site-packages is not writeable
Collecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
     ------------------------------------ 636.8/636.8 kB 657.4 kB/s eta 0:00:00
Installing collected packages: textblob
Successfully installed textblob-0.17.1
Note: you may need to restart the kernel to use updated packages.


In [17]:
# Import necessary libraries and modules
import pandas as pd
import googleapiclient.discovery
from googleapiclient.errors import HttpError
from textblob import TextBlob  # Import TextBlob for sentiment analysis

# Function to authenticate with the YouTube API using an API key
def get_authenticated_service(api_key):
    """Create the YouTube Data API v3 service object for ``api_key``.

    Args:
        api_key: Developer key handed to the discovery client as ``developerKey``.

    Returns:
        Whatever ``googleapiclient.discovery.build`` returns for ("youtube", "v3").
    """
    api_name, api_version = "youtube", "v3"
    return googleapiclient.discovery.build(api_name, api_version, developerKey=api_key)

# Function to retrieve comments for a YouTube video
def get_video_comments(service, video_id, remaining_comments):
    """Collect up to ``remaining_comments`` top-level comment texts for a video.

    Pages through ``commentThreads().list`` (ordered by relevance, plain text).
    The budget is reduced by the number of comments actually received on each
    page rather than by the number requested, so a page that comes back
    shorter than ``maxResults`` no longer causes the function to stop early.

    Args:
        service: Client object exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.
        remaining_comments: Maximum number of comments to return.

    Returns:
        A list of at most ``remaining_comments`` comment strings.  If an
        ``HttpError`` occurs it is printed and the comments gathered so far
        are returned.
    """
    comments = []
    page_token = None

    # Loop until the requested number of comments has been collected
    while remaining_comments > 0:
        kwargs = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
            "order": "relevance",
            # Request at most 100 per page, and never more than still needed
            "maxResults": min(100, remaining_comments),
        }

        if page_token:
            kwargs["pageToken"] = page_token

        try:
            # Execute the API request to retrieve one page of comment threads
            results = service.commentThreads().list(**kwargs).execute()
        except HttpError as e:
            # Report the failure and return what was gathered so far
            print(f"An error occurred: {e}")
            break

        page_comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in results.get("items", [])
        ]
        # Defensive cap in case a page carries more items than requested
        page_comments = page_comments[:remaining_comments]
        comments.extend(page_comments)

        # Count what was actually received, not what was asked for
        remaining_comments -= len(page_comments)

        # Stop when there is no further page, or when a page came back empty
        # (guards against looping forever on a run of empty pages)
        page_token = results.get("nextPageToken")
        if not page_token or not page_comments:
            break

    return comments

# Function to perform sentiment analysis on a given text
def analyze_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``.

    Args:
        text: The text to score; it is passed straight to ``TextBlob``.

    Returns:
        The ``sentiment.polarity`` attribute of the resulting ``TextBlob``.
    """
    return TextBlob(text).sentiment.polarity

# Main function to execute the YouTube comments retrieval and sentiment analysis
def main():
    """Fetch up to 1000 comments across several videos and score their sentiment.

    Videos are processed in list order; each call is given the number of
    comments still needed, and processing stops once the total is reached.
    Every comment is then scored with ``analyze_sentiment``.

    The YouTube Data API key is read from the ``YOUTUBE_API_KEY`` environment
    variable instead of being embedded in the notebook.  The key that was
    previously hardcoded here has been published with the notebook and should
    be treated as compromised and revoked.

    Side effects: prints the DataFrame and writes
    ``comments_with_sentiment.csv`` to the current working directory.

    Raises:
        RuntimeError: if ``YOUTUBE_API_KEY`` is unset or empty.
    """
    import os  # function-scope import keeps this cell self-contained

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "YOUTUBE_API_KEY is not set; export your YouTube Data API key before running."
        )

    # Authenticate with YouTube API using the API key
    service = get_authenticated_service(api_key)

    # List of YouTube video IDs
    video_ids = ["TcMBFSGVi1c", "eOrNdBpGMv8"]

    total_comments = 1000  # Total number of comments wanted across all videos
    all_comments = []

    # Iterate over each video ID and retrieve comments
    for video_id in video_ids:
        try:
            # Ask only for as many comments as are still needed
            comments = get_video_comments(service, video_id, remaining_comments=total_comments)

            # Extend the list of all comments with comments from the current video
            all_comments.extend(comments)

            # Reduce the outstanding budget by what this video contributed
            total_comments -= len(comments)

            # Exit the loop once the budget is used up
            if total_comments <= 0:
                break

        except HttpError as e:
            # Handle HTTP errors and print an error message
            print(f"An error occurred for video ID {video_id}: {e}")

    # Perform sentiment analysis on each comment
    sentiment_scores = [analyze_sentiment(comment) for comment in all_comments]

    # Put each comment next to its sentiment score
    df = pd.DataFrame({"Comments": all_comments, "Sentiment": sentiment_scores})

    # Print the DataFrame
    print(df)

    # Save the DataFrame to a CSV file
    csv_file_path = "comments_with_sentiment.csv"
    df.to_csv(csv_file_path, index=False)

    # Print a success message with the file path
    print(f"Comments with sentiment saved to {csv_file_path}")

# Run the sentiment pipeline when this cell executes (the recorded output
# below shows the guard is satisfied when the cell is run in the notebook)
if __name__ == "__main__":
    # Fetch the comments, score them, then print and save the result
    main()


                                              Comments  Sentiment
0    The hype for this movie was truly a once in a ...   0.000000
1    The hype for Infinity War and Endgame was unre...   0.000000
2    Today is the 4th year anniversary of endgame's...   0.000000
3    1:40 The way they synced up her gunshots with ...   0.000000
4    This movie really lived up to it's title. It w...   0.200000
..                                                 ...        ...
995  Everyone forgot this movie.Because, this movie...   0.000000
996         Avengers  All Series My Favourite All Time   0.000000
997                   mcu should have ended right here   0.285714
998                                 Whatever it takes!   0.000000
999  whatever it takes - It will take the best supe...   0.666667

[1000 rows x 2 columns]
Comments with sentiment saved to comments_with_sentiment.csv


In [20]:
# Import necessary libraries and modules
import pandas as pd
import googleapiclient.discovery
from googleapiclient.errors import HttpError
from textblob import TextBlob  # Import TextBlob for sentiment analysis

# Function to authenticate with the YouTube API using an API key
def get_authenticated_service(api_key):
    """Construct a YouTube Data API v3 client using ``api_key`` as the developer key.

    Args:
        api_key: Developer key supplied to the discovery client as ``developerKey``.

    Returns:
        The service object returned by ``googleapiclient.discovery.build``.
    """
    build_kwargs = {"developerKey": api_key}
    return googleapiclient.discovery.build("youtube", "v3", **build_kwargs)

# Function to retrieve comments for a YouTube video
def get_video_comments(service, video_id, remaining_comments):
    """Collect up to ``remaining_comments`` top-level comment texts for a video.

    Pages through ``commentThreads().list`` (ordered by relevance, plain text).
    The budget is reduced by the number of comments actually received on each
    page rather than by the number requested, so a page that comes back
    shorter than ``maxResults`` no longer causes the function to stop early.

    Args:
        service: Client object exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.
        remaining_comments: Maximum number of comments to return.

    Returns:
        A list of at most ``remaining_comments`` comment strings.  If an
        ``HttpError`` occurs it is printed and the comments gathered so far
        are returned.
    """
    comments = []
    page_token = None

    # Loop until the requested number of comments has been collected
    while remaining_comments > 0:
        kwargs = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
            "order": "relevance",
            # Request at most 100 per page, and never more than still needed
            "maxResults": min(100, remaining_comments),
        }

        if page_token:
            kwargs["pageToken"] = page_token

        try:
            # Execute the API request to retrieve one page of comment threads
            results = service.commentThreads().list(**kwargs).execute()
        except HttpError as e:
            # Report the failure and return what was gathered so far
            print(f"An error occurred: {e}")
            break

        page_comments = [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in results.get("items", [])
        ]
        # Defensive cap in case a page carries more items than requested
        page_comments = page_comments[:remaining_comments]
        comments.extend(page_comments)

        # Count what was actually received, not what was asked for
        remaining_comments -= len(page_comments)

        # Stop when there is no further page, or when a page came back empty
        # (guards against looping forever on a run of empty pages)
        page_token = results.get("nextPageToken")
        if not page_token or not page_comments:
            break

    return comments

# Function to perform sentiment analysis on a given text
def analyze_sentiment(text):
    """Score ``text`` with TextBlob and return its polarity.

    Args:
        text: The text to score; it is passed straight to ``TextBlob``.

    Returns:
        The ``polarity`` field of the blob's ``sentiment`` attribute.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

# Function to categorize sentiment into positive, negative, or neutral
def categorize_sentiment(score):
    """Map a numeric sentiment score to a label.

    Args:
        score: Numeric score to classify.

    Returns:
        'Positive' when ``score`` is greater than 0, 'Negative' when it is
        less than 0, and 'Neutral' otherwise.
    """
    # Guard clauses: anything that is neither above nor below zero is neutral.
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

# Main function to execute the YouTube comments retrieval and sentiment analysis
def main():
    """Fetch up to 1000 comments across several videos, then score and label them.

    Videos are processed in list order; each call is given the number of
    comments still needed, and processing stops once the total is reached.
    Every comment is scored with ``analyze_sentiment`` and labelled with
    ``categorize_sentiment``.

    The YouTube Data API key is read from the ``YOUTUBE_API_KEY`` environment
    variable instead of being embedded in the notebook.  The key that was
    previously hardcoded here has been published with the notebook and should
    be treated as compromised and revoked.

    Side effects: prints the per-category counts and the negative comments,
    and writes ``comments_with_sentiment.csv`` to the current working directory.

    Raises:
        RuntimeError: if ``YOUTUBE_API_KEY`` is unset or empty.
    """
    import os  # function-scope import keeps this cell self-contained

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "YOUTUBE_API_KEY is not set; export your YouTube Data API key before running."
        )

    # Authenticate with YouTube API using the API key
    service = get_authenticated_service(api_key)

    # List of YouTube video IDs
    video_ids = ["TcMBFSGVi1c", "eOrNdBpGMv8"]

    total_comments = 1000  # Total number of comments wanted across all videos
    all_comments = []

    # Iterate over each video ID and retrieve comments
    for video_id in video_ids:
        try:
            # Ask only for as many comments as are still needed
            comments = get_video_comments(service, video_id, remaining_comments=total_comments)

            # Extend the list of all comments with comments from the current video
            all_comments.extend(comments)

            # Reduce the outstanding budget by what this video contributed
            total_comments -= len(comments)

            # Exit the loop once the budget is used up
            if total_comments <= 0:
                break

        except HttpError as e:
            # Handle HTTP errors and print an error message
            print(f"An error occurred for video ID {video_id}: {e}")

    # Perform sentiment analysis on each comment
    sentiment_scores = [analyze_sentiment(comment) for comment in all_comments]

    # Categorize sentiments
    sentiment_categories = [categorize_sentiment(score) for score in sentiment_scores]

    # Put each comment next to its score and category
    df = pd.DataFrame({"Comments": all_comments, "Sentiment Score": sentiment_scores, "Sentiment Category": sentiment_categories})

    # Print the counts for each sentiment category
    print("Sentiment Counts:")
    print(df['Sentiment Category'].value_counts())

    # Filter and print only comments with negative sentiment
    negative_comments = df[df['Sentiment Category'] == 'Negative']['Comments']
    print("\nComments with Negative Sentiment:")
    print(negative_comments)

    # Save the DataFrame to a CSV file
    csv_file_path = "comments_with_sentiment.csv"
    df.to_csv(csv_file_path, index=False)

    # Print a success message with the file path
    print(f"\nComments with sentiment saved to {csv_file_path}")

# Run the sentiment-categorisation pipeline when this cell executes (the
# recorded output below shows the guard is satisfied when the cell is run)
if __name__ == "__main__":
    # Fetch the comments, score and label them, then print and save the result
    main()


Sentiment Counts:
Neutral     442
Positive    434
Negative    124
Name: Sentiment Category, dtype: int64

Comments with Negative Sentiment:
23     Ya 4 años de esta joya y cronológicamente ya s...
42     2023, DAN GUE MASIH NONTONIN END GAME BERULANG...
75     Even in 2023 still give insane amount of goose...
86     Can't believe today is the day we lost Tony Stark
87     Unfortunately I couldn't witness this in theat...
                             ...                        
966    *You sitting on the toilet*\n\nToilet paper: *...
974     Still think after endgame marvel went south hard
975    Avengers assembled,captain america leading,iro...
978                           I'm excited for Secret War
980        This is where everything started to go wrong.
Name: Comments, Length: 124, dtype: object

Comments with sentiment saved to comments_with_sentiment.csv
