<a href="https://colab.research.google.com/github/Sabha95/YoutubeComments/blob/Colab-version/Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [None]:
import getpass

import numpy as np
import pandas as pd
from google.colab import files, drive
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from textblob import TextBlob

## User Input

In [None]:
# Prompt for the API key with getpass so the key is not echoed into the notebook output
api_key = getpass.getpass('Please enter your YouTube API key: ')
# Playlists to harvest. The name must be `playlist_ids`: the video-ID cell below reads
# exactly that name (the previous spelling `playlisti_ds` caused a NameError there).
playlist_ids = ['PLbHrOSG7nVN0iy3JQonGt6p6illtDhoqX']


Please enter your YouTube API key: ··········


In [None]:
# Create the YouTube Data API v3 client that the following cells share
youtube = build(serviceName='youtube', version='v3', developerKey=api_key)

## Get Video IDs for Playlist

In [None]:
def get_all_video_ids_from_playlists(youtube, playlist_ids):
    """Collect the video IDs of every item in the given playlists.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_ids: Iterable of playlist IDs to walk, in order.

    Returns:
        One flat list of video IDs, in playlist order and then page order.
    """
    collected_ids = []

    for current_playlist in playlist_ids:
        page_token = None  # None requests the first page of this playlist
        has_more_pages = True

        while has_more_pages:
            page = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=current_playlist,
                maxResults=50,
                pageToken=page_token,
            ).execute()

            for entry in page['items']:
                collected_ids.append(entry['contentDetails']['videoId'])

            # A missing nextPageToken means this was the playlist's last page
            page_token = page.get('nextPageToken')
            has_more_pages = page_token is not None

    return collected_ids

# Resolve the configured playlists into one flat list of video IDs;
# the comment-collection cell further down iterates over `video_ids`.
video_ids = get_all_video_ids_from_playlists(youtube=youtube, playlist_ids=playlist_ids)

## Sentiment Analysis

In [None]:
class YouTubeMentalHealthAnalyzer:
    """Collect YouTube video metadata and comments and tag them for analysis.

    Wraps a YouTube Data API v3 client and produces pandas DataFrames that
    combine video metadata, TextBlob sentiment scores for each comment and
    simple keyword-based mental-health indicators.
    """

    def __init__(self, api_key):
        """Build the YouTube Data API v3 client authenticated with *api_key*."""
        self.youtube = build('youtube', 'v3', developerKey=api_key)

    def get_video_data(self, video_id):
        """Fetch metadata for a single video.

        Args:
            video_id: ID of the video to look up.

        Returns:
            A dict with the keys ``video_id``, ``title``, ``description``,
            ``published_date``, ``view_count``, ``like_count``,
            ``comment_count``, ``duration``, ``tags`` and ``category_id``;
            or ``None`` when the API returns no item for the ID or the
            request fails with an ``HttpError``.
        """
        request = self.youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=video_id
        )
        # Only the network call is guarded; a failed lookup is reported and
        # skipped so one bad video does not abort a whole dataset build.
        try:
            response = request.execute()
        except HttpError as err:
            print(f'Could not fetch video {video_id}: {err}')
            return None

        if not response['items']:
            return None

        video = response['items'][0]
        snippet = video['snippet']
        statistics = video['statistics']
        return {
            'video_id': video_id,
            'title': snippet['title'],
            'description': snippet['description'],
            'published_date': snippet['publishedAt'],
            # Counters arrive as strings and may be absent; default to 0
            'view_count': int(statistics.get('viewCount', 0)),
            'like_count': int(statistics.get('likeCount', 0)),
            'comment_count': int(statistics.get('commentCount', 0)),
            'duration': video['contentDetails']['duration'],
            'tags': snippet.get('tags', []),
            'category_id': snippet['categoryId']
        }

    @staticmethod
    def _comment_row(comment_id, snippet, video_id, reply_count, parent_id):
        """Flatten one comment snippet into a row dict with sentiment scores."""
        sentiment = TextBlob(snippet['textDisplay']).sentiment
        return {
            'comment_id': comment_id,
            'video_id': video_id,
            'text': snippet['textDisplay'],
            'author': snippet['authorDisplayName'],
            'published_at': snippet['publishedAt'],
            'like_count': snippet['likeCount'],
            'reply_count': reply_count,
            'is_reply': parent_id is not None,
            'parent_id': parent_id,
            'sentiment_polarity': sentiment.polarity,
            'sentiment_subjectivity': sentiment.subjectivity
        }

    def get_comments_with_sentiment(self, video_id, max_results=100):
        """Fetch a video's comment threads and score each comment's sentiment.

        Args:
            video_id: ID of the video whose comment threads are requested.
            max_results: Value sent as ``maxResults`` on the request, i.e. the
                page size. Paging continues until ``list_next`` returns no
                further request, so this does not cap the total row count.

        Returns:
            A DataFrame with one row per top-level comment and per reply
            found in the thread responses. Replies have ``is_reply`` True and
            ``parent_id`` set to the thread ID. Errors raised by the API
            request are not caught here.
        """
        # NOTE(review): only the replies embedded in each thread response are
        # read; the API may not embed every reply of a long thread -- confirm
        # against the commentThreads documentation.
        comments_data = []

        request = self.youtube.commentThreads().list(
            part='snippet,replies',
            videoId=video_id,
            maxResults=max_results,
            textFormat='plainText'
        )

        while request:
            response = request.execute()

            for item in response['items']:
                thread = item['snippet']
                comments_data.append(self._comment_row(
                    item['id'],
                    thread['topLevelComment']['snippet'],
                    video_id,
                    thread['totalReplyCount'],
                    None
                ))

                # The 'replies' key is only present when the thread has replies
                if 'replies' in item:
                    for reply in item['replies']['comments']:
                        comments_data.append(self._comment_row(
                            reply['id'],
                            reply['snippet'],
                            video_id,
                            0,
                            item['id']
                        ))

            # list_next yields the request for the following page, or None
            request = self.youtube.commentThreads().list_next(request, response)

        return pd.DataFrame(comments_data)

    def analyze_mental_health_indicators(self, text):
        """Flag which keyword categories occur in *text*.

        Matching is a case-insensitive substring test, so a keyword also
        matches inside longer words (e.g. 'sad' inside 'sadly').

        Returns:
            A dict with the boolean keys ``contains_support_seeking``,
            ``contains_emotional_words`` and ``contains_support_offering``.
        """
        support_seeking = ['help', 'advice', 'struggling', 'need', 'please']
        emotional_words = ['anxiety', 'depression', 'stress', 'worried', 'sad']
        support_offering = ['hope', 'support', 'here for you', 'understand']

        lowered = text.lower()  # normalise once instead of once per keyword

        return {
            'contains_support_seeking': any(word in lowered for word in support_seeking),
            'contains_emotional_words': any(word in lowered for word in emotional_words),
            'contains_support_offering': any(word in lowered for word in support_offering)
        }

    def create_full_dataset(self, video_ids):
        """Build the video and comment datasets for several videos.

        Videos for which ``get_video_data`` returns nothing are skipped
        together with their comments.

        Args:
            video_ids: Iterable of video IDs to process.

        Returns:
            A tuple ``(videos_df, comments_df)``. ``comments_df`` carries the
            columns of ``get_comments_with_sentiment`` plus the three
            indicator columns; it is an empty DataFrame when no comments
            were collected.
        """
        all_video_data = []
        all_comments_data = []

        for video_id in video_ids:
            video_data = self.get_video_data(video_id)
            if not video_data:
                continue
            all_video_data.append(video_data)

            comments_df = self.get_comments_with_sentiment(video_id)
            if comments_df.empty:
                # Nothing to tag; an empty frame has no 'text' column to read
                continue

            # Build all indicator columns in one pass instead of writing
            # individual cells with .at inside an iterrows loop.
            indicators = pd.DataFrame(
                [self.analyze_mental_health_indicators(text)
                 for text in comments_df['text']],
                index=comments_df.index
            )
            all_comments_data.append(pd.concat([comments_df, indicators], axis=1))

        # pd.concat raises ValueError on an empty list, so guard that case
        if all_comments_data:
            combined_comments = pd.concat(all_comments_data, ignore_index=True)
        else:
            combined_comments = pd.DataFrame()

        return pd.DataFrame(all_video_data), combined_comments

## Get All Comments

In [None]:
#original function
# Function to get replies for a specific comment
def get_replies(youtube, parent_id, video_id):
    """Return every reply to one comment as a list of row dicts.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_id: ID of the comment whose replies are requested.
        video_id: Video the comment belongs to; copied into each row.

    Returns:
        A list of dicts with the keys ``Timestamp``, ``Username``,
        ``VideoID``, ``Comment`` and ``Date``. ``Date`` is the reply's
        ``updatedAt`` when present, otherwise its ``publishedAt``.
    """
    collected = []
    page_token = None  # None requests the first page
    more_pages = True

    while more_pages:
        page = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=page_token
        ).execute()

        for entry in page['items']:
            snippet = entry['snippet']
            posted_at = snippet['publishedAt']
            row = {
                'Timestamp': posted_at,
                'Username': snippet['authorDisplayName'],
                'VideoID': video_id,
                'Comment': snippet['textDisplay'],
            }
            # Fall back to the publish time when the reply has no update time
            row['Date'] = snippet.get('updatedAt', posted_at)
            collected.append(row)

        # Stop once the response carries no (or an empty) continuation token
        page_token = page.get('nextPageToken')
        more_pages = bool(page_token)

    return collected

# Function to get all comments (including replies) for a single video
def get_comments_for_video(youtube, video_id):
    """Return all comments of one video, including replies, as row dicts.

    Each top-level comment is followed directly by its replies, which are
    fetched with ``get_replies`` whenever the thread reports a reply count
    above zero.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.

    Returns:
        A list of dicts with the keys ``Timestamp``, ``Username``,
        ``VideoID``, ``Comment`` and ``Date``. ``Date`` is the comment's
        ``updatedAt`` when present, otherwise its ``publishedAt``.
    """
    all_comments = []
    next_page_token = None  # None requests the first page

    while True:
        # Only the 'snippet' part is read below (replies come from
        # get_replies), and the page size matches get_replies so a video
        # needs fewer requests than with the API's default page size.
        comment_response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=100,
            pageToken=next_page_token,
            textFormat="plainText"
        ).execute()

        for item in comment_response['items']:
            thread = item['snippet']
            top_comment = thread['topLevelComment']['snippet']
            all_comments.append({
                'Timestamp': top_comment['publishedAt'],
                'Username': top_comment['authorDisplayName'],
                'VideoID': video_id,
                'Comment': top_comment['textDisplay'],
                # Fall back to the publish time when there is no update time
                'Date': top_comment.get('updatedAt', top_comment['publishedAt'])
            })

            # Skip the extra request for threads that have no replies
            if thread['totalReplyCount'] > 0:
                all_comments.extend(
                    get_replies(youtube, thread['topLevelComment']['id'], video_id)
                )

        next_page_token = comment_response.get('nextPageToken')
        if not next_page_token:
            break

    return all_comments

# Accumulates the row dicts (top-level comments and replies) from every video
all_comments = []


# Each call returns a list of row dicts for one video; extend keeps the list flat
for video_id in video_ids:
    video_comments = get_comments_for_video(youtube, video_id)
    all_comments.extend(video_comments)

# Build one DataFrame from the row dicts (one row per comment or reply)
comments_df = pd.DataFrame(all_comments)


### Output to CSV

In [None]:
# Write the whole comments DataFrame to a CSV file in the Colab runtime
csv_file = 'comments_data.csv'  # Output file name
comments_df.to_csv(csv_file, index=False)  # index=False leaves the row index out of the file

from google.colab import files

# Ask the browser to download the CSV from the Colab runtime to the local machine
files.download(csv_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>