# YouTube Comments Scraper

In [1]:
# Import googleapiclient module
!pip install youtube-data-api
!pip install google-api-python-client

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple/
Collecting youtube-data-api
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/0f/75/afb624932bbafed9edb899f56acd237ec341a80e27948c6191fccbaabe49/youtube_data_api-0.0.21-py3-none-any.whl (12 kB)
Collecting pandas (from youtube-data-api)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl (11.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pytz>=2020.1 (from pandas->youtube-data-api)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl (509 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.2/509.2 kB[0m [31m445.8 kB/s[0m eta [36m0:00:00[0m0m
[?25hCollecting tzdat

In [4]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import pandas as pd

# Replace with your YouTube API Key
DEVELOPER_KEY = "AIzaSyAKLFmwDR3RHqjSp8i8mFeIAcaR4dpQnO0"

# Replace with the video ID of the YouTube video
VIDEO_ID = "87Jor5G_NBs"

def get_comments(video_id, part="snippet", max_pages=200):
    """
    Retrieves comments from a YouTube video, paging through up to `max_pages`.
    
    Args:
        video_id (str): The ID of the YouTube video.
        part (str): The part of the comment snippet to retrieve.
        max_pages (int): The maximum number of pages to retrieve (you can adjust as needed).

    Returns:
        list[dict]: A list of dictionaries containing comment text and number of likes.
    """
    youtube = build("youtube", "v3", developerKey=DEVELOPER_KEY)

    comments = []
    page_token = None
    page_count = 0

    try:
        while page_count < max_pages:
            response = youtube.commentThreads().list(
                part=part,
                videoId=video_id,
                textFormat="plainText",
                maxResults=100,          # Up to 100 per page
                pageToken=page_token     # Start from the next page if available
            ).execute()

            # Extract comments from this page
            for item in response.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]
                comment_text = snippet["textDisplay"]
                likes = snippet["likeCount"]
                reply_count = snippet.get("totalReplyCount", 0)
                comments.append({
                    "comment": comment_text,
                    "num_of_likes": likes,
                    "reply_count": reply_count
                })

            # Check if there's another page
            page_token = response.get("nextPageToken")
            if not page_token:
                # No more pages
                break

            page_count += 1

        return comments

    except HttpError as error:
        print(f"An HTTP error {error.http_status} occurred:\n {error.content}")
        return []

def main():
  # Get comments from the video
  comments = get_comments(VIDEO_ID)

  if comments:
    # Create a pandas dataframe from the comments list
    df = pd.DataFrame(comments)

    # Sort dataframe by number of likes in descending order
    df = df.sort_values(by=['num_of_likes'], ascending=False)

    # Print dataframe
    print(df)

    # Export dataframe to a CSV file named "comments.csv"
    df.to_csv("../data/comments.csv", index=False)
  else:
    print("Error: Could not retrieve comments from video.")

if __name__ == "__main__":
  main()

                                                 comment  num_of_likes  \
13044  this guy is a hell of a documentarian. cuts ri...         15812   
14079  Böyle kaliteli bir belgeseli ilk izleyenlerden...         10834   
14208  Türkiye'nin gururu bu adam ülkenin en iyi içer...          5869   
0      Thank you, Opera, for sponsoring this video! C...          5156   
14362  Thank you, Opera, for sponsoring this video! C...          5156   
...                                                  ...           ...   
5694   Почему канал на русском языке, а комментарии в...             0   
5695                      THANKS FOR VISITING MY COUNTRY             0   
5696                                     Amazing nature.             0   
5697                                          Best video             0   
7185       سبحان الخالق لها هاذا الابداع ما شاء الله ✨🤍.             0   

       reply_count  
13044            0  
14079            0  
14208            0  
0                0  
14362 