# **Data Scraping from YouTube**

In [None]:
from google.colab import drive

# Make Google Drive available inside the Colab runtime at this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Optional setup step: uncomment the line below if the Google API client
# (imported later as `googleapiclient`) is not already installed in this runtime.
# !pip install google-api-python-client

To scrape YouTube comments, you'll need a Google API key. If you don't have one, you can create one in the Google Cloud Console.

Once you have your key, add it to the Colab secrets manager under the "🔑" icon in the left panel and name it `GOOGLE_API_KEY`.

Videos used in this project:
- https://www.youtube.com/watch?v=g_fkq6WHcOs
- https://www.youtube.com/watch?v=O2xb1lVqUv4
- https://www.youtube.com/watch?v=9-poYwCZxDQ&t=943s

In [None]:
from googleapiclient.discovery import build
import csv
import pandas as pd

def scrape_youtube_comments(video_ids, dev_key, youtube=None):
    """Collect the top-level comments of each given YouTube video.

    For every video ID, pages through the YouTube Data API v3
    ``commentThreads.list`` endpoint until the response no longer carries a
    ``nextPageToken``. Only the top-level comment of each thread is read;
    replies are not collected. Progress is printed per video and in total.

    Args:
        video_ids: Iterable of YouTube video ID strings.
        dev_key: Google API key used to build the API client. Ignored when
            ``youtube`` is supplied.
        youtube: Optional, already-built YouTube Data API service object
            (anything exposing ``commentThreads().list(...).execute()``).
            When omitted, a client is built from ``dev_key``.

    Returns:
        A list of dicts, one per comment, with the keys ``'video_id'``,
        ``'author'``, ``'comment'`` and ``'published_at'``, in the order the
        API returned them, grouped by the order of ``video_ids``.
    """
    # commentThreads.list documents maxResults as 1..100 inclusive, so ask for
    # the largest valid page and rely on pagination for the rest.
    max_results_per_page = 100

    if youtube is None:
        # Build the YouTube Data API service
        youtube = build("youtube", "v3", developerKey=dev_key)

    all_comments = []

    for video_id in video_ids:
        comments = []
        next_page_token = None

        while True:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=max_results_per_page,
                pageToken=next_page_token
            )
            response = request.execute()

            # A page without an "items" key is treated as empty rather than
            # raising KeyError.
            for item in response.get("items", []):
                comment = item["snippet"]["topLevelComment"]["snippet"]
                comments.append({
                    'video_id': video_id,
                    'author': comment['authorDisplayName'],
                    'comment': comment['textDisplay'],
                    'published_at': comment['publishedAt']
                })

            next_page_token = response.get("nextPageToken")

            # No token means this was the last page for the current video.
            if not next_page_token:
                break
        all_comments.extend(comments)
        print(f"Scraped {len(comments)} comments from video {video_id}.")

    print(f"Scraped a total of {len(all_comments)} comments from all videos.")
    return all_comments

In [None]:
# IDs of the videos to scrape (the value of the `v=` parameter in each URL).
video_ids = ['g_fkq6WHcOs', 'O2xb1lVqUv4', '9-poYwCZxDQ']

# Read the API key from the Colab secrets manager (secret name: GOOGLE_API_KEY)
# instead of pasting it into the notebook, so it is never saved with the file.
from google.colab import userdata
dev_key = userdata.get('GOOGLE_API_KEY')

all_comments = scrape_youtube_comments(video_ids, dev_key)
df = pd.DataFrame(all_comments)
df.tail()

Scraped 632 comments from video g_fkq6WHcOs.
Scraped 802 comments from video O2xb1lVqUv4.
Scraped 2984 comments from video 9-poYwCZxDQ.
Scraped a total of 4418 comments from all videos.


Unnamed: 0,video_id,author,comment,published_at
4413,9-poYwCZxDQ,@mohamadferdian2039,First,2025-09-22T00:54:29Z
4414,9-poYwCZxDQ,@farrelarizky4511,😮😮,2025-09-22T00:54:29Z
4415,9-poYwCZxDQ,@anwarfawzi1477,❤,2025-09-22T00:54:27Z
4416,9-poYwCZxDQ,@muhammadghifari5629,pertamax,2025-09-22T00:54:25Z
4417,9-poYwCZxDQ,@ridwanmuhamad628,1,2025-09-22T00:54:25Z


In [None]:
# Save the raw scraped comments to an Excel workbook, without the index column.
raw_comments_path = "raw_comments.xlsx"
df.to_excel(raw_comments_path, index=False)