In [1]:
import pandas as pd
from dotenv import load_dotenv
import os
from yt_comment_scraper_util import YoutubeCommentScraper, CommentScraperUtil
from datetime import datetime
# Load variables from the local .env file into the environment so that the
# later os.getenv('API_KEY') lookup can find the key.
load_dotenv()
import csv

In [2]:
# Read the YouTube Data API key from the environment (populated from .env).
api_key = os.getenv('API_KEY')  # make sure you have your API key in the .env file
if not api_key:
    # Fail here with an actionable message instead of letting a None/empty key
    # surface later as an opaque API error in the middle of a scrape.
    raise RuntimeError(
        "API_KEY is not set. Add a line like API_KEY=<your key> to the .env file "
        "and re-run the notebook from the top."
    )

# Client used by the scraping cell to fetch comments for each video id.
scraper = YoutubeCommentScraper(api_key)

In [3]:
# --- Run configuration ---------------------------------------------------
start_date = '2024-01-01'  # forwarded to CommentScraperUtil.load_video_ids
channel = 'CNN'            # selects the per-channel CSV files built below

script_dir = os.getcwd()

# Input/output locations, resolved relative to the current working directory.
path_to_link_csv = os.path.join(script_dir, f'../data/Raw Data/{channel}_links.csv')
path_to_comment_csv = os.path.join(script_dir, f'../data/Raw Data/{channel}_comments.csv')

# datetime.now has to be *called*: str(datetime.now) is the repr of the method
# object, not a timestamp. The strftime pattern avoids ':' and spaces so the
# resulting name is a valid file name on every platform.
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
path_to_error_logs = os.path.join(script_dir, f'../data/Error Logs/{channel}_{run_timestamp}.csv')

# Resume point and previously saved comments, then the list of ids still to scrape.
# NOTE(review): the helper is named get_earliest_video_id but its result is bound
# to most_recent_video_id — confirm which of the two it actually returns.
most_recent_video_id, previous_comments_df = CommentScraperUtil.get_earliest_video_id(path_to_comment_csv)
video_ids_to_scrape = CommentScraperUtil.load_video_ids(path_to_link_csv, start_date, most_recent_video_id)

# Snapshot of the comments file as it is on disk right now. Guarded so that a
# first run (no comments file yet) does not abort the cell; comments_df is
# reassigned before it is written back out.
comments_df = pd.read_csv(path_to_comment_csv) if os.path.exists(path_to_comment_csv) else None


In [4]:
# Fetch comments for every pending video. Successful results are pooled into
# all_comments; failures are recorded per video id in errors.
all_comments = []
errors = []
for idx, video_id in enumerate(video_ids_to_scrape):
    print(f'Scraping video #{idx} | Video Id: {video_id}')
    # The scraper hands back a (comments, error) pair for each video.
    fetched_comments, fetch_error = scraper.get_video_comments(video_id)
    if fetched_comments:
        all_comments += fetched_comments
    if fetch_error:
        print(f'Error for {video_id}')
        errors += [{'video_id': video_id, "error": fetch_error}]

# Tabular views of this run's results, consumed by the save step.
new_comments = pd.DataFrame(all_comments)
errors_df = pd.DataFrame(errors)

Scraping video #0 | Video Id: N_HcQIzWfso
Some other error occurred: {
  "error": {
    "code": 403,
    "message": "The video identified by the \u003ccode\u003e\u003ca href=\"/youtube/v3/docs/commentThreads/list#videoId\"\u003evideoId\u003c/a\u003e\u003c/code\u003e parameter has disabled comments.",
    "errors": [
      {
        "message": "The video identified by the \u003ccode\u003e\u003ca href=\"/youtube/v3/docs/commentThreads/list#videoId\"\u003evideoId\u003c/a\u003e\u003c/code\u003e parameter has disabled comments.",
        "domain": "youtube.commentThread",
        "reason": "commentsDisabled",
        "location": "videoId",
        "locationType": "parameter"
      }
    ]
  }
}

Error for N_HcQIzWfso
Scraping video #1 | Video Id: 64ivmhyyRSI
Scraping video #2 | Video Id: lzeZuTGf6bI
Scraping video #3 | Video Id: 9AdixAJVTO0
Scraping video #4 | Video Id: Sipthujz68M
Scraping video #5 | Video Id: pDt1ftceAgc
Scraping video #6 | Video Id: 788aQYgla70
Scraping video #7 | Video 

In [6]:
# Combine this run's comments with the previously saved ones (when there are
# any) so the CSV on disk always holds the full history.
if previous_comments_df is not None:
    comments_df = pd.concat([previous_comments_df, new_comments], ignore_index=True)
else:
    comments_df = new_comments

# DataFrame.to_csv does not create missing parent folders, so make sure both
# output directories exist before writing.
for output_path in (path_to_comment_csv, path_to_error_logs):
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

# Quote every non-numeric field and escape with a backslash so free-text
# comments containing commas, quotes or newlines stay in one CSV field.
comments_df.to_csv(path_to_comment_csv, quoting=csv.QUOTE_NONNUMERIC, escapechar='\\', index=False, encoding='utf-8')
errors_df.to_csv(path_to_error_logs, index=False)
