In [1]:
import pandas as pd
from dotenv import load_dotenv
import os
from yt_comment_scraper_util import YoutubeCommentScraper, CommentScraperUtil
from datetime import datetime
load_dotenv()
import csv

In [2]:
# Read the API key from the environment; load_dotenv() in the import cell
# has already merged any .env file into os.environ.
api_key = os.getenv('API_KEY')  # make sure you have your API key in the .env file
if not api_key:
    # os.getenv returns None when the variable is unset; stop here with a
    # clear message rather than handing a missing key to the scraper.
    raise RuntimeError(
        "API_KEY is not set. Add API_KEY=<your key> to the .env file "
        "(or export it in the environment) and re-run this cell."
    )
scraper = YoutubeCommentScraper(api_key)

In [3]:
# --- Run configuration -------------------------------------------------------
start_date = '2024-01-01'  # forwarded to CommentScraperUtil.load_video_ids below
channel = 'CNN'            # used to build the per-channel file names

# Paths are resolved relative to the current working directory.
script_dir = os.getcwd()

path_to_link_csv = os.path.join(script_dir, f'../data/Raw Data/{channel}_links.csv')
path_to_comment_csv = os.path.join(script_dir, f'../data/Raw Data/{channel}_comments.csv')

# datetime.now must be *called*; str(datetime.now) is the repr of the method
# ("<built-in method now ...>"), not a timestamp. strftime is used instead of
# str() so the file name has no spaces or colons (colons are invalid on Windows).
run_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
path_to_error_logs = os.path.join(script_dir, f'../data/Error Logs/{channel}_{run_timestamp}.csv')

# --- Resume state ------------------------------------------------------------
# previous_comments_df may be None (the save cell checks for that).
# NOTE(review): the helper is named get_earliest_video_id but its result is
# stored as most_recent_video_id — confirm which one it actually returns.
most_recent_video_id, previous_comments_df = CommentScraperUtil.get_earliest_video_id(path_to_comment_csv)
video_ids_to_scrape = CommentScraperUtil.load_video_ids(path_to_link_csv, start_date, most_recent_video_id)


In [None]:
# Scrape comments for every pending video, collecting results and per-video
# errors in plain lists so the DataFrames are built only once.
all_comments = []
errors = []
try:
    for count, video_id in enumerate(video_ids_to_scrape):
        print(f'Scraping video #{count} | Video Id: {video_id}')
        # get_video_comments returns a (comments, error) pair.
        comments_data, error = scraper.get_video_comments(video_id)
        if comments_data:
            all_comments.extend(comments_data)
        if error:
            print(f'Error for {video_id}')
            errors.append({'video_id': video_id, "error": error})
            if error == "Quota Exceeded":
                # Stop at the first quota error instead of trying the remaining videos.
                break
finally:
    # Build the frames even if the loop is interrupted or raises, so whatever
    # was scraped so far is still available to the save cell. The exception
    # itself is not swallowed; it propagates after this block runs.
    new_comments = pd.DataFrame(all_comments)
    errors_df = pd.DataFrame(errors)

Scraping video #0 | Video Id: TsI3XQlyRUE
Scraping video #1 | Video Id: DPAZuBeQ_TM
Scraping video #2 | Video Id: FdpPmmDrrb0
Scraping video #3 | Video Id: VgOTzPcc41o
Scraping video #4 | Video Id: uo-mKcanC9c
Scraping video #5 | Video Id: ZgqRVbTKUEU
Scraping video #6 | Video Id: 75jaxrSRB7M
Scraping video #7 | Video Id: RYVr4jIukU8
Scraping video #8 | Video Id: pDMF90S7HwU
Scraping video #9 | Video Id: 8adNu2HU1-s
Scraping video #10 | Video Id: J-ErmOKOm5c
Scraping video #11 | Video Id: 2xmmCZJXtgs
Scraping video #12 | Video Id: _W3lAyPculo
Scraping video #13 | Video Id: tNr-Fa1l4e8
Scraping video #14 | Video Id: NenNx4vDbio
Scraping video #15 | Video Id: 0en6s3i_WPA
Scraping video #16 | Video Id: FBIowJ6IOiM
Scraping video #17 | Video Id: xccbV6cr5dU
Scraping video #18 | Video Id: zyKOn_ooFmk
Scraping video #19 | Video Id: RXdUcWw_LDQ
Scraping video #20 | Video Id: zwhE3lhi5h8
Scraping video #21 | Video Id: hEJDcAKNr7o
Scraping video #22 | Video Id: zkytPfu8ebc
Scraping video #23 | 

In [6]:
# Merge this run's comments with those loaded from the existing comments CSV
# (previous_comments_df is None when there is nothing to merge with).
if previous_comments_df is not None:
    comments_df = pd.concat([previous_comments_df, new_comments], ignore_index=True)
else:
    comments_df = new_comments

# to_csv does not create missing parent directories, so make sure both
# output folders exist before writing.
for output_path in (path_to_comment_csv, path_to_error_logs):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Quote every non-numeric field and use a backslash escape character when
# writing the comments CSV.
comments_df.to_csv(path_to_comment_csv, quoting=csv.QUOTE_NONNUMERIC, escapechar='\\', index=False, encoding='utf-8')
errors_df.to_csv(path_to_error_logs, index=False)
