In [4]:
"""
Main script to scrape the comments of any Youtube video.

Example:
    $ python main.py YOUTUBE_VIDEO_URL
"""

import csv
import io
from selenium import webdriver
from selenium.common import exceptions
import sys
import time
from os.path import exists
from dateutil.parser import parse

def _normalize_post_date(raw_date):
    """
    Converts the date text shown for a video into 'DD-Mon-YYYY' form.

    Args:
        raw_date (str): Date text as read from the page, e.g.
            'Dec 29, 2020', 'Streamed live on Jul 31, 2020' or
            'Premiered Dec 18, 2018'

    Returns:
        str: The date formatted with '%d-%b-%Y', e.g. '29-Dec-2020'
    """
    cleaned = raw_date.replace(',', '')
    # str.replace is a no-op when the prefix is absent, so no
    # membership check is needed first.
    for prefix in ('Streamed live on ', 'Premiered '):
        cleaned = cleaned.replace(prefix, '')
    return parse(cleaned).strftime('%d-%b-%Y')


def _unused_csv_name(stem):
    """
    Returns '<stem>.csv', or '<stem>(N).csv' for the first N >= 1 that
    does not exist yet, so an earlier result file is never overwritten.

    Args:
        stem (str): File name without the '.csv' extension

    Returns:
        str: A file name that does not currently exist
    """
    candidate = stem + ".csv"
    counter = 1
    while exists(candidate):
        # Always derive the candidate from the bare stem; appending to the
        # previous candidate would pile suffixes up as 'stem(1)(2)(3)'.
        candidate = stem + "(" + str(counter) + ").csv"
        counter += 1
    return candidate


def scrape(url):
    """
    Extracts the comments from the Youtube video given by the URL.

    Appends a row [url, post date] to 'out.csv' and writes the
    username/comment pairs to a new '<post date>.csv' file (UTF-16,
    all fields quoted) in the current directory.

    Args:
        url (str): The URL to the Youtube video

    Raises:
        selenium.common.exceptions.NoSuchElementException:
        When certain elements to look for cannot be found
    """

    # Note: Download and replace argument with path to the driver executable.
    # Simply download the executable and move it into the webdrivers folder.
    driver = webdriver.Chrome('./webdrivers/chromedriver')

    try:
        # Navigates to the URL, maximizes the current window, and
        # then suspends execution for (at least) 5 seconds (this
        # gives time for the page to load).
        driver.get(url)
        driver.maximize_window()
        time.sleep(5)

        try:
            # Extract the elements storing the video title and
            # comment section.
            title = driver.find_element_by_xpath('//*[@id="container"]/h1/yt-formatted-string').text
            comment_section = driver.find_element_by_xpath('//*[@id="comments"]')
        except exceptions.NoSuchElementException:
            # Note: Youtube may have changed their HTML layouts for
            # videos, so raise an error for sanity sake in case the
            # elements provided cannot be found anymore.
            error = "Error: Double check selector OR "
            error += "element may not yet be on the screen at the time of the find operation"
            print(error)
            # Re-raise: nothing below can work without these elements, and
            # continuing would only fail later on an unbound local name.
            raise

        # Scroll into view the comment section, then allow some time
        # for everything to be loaded as necessary.
        driver.execute_script("arguments[0].scrollIntoView();", comment_section)
        time.sleep(7)

        # Scroll all the way down to the bottom in order to get all the
        # elements loaded (since Youtube dynamically loads them).
        last_height = driver.execute_script("return document.documentElement.scrollHeight")

        while True:
            # Scroll down 'til "next load".
            driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")

            # Wait to load everything thus far.
            time.sleep(2)

            # Calculate new scroll height and compare with last scroll height.
            new_height = driver.execute_script("return document.documentElement.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # One last scroll just in case.
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")

        # Extract the elements storing the usernames and comments. The
        # plural find_elements_* call returns an empty list when nothing
        # matches, so there is no NoSuchElementException to handle here.
        username_elems = driver.find_elements_by_xpath('//*[@id="author-text"]')
        comment_elems = driver.find_elements_by_xpath('//*[@id="content-text"]')

        print("> VIDEO TITLE: " + title + "\n")

        try:
            postdate = driver.find_element_by_css_selector("span#dot+yt-formatted-string").get_attribute('innerHTML')
        except Exception as e:
            print(e)
            # Without the post date there is no file name to write to.
            raise
        print(postdate)

        # Read the element texts while the browser session is still alive.
        rows = [[username.text, comment.text]
                for username, comment in zip(username_elems, comment_elems)]
    finally:
        # quit() (rather than close(), which only closes the current window)
        # shuts the whole driver session down, and doing it in `finally`
        # releases the browser on every error path as well.
        driver.quit()

    postdate = _normalize_post_date(postdate)

    # file with link+date
    with open('out.csv', 'a', newline="") as f:  # output csv file
        csv.writer(f).writerow([url, postdate])

    # result file
    with io.open(_unused_csv_name(postdate), 'w', newline='', encoding="utf-16") as file:
        writer = csv.writer(file, delimiter=",", quoting=csv.QUOTE_ALL)
        writer.writerow(["Username", "Comment"])
        writer.writerows(rows)

In [7]:
import csv

# Scrape every video listed in the 'YouTube Link' column of the input sheet.
# newline='' lets the csv module handle line endings itself, so quoted
# fields containing embedded newlines are parsed correctly.
with open('Altcoin Daily.csv', newline='') as f:
    reader = csv.DictReader(f, delimiter=',')
    for row in reader:
        video_url = row['YouTube Link']
        print(video_url)
        scrape(video_url)

https://www.youtube.com/watch?v=pNPFnAq9XG8
> VIDEO TITLE: DIGITEX Launch Party! Interview With CEO ADAM TODD [Cryptocurrency News Online]

Streamed live on Jul 31, 2020
https://www.youtube.com/watch?v=E9qFkTDpd2A
> VIDEO TITLE: The NFL makes MAJOR Cryptocurrency Announcement for 2021 as Bloomberg Pumps Bitcoin AND Ethereum!

Dec 29, 2020
https://www.youtube.com/watch?v=-DUR5Ste-9I
> VIDEO TITLE: “Bitcoin Could Be At $100,000 Next Week!” - Brekkie Von Bitcoin Talks SwanBitcoin, BTC Art & MORE!

Feb 14, 2021


In [3]:
from dateutil.parser import parse

# Scratch check of the date clean-up on a 'Premiered ...' sample string:
# strip the comma and the prefix, parse, then format as DD-Mon-YYYY.
postdate = 'Premiered Dec 18, 2018'.replace(',', '')
# str.replace leaves the text untouched when the prefix is absent.
postdate = postdate.replace('Premiered ', '')
postdate = parse(postdate).strftime('%d-%b-%Y')
a = postdate
print(a)

18-Dec-2018
