In [2]:
# Import the necessary libraries
import os
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [3]:
# YouTube video URLs to scrape; list the videos to process here
video_links = [
    # Things You've Always Wanted To Ask: An ICA Officer (Fiona Lai, Team Leader)
    'https://www.youtube.com/watch?v=vJlTrgErEDw',
]

In [7]:
class YouTubeScrapper:
    r"""Export the comments of YouTube videos to CSV files.

    The scraper drives a Chrome browser through Selenium: it opens a
    comments-downloader web page, submits the video link there, waits, and
    saves the text of every comment element found on the resulting page.

    Setup (make sure you have internet access and jupyter notebook installed):

    1) Download chrome driver (check your Google Chrome version before
       downloading; make sure you download the matching one).
    2) Unzip the chrome driver file and move the .exe file to C:\Windows
    3) Open jupyter notebook.
    4) Run pip install selenium (also run pip install pandas if you don't
       have pandas installed yet).

    Arguments:
        headless: whether to hide the browser (bool). If True, Chrome is
            started with ``--headless`` and no window is displayed.
            Default False.
        short_buffer: seconds (float or int) to sleep after opening the
            downloader page and again after typing the video link.
        long_buffer: seconds (float or int) to sleep after submitting the
            form, before the comments are read from the page.
        api_link: URL of the comments-downloader page.
        videoInputXpath: XPath of the input that receives the video link.
        submitXpath: XPath of the button that submits the form.
        commentsXpath: XPath matching the elements that hold comment text.
    """

    # Folder (relative to the working directory) that receives the CSV files.
    _OUTPUT_DIR = 'extracted_comments'

    # Captures the video id from ".../watch?v=ID", "...&v=ID", "youtu.be/ID",
    # ".../embed/ID" and ".../shorts/ID"; the id ends at the next ?, &, # or /.
    _VIDEO_ID_RE = re.compile(r'(?:[?&]v=|youtu\.be/|/embed/|/shorts/)([^?&#/]+)')

    def __init__(self, headless=False,
                 short_buffer=2, long_buffer=15,
                 api_link = 'https://youtubecommentsdownloader.com/comments',
                 videoInputXpath = '//input[contains(@id, "video-id")]',
                 submitXpath = '//button[contains(@type, "submit")]',
                 commentsXpath = '//div[contains(@class, "comment__text")]'
                ):

        self.api_link = api_link

        self.videoInputXpath = videoInputXpath
        self.submitXpath = submitXpath
        self.commentsXpath = commentsXpath

        self.short_buffer = short_buffer
        self.long_buffer = long_buffer

        # If headless is set to True, the chrome browser will not pop up while scraping
        if headless:
            chrome_options = Options()
            chrome_options.add_argument("--disable-gpu")
            chrome_options.add_argument("--headless")
            self.browser = webdriver.Chrome(options=chrome_options)
        else:
            self.browser = webdriver.Chrome()

        # Maximize the window
        self.browser.maximize_window()


    def getFileName(self, video_link):

        """ Method to build the csv file name from a YouTube video link

            Arguments:
                video_link: URL of the video (str)

            Returns:
                '<video id>.csv'. When no video id can be recognised in the
                link, everything after the first '=' is used as the id, which
                is what earlier versions of this method always did.
        """

        match = self._VIDEO_ID_RE.search(video_link)
        if match:
            # Only the id itself is kept, so extra query parameters such as
            # "&t=10s" no longer leak into the file name.
            video_id = match.group(1)
        else:
            # Unrecognised link shape: keep the historical behaviour.
            video_id = video_link.partition('=')[-1]

        return video_id + '.csv'


    def findComments(self):

        """ Method to locate all the comments on the current page and return their text as a list """

        # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
        # newer Selenium releases no longer provide.
        elements = self.browser.find_elements(By.XPATH, self.commentsXpath)
        return [element.text for element in elements]


    def exportCommentsToCSV(self, fileName):

        """ Method to save the comments on the current page into a csv file

            Arguments:
                fileName: name of the csv file, created inside the
                    'extracted_comments' folder (str)
        """

        # Get the list of raw comment texts
        comments = self.findComments()

        # Create the output folder if it does not exist yet. exist_ok only
        # tolerates "already exists"; other failures (e.g. permissions) are
        # raised instead of being silently swallowed.
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)

        # Load list of comments into a dataframe and save it to a csv file
        pd.DataFrame({'Comments': comments}).to_csv(os.path.join(self._OUTPUT_DIR, fileName))

        print('{} comment(s) have been extracted successfully to {}!\n'.format(len(comments), fileName))


    def exportCommentsOnePost(self, video_link):

        """ Method to export comments from one video

            Arguments:
                video_link: URL of the video (str)
        """

        print('Extracting comment(s) from {} ...'.format(video_link))

        # Get file name
        fileName = self.getFileName(video_link)

        # Navigate to youtube comments downloader link
        self.browser.get(self.api_link)

        # Wait for page to load
        time.sleep(self.short_buffer)

        # Input video link
        self.browser.find_element(By.XPATH, self.videoInputXpath).send_keys(video_link)

        # Wait for page to load
        time.sleep(self.short_buffer)

        # Click on get comments
        self.browser.find_element(By.XPATH, self.submitXpath).click()

        # Wait for the comments to be rendered
        time.sleep(self.long_buffer)

        # Export the comments to csv
        self.exportCommentsToCSV(fileName)


    def exportComments(self, video_links):

        """ Method to extract comments from a list of youtube video links

            The browser is shut down when this method returns or raises, so
            the instance cannot be reused for another batch afterwards.

            Arguments:
                video_links: list of video URLs (list of str)

            Raises:
                AssertionError: if video_links is not a list
        """

        assert isinstance(video_links, list), 'video_links takes in a list as input!'

        # Get start time
        start_time = time.time()

        try:
            # Loop through the list of video_links
            for video_link in video_links:
                self.exportCommentsOnePost(video_link)

            # Calculate time taken
            time_taken = time.time() - start_time

            print('Comments extraction has completed successfully!')
            print('Total time taken: {}'.format(time.strftime("%H:%M:%S", time.gmtime(time_taken))))
        finally:
            # Always release the browser, even when a video fails part-way.
            # quit() ends the whole WebDriver session, whereas close() only
            # closes the current window.
            self.browser.quit()

In [8]:
# Instantiate the YouTubeScrapper class (headless=False keeps the Chrome window visible)
yt_scrapper = YouTubeScrapper(headless=False)

# Extract comments from the list of youtube videos
yt_scrapper.exportComments(video_links)

Extracting comment(s) from https://www.youtube.com/watch?v=vJlTrgErEDw ...
488 comment(s) have been extracted successfully to vJlTrgErEDw.csv!

Comments extraction has completed successfully!
Total time taken: 00:00:28
