In [1]:
# Import the necessary libraries
import os
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [2]:
# Input list of facebook posts here
fb_links = [
    'https://www.facebook.com/watch/?v=550504148839462',   # ICA Officers: Protect. Secure. Inspire. (Full) video
    'https://www.facebook.com/watch/?v=2829551807143095',  # ICA – Who Are We: Image Analyst video
]

# Facebook credentials are read from the FB_EMAIL and FB_PASSWORD environment variables
# so that they are never typed into (and saved with) the notebook itself.
# Both fall back to an empty string when the variable is not set.
email = os.environ.get('FB_EMAIL', '')        # facebook account email
password = os.environ.get('FB_PASSWORD', '')  # facebook account password

In [5]:
class FacebookScrapper:

    r""" FacebookScrapper class provides methods to export facebook post comments as a csv

        Instructions (Make sure you have internet access and jupyter notebook installed):

        1) Download chrome driver (Check your Google Chrome Version before downloading; make sure you download the correct one)
        2) Unzip the chrome driver file and move the .exe file to C:\Windows
        3) Open jupyter notebook
        4) Run pip install selenium (Also run pip install pandas if you don't have pandas installed yet)

        Arguments:
            email: facebook account email (str)
            password: facebook account password (str)
            headless: whether to display the browser (bool), if set to True, browser is not displayed, default False
            buffer: buffer time in seconds for page to load (float or int), default 2
            viewCommentsXpath: xpath of the link that opens the comments of a post (str)
            viewMoreCommentsXpath: xpath of the links that load more comments (str)
            commentsXpath: xpath of the elements holding the comment text (str)
    """

    def __init__(self, email, password, headless=False, buffer=2,
                 viewCommentsXpath = '//span[contains(@data-sigil, "comments-token")]',
                 viewMoreCommentsXpath='//a[contains(@data-sigil, "ajaxify")]',
                 commentsXpath='//div[contains(@data-sigil,"comment-body")]',
                ):

        """ Store the settings and start a Chrome browser session """

        # Check if email and password are strings
        assert isinstance(email, str) and isinstance(password, str), 'email and password takes in strings as inputs!'

        # Initialization of attributes
        self.email = email
        self.password = password

        self.viewCommentsXpath = viewCommentsXpath
        self.viewMoreCommentsXpath = viewMoreCommentsXpath
        self.commentsXpath = commentsXpath

        self.buffer = buffer

        # If headless is set to True, the chrome browser will not pop out when scrapping
        if headless:
            chrome_options = Options()
            chrome_options.add_argument("--disable-gpu")
            chrome_options.add_argument("--headless")
            self.browser = webdriver.Chrome(options=chrome_options)
        else:
            self.browser = webdriver.Chrome()

        # Links already clicked for the current post, and links still waiting to be clicked
        self.clicked_links = set()
        self.unclicked_links = None


    @staticmethod
    def _toMobileLink(fb_link):

        """ Helper to turn a www facebook link into its mobile facebook equivalent.

            Only the leading "www." of the host is rewritten, so a "www" that appears
            later in the path or query string is left untouched. Links that do not
            start with "www." (after an optional http:// or https://) are returned unchanged.
        """

        return re.sub(r'^((?:https?://)?)www\.', r'\1mobile.', fb_link, count=1)


    def login(self):

        """ Method to log in into mobile facebook """

        # Navigate to mobile facebook login page
        self.browser.get("https://mobile.facebook.com/")

        # Maximise the window
        self.browser.maximize_window()

        # Input email and password fields
        # (find_element(By..., ...) is used because the find_element_by_* helpers were removed in Selenium 4)
        self.browser.find_element(By.NAME, 'email').send_keys(self.email)
        self.browser.find_element(By.NAME, 'pass').send_keys(self.password)

        # Locate the login button and click on it
        self.browser.find_element(By.NAME, 'login').click()

        # Pause a while for the page to load
        time.sleep(self.buffer)


    def getFileName(self, fb_link):

        """ Method to build the csv file name from a facebook link.

            Every non-digit character is removed from the link and the first
            16 remaining digits are used as the file name, followed by ".csv".
        """

        return re.sub(r'[^0-9]', '', fb_link)[:16] + '.csv'


    def viewComments(self):

        """ Method to locate and click on the comments.

            Raises IndexError when no element matches viewCommentsXpath.
        """

        # Get list of comments links
        comments_links = self.browser.find_elements(By.XPATH, self.viewCommentsXpath)

        # Click on the first comment link
        comments_links[0].click()

        # Pause a while for the page to load
        time.sleep(self.buffer)


    def viewMoreComments(self):

        """ Method to locate and click on the links to view more comments.

            Keeps clicking until no link matching viewMoreCommentsXpath is left that
            has not been tried yet. A link that cannot be clicked is skipped.
        """

        # Get set of unclicked links
        self.unclicked_links = set(self.browser.find_elements(By.XPATH, self.viewMoreCommentsXpath))

        # Loop through and click on all the view more comments links on the page
        while self.unclicked_links:

            # Loop through current set of unclicked links and click on them
            for unclicked_link in self.unclicked_links:

                # Record the link before clicking, so a link that fails is never retried
                # and the outer loop is guaranteed to terminate
                self.clicked_links.add(unclicked_link)

                try:
                    # Click on link
                    unclicked_link.click()
                except WebDriverException:
                    # An earlier click may have re-rendered the page, leaving this
                    # element stale or not clickable; skip it instead of aborting the run
                    continue

                # Pause a while for the page to load
                time.sleep(self.buffer)

            # Get set of unclicked links from the newly loaded page
            self.unclicked_links = set(self.browser.find_elements(By.XPATH, self.viewMoreCommentsXpath)) - self.clicked_links


    def findComments(self):

        """ Method to locate all the post comments and return their text as a list """

        return [comment.text for comment in self.browser.find_elements(By.XPATH, self.commentsXpath)]


    def exportCommentsToCSV(self, fileName):

        """ Method to save the comments into extracted_comments/<fileName> as a csv file """

        # Get list of comments currently shown on the page
        comments = self.findComments()

        # Create extracted_comments folder if it does not exist
        os.makedirs('extracted_comments', exist_ok=True)

        # Load list of comments into a dataframe and save it to a csv file
        pd.DataFrame({'Comments': comments}).to_csv(os.path.join('extracted_comments', fileName))

        print('{} comment(s) have been extracted successfully to {}!\n'.format(len(comments), fileName))


    def exportCommentsOnePost(self, fb_link):

        """ Method to export comments from one post """

        # Convert facebook link to mobile facebook link
        mobile_link = self._toMobileLink(fb_link)
        print('Extracting comment(s) from {} ...'.format(mobile_link))

        # Get file name
        fileName = self.getFileName(fb_link)

        try:
            # Navigate to the mobile facebook link
            self.browser.get(mobile_link)

            # Pause a while for the page to load
            time.sleep(self.buffer)

            try:
                # Click on the comments link
                self.viewComments()
            except (IndexError, WebDriverException):
                # Best effort: the page has no comments link (IndexError) or it could
                # not be clicked; carry on with whatever comments are on the page
                pass

            # Click on all the view more comments links
            self.viewMoreComments()

            # Export the comments to csv
            self.exportCommentsToCSV(fileName)

        finally:
            # Reset the set of clicked links, even if this post failed,
            # so the next post starts from a clean state
            self.clicked_links = set()


    def exportComments(self, fb_links):

        """ Method to extract comments from a list of facebook post links.

            Logs in, exports one csv per link and prints the total time taken.
            The browser session is always shut down at the end, also on failure.
        """

        assert isinstance(fb_links, list), 'fb_links takes in a list as input!'

        # Get start time
        start_time = time.time()

        try:
            # Login to facebook
            self.login()

            # Loop through the list of fb_links
            for fb_link in fb_links:
                self.exportCommentsOnePost(fb_link)

            # Get end time
            end_time = time.time()

            # Calculate time taken
            time_taken = end_time - start_time

            print('Comments extraction has completed successfully!')
            print('Total time taken: {}'.format(time.strftime("%H:%M:%S", time.gmtime(time_taken))))

        finally:
            # quit() ends the whole driver session (close() only closes the window),
            # so the driver is released even when the extraction fails
            self.browser.quit()

In [7]:
# Build the scraper with a visible (non-headless) Chrome window
fb_scrapper = FacebookScrapper(email=email, password=password, headless=False)

# Log in and export the comments of every post in fb_links
fb_scrapper.exportComments(fb_links=fb_links)

Extracting comment(s) from https://mobile.facebook.com/watch/?v=550504148839462 ...
59 comment(s) have been extracted successfully to 550504148839462.csv!

Extracting comment(s) from https://mobile.facebook.com/watch/?v=2829551807143095 ...
12 comment(s) have been extracted successfully to 2829551807143095.csv!

Comments extraction has completed successfully!
Total time taken: 00:00:45
