In [None]:
# Import necessary libraries
import pandas as pd  # For data manipulation and storage
from selenium import webdriver  # For web scraping
from selenium.webdriver.common.by import By  # To locate elements on the webpage
from selenium.webdriver.common.keys import Keys  # To simulate keyboard inputs
import time  # For adding delays

# Install and configure Selenium and Chrome WebDriver
!pip install selenium
!apt-get update
!apt install -y chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

In [None]:
def scrape_facebook_hashtag(hashtag, num_posts, max_stalled_scrolls=5):
    """
    Scrapes Facebook posts for a given hashtag.

    Opens the hashtag page in headless Chrome and scrolls to the bottom until
    at least ``num_posts`` post-text elements are present, or until scrolling
    stops producing new elements.

    Parameters:
    - hashtag: The hashtag to search for on Facebook (without the leading '#').
    - num_posts: The maximum number of posts to scrape.
    - max_stalled_scrolls: How many consecutive scrolls may load no new posts
      before giving up. Prevents an endless loop when the page has fewer than
      ``num_posts`` matching posts (or none, e.g. behind a login wall).

    Returns:
    - A list of text content from posts. It holds at most ``num_posts`` items
      and may be shorter (or empty) if fewer posts could be loaded.

    Raises:
    - Any exception raised by Selenium is propagated; the browser is always
      shut down first.
    """
    # Selector for the elements that hold the text of a post
    post_selector = 'div[dir="auto"][style="text-align: start;"]'

    # Configure Chrome WebDriver in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run browser in the background
    options.add_argument('--no-sandbox')  # Typically required when Chrome runs as root in a container
    options.add_argument('--disable-dev-shm-usage')  # Avoid shared memory issues
    driver = webdriver.Chrome(options=options)  # Initialize WebDriver

    # try/finally guarantees the browser process is closed even if a step fails
    try:
        # Open the Facebook page for the given hashtag
        driver.get(f'https://www.facebook.com/hashtag/{hashtag}')

        # Wait for the page to load completely
        time.sleep(10)

        # Scroll until enough posts are loaded, or stop once several
        # consecutive scrolls bring in nothing new
        post_elements = driver.find_elements(By.CSS_SELECTOR, post_selector)
        stalled_scrolls = 0
        while len(post_elements) < num_posts and stalled_scrolls < max_stalled_scrolls:
            previous_count = len(post_elements)
            driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)  # Scroll to the page bottom
            time.sleep(2)  # Allow time for posts to load
            post_elements = driver.find_elements(By.CSS_SELECTOR, post_selector)
            if len(post_elements) > previous_count:
                stalled_scrolls = 0  # Progress was made; reset the counter
            else:
                stalled_scrolls += 1

        # Read the text while the browser is still open (the list is built
        # before the finally clause runs)
        return [element.text for element in post_elements[:num_posts]]
    finally:
        # Close the browser
        driver.quit()

In [None]:
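# Full candidate hashtag list, kept commented out for reference only;
# the shorter list that is actually scraped is defined in a later cell.
# hashtags=[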
#     'wildfire',
#     'earthquake',
#     'flood',
#     'hurricane',
#     'typhoon',
#     'tornado',
#     'tsunami',
#     'drought',
#     'famine',
#     'epidemic',
#     'pandemic',
#     'outbreak',
#     'accident',
#     'disaster'
# ]

In [None]:
# Hashtags whose Facebook hashtag pages are scraped, in this order
hashtags = [
    'earthquake', 'flood', 'hurricane', 'typhoon',
    'tornado', 'tsunami', 'accident', 'disaster',
]

In [None]:
# Quick smoke test: scrape three posts for a single hashtag and show the raw texts
texts = scrape_facebook_hashtag(hashtag='disaster', num_posts=3)
print(texts)

In [None]:
# Scrape every hashtag and collect one (Hashtag, Text) record per post.
# Records are accumulated in a plain list and converted to a DataFrame once:
# calling pd.concat inside the loop re-copies all earlier rows on every
# iteration and starts from an empty frame, which recent pandas versions warn about.
POSTS_PER_HASHTAG = 3  # Number of posts requested for each hashtag

records = []
for hashtag in hashtags:
    texts = scrape_facebook_hashtag(hashtag, num_posts=POSTS_PER_HASHTAG)
    records.extend({'Hashtag': hashtag, 'Text': text} for text in texts)

# DataFrame holding all scraped posts; passing `columns` keeps the expected
# schema even when nothing could be scraped
df_all = pd.DataFrame(records, columns=['Hashtag', 'Text'])


In [None]:
# Display the first few rows of the scraped data.
# A bare last expression lets the notebook render the DataFrame as a table
# instead of the plain-text dump produced by print().
df_all.head()

In [None]:
# Check the shape of the DataFrame, shown as (rows, columns), to confirm how much was scraped.
# The bare expression is displayed as the cell's output; no print() needed.
df_all.shape

In [None]:
# Persist the scraped posts as CSV (without the index column) for later analysis
df_all.to_csv(path_or_buf='fb_scraped.csv', index=False)
print("Data saved to 'fb_scraped.csv'")