# In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
import csv
import re
from datetime import datetime
from dotenv import load_dotenv

# Let python-dotenv populate the environment before the credentials are read.
load_dotenv()

# Facebook credentials; each one is None when its variable is not set.
FB_EMAIL = os.environ.get('FB_EMAIL')
FB_PASSWORD = os.environ.get('FB_PASSWORD')

# Function to login to Facebook
def login_to_facebook(driver, email, password):
    """Log in to Facebook through the given WebDriver session.

    Opens the Facebook home page, types the credentials into the ``email``
    and ``pass`` fields and clicks the ``login`` button.

    Args:
        driver: Selenium WebDriver used to load the page and fill in the form.
        email: Account e-mail typed into the ``email`` field.
        password: Account password typed into the ``pass`` field.

    Raises:
        ValueError: If ``email`` or ``password`` is ``None`` or empty, e.g.
            because ``FB_EMAIL`` / ``FB_PASSWORD`` were never set.
        selenium.common.exceptions.TimeoutException: If the e-mail field does
            not appear within 10 seconds.
    """
    # Fail fast, before any browser traffic: a missing credential would
    # otherwise only surface as an unhelpful error inside send_keys().
    if not email or not password:
        raise ValueError(
            "Facebook credentials are missing; set FB_EMAIL and FB_PASSWORD."
        )

    driver.get("https://www.facebook.com/")

    # Wait for the form itself instead of sleeping a fixed 3 seconds: this
    # returns as soon as the field exists and tolerates slower page loads.
    wait = WebDriverWait(driver, 10)
    email_input = wait.until(EC.presence_of_element_located((By.ID, "email")))
    email_input.send_keys(email)

    password_input = driver.find_element(By.ID, "pass")
    password_input.send_keys(password)

    login_button = driver.find_element(By.NAME, "login")
    login_button.click()

    # No element is known here that reliably marks a finished login, so the
    # original fixed pause is kept to let the post-login page settle.
    time.sleep(5)

# Function to scroll and load more posts
def scroll_and_load_posts(driver, num_scrolls):
    """Send the END key to the page body ``num_scrolls`` times.

    The ``body`` element is looked up again before every key press, and each
    press is followed by a 3-second pause so newly requested content has time
    to load.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
        num_scrolls: Number of END key presses to perform.
    """
    for _scroll in range(num_scrolls):
        page_body = driver.find_element(By.TAG_NAME, 'body')
        page_body.send_keys(Keys.END)
        time.sleep(3)

# Function to click all "See More" buttons
def click_see_more_buttons(driver):
    """Click every 'See more' button currently found on the page.

    Each click is issued through JavaScript and followed by a 1-second pause
    so the content can expand. A failure on one button is printed and does
    not stop the remaining buttons from being processed.

    Args:
        driver: Selenium WebDriver showing the page whose posts to expand.
    """
    see_more_xpath = "//div[@role='button' and contains(text(), 'See more')]"
    for control in driver.find_elements(By.XPATH, see_more_xpath):
        try:
            driver.execute_script("arguments[0].click();", control)
            time.sleep(1)  # Give the expanded text a moment to render
        except Exception as err:
            print(f"Error clicking 'See more' button: {err}")

# Matches numeric dates such as 7/4/2023 or 07/04/2023 inside post text.
_POST_DATE_PATTERN = re.compile(r'\b\d{1,2}\/\d{1,2}\/\d{4}\b')
# Timestamp layout stored in the 'date' field of every extracted post.
_POST_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def _parse_post_date(content):
    """Return the first valid m/d/Y date in ``content``, else the current time."""
    for match in _POST_DATE_PATTERN.finditer(content):
        try:
            parsed = datetime.strptime(match.group(), '%m/%d/%Y')
        except ValueError:
            # The pattern also matches impossible dates (e.g. 25/12/2023 read
            # as month 25); skip those instead of discarding the whole post.
            continue
        return parsed.strftime(_POST_TIMESTAMP_FORMAT)
    return datetime.now().strftime(_POST_TIMESTAMP_FORMAT)


# Function to extract posts
def extract_posts(driver):
    """Collect title, text and date from the article elements on the page.

    Waits up to 10 seconds for the ``//div[@role='article']`` elements to be
    visible, then reads the ``h2`` text as the title and the
    ``data-ad-preview='message'`` div as the content of each one. The date is
    taken from the first valid m/d/Y date written in the content; when there
    is none, the current local time is used instead.

    Args:
        driver: Selenium WebDriver showing the page to read.

    Returns:
        A list of dicts with the keys ``'title'``, ``'content'`` and
        ``'date'`` (formatted ``YYYY-MM-DD HH:MM:SS``). The list is empty when
        no article became visible before the wait timed out.
    """
    # Imported here so the block stays self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        TimeoutException,
    )

    try:
        posts = WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.XPATH, "//div[@role='article']"))
        )
    except TimeoutException:
        # Nothing to scrape is a normal outcome, not an error.
        return []

    post_data = []
    for post in posts:
        try:
            post_title = post.find_element(By.XPATH, ".//h2").text
            content = post.find_element(By.XPATH, ".//div[@data-ad-preview='message']").text
        except (NoSuchElementException, StaleElementReferenceException):
            # Articles without a heading or message body are skipped, as are
            # elements that left the DOM while being read.
            continue

        post_data.append({
            'title': post_title,
            'content': content,
            'date': _parse_post_date(content),
        })
    return post_data


# Function to print posts
def print_posts(post_data):
    """Print the content of every post, each followed by an 80-dash rule.

    Args:
        post_data: Iterable of mappings that each provide a ``'content'`` key.
    """
    separator = "-" * 80
    for entry in post_data:
        print("Post:")
        print(entry['content'])
        print(separator)

def scrape_facebook(page_username="bewketu.seyoum.3", num_scrolls=5,
                    output_path='facebook_posts.csv',
                    geckodriver_path='/snap/bin/geckodriver'):
    """Scrape the posts of one Facebook page into a CSV file.

    Starts Firefox, logs in with ``FB_EMAIL`` / ``FB_PASSWORD``, opens the
    page, scrolls to load more posts, expands 'See more' sections, extracts
    the posts and writes them to ``output_path``. The CSV file (header
    included) is written even when no posts were found.

    Args:
        page_username: Last path segment of the page URL; also written to the
            ``Facebook_Username`` column of every row.
        num_scrolls: How many times to scroll down before extracting.
        output_path: Destination CSV file; overwritten if it exists.
        geckodriver_path: Location of the geckodriver executable.
    """
    service = Service(geckodriver_path)
    driver = webdriver.Firefox(service=service)
    try:
        login_to_facebook(driver, FB_EMAIL, FB_PASSWORD)

        # Navigate to the specific Facebook page
        driver.get(f"https://www.facebook.com/{page_username}")
        time.sleep(5)

        # Scroll to load more posts, then expand the truncated ones
        scroll_and_load_posts(driver, num_scrolls=num_scrolls)
        click_see_more_buttons(driver)

        post_data = extract_posts(driver)
    finally:
        # Always release the browser, including when a step above raises;
        # the extracted posts are plain dicts and stay usable afterwards.
        driver.quit()

    # Save post data to a CSV file
    fieldnames = ['Facebook_Username', 'Post_Title', 'Post_Content', 'Date_Posted']
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {
                'Facebook_Username': page_username,
                'Post_Title': post['title'],
                'Post_Content': post['content'],
                'Date_Posted': post['date'],
            }
            for post in post_data
        )

    if post_data:
        print(f"Scraped posts saved to '{output_path}'.")
    else:
        print("No posts found.")


# def save_to_postgres(post_data):
#     try:
#         conn = psycopg2.connect(
#             host=POSTGRES_HOST,
#             database=POSTGRES_NAME,
#             user=POSTGRES_USER,
#             password=POSTGRES_PASSWORD
#         )
#         cur = conn.cursor()

#         # Insert data into the existing table
#         for post in post_data:
#             cur.execute("""
#                 INSERT INTO facebook_posts (facebook_username, post_title, post_content, date_posted)
#                 VALUES (%s, %s, %s, %s)
#             """, (
#                 'bewketu.seyoum.3',  # Replace with the actual username
#                 post['title'],
#                 post['content'],
#                 post['date']
#             ))

#         conn.commit()
#         print("Data saved to PostgreSQL successfully.")

#     except (Exception, psycopg2.Error) as error:
#         print("Error while connecting to PostgreSQL", error)
#     finally:
#         if conn:
#             cur.close()
#             conn.close()
#             print("PostgreSQL connection closed.")


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_facebook()