In [None]:
import csv
import getpass
import os
import random
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
def setup_driver():
    """Build and return a Chrome WebDriver configured for scraping.

    The browser is started maximized, with the sandbox, /dev/shm usage,
    notifications and infobars switched off through command-line flags.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    options = Options()
    for flag in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-notifications",
        "--disable-infobars",
        "--start-maximized",
    ):
        options.add_argument(flag)

    # install() returns the value handed to Service; presumably the path of a
    # chromedriver binary matching the local Chrome - TODO confirm.
    chromedriver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(chromedriver_path), options=options)

In [None]:
def login(driver, email, password):
    """Log in to Facebook through the given Selenium driver.

    Opens the Facebook home page, clicks the cookie-consent button when one
    becomes clickable within 5 seconds, fills in and submits the login form,
    then waits up to 15 seconds for a ``div[@role='navigation']`` element,
    which this function treats as the sign of a successful login.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        email: Value typed into the field with id ``email``.
        password: Value typed into the field with id ``pass``.

    Returns:
        True if the navigation element appeared, False if any step raised an
        ``Exception`` (the error is printed, not re-raised).
    """
    try:
        driver.get("https://www.facebook.com/")
        try:
            cookie_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(string(), 'Accept') or contains(string(), 'Allow') or contains(string(), 'Accepter')]"))
            )
            cookie_button.click()
        except Exception:
            # The consent dialog is optional, so any failure here is ignored.
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate, so the cell can still be interrupted.
            print("No cookie dialog found or already accepted.")

        email_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "email")))
        email_field.send_keys(email)

        password_field = driver.find_element(By.ID, "pass")
        password_field.send_keys(password)

        login_button = driver.find_element(By.NAME, "login")
        login_button.click()

        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.XPATH, "//div[@role='navigation']")))
        print("Login successful!")
        return True

    except Exception as e:
        print(f"Login failed: {str(e)}")
        return False

In [None]:
def scroll_page(driver, scroll_count=3):
    """Scroll to the bottom of the page repeatedly and expand truncated posts.

    After each scroll the function sleeps a random 5-8 seconds, then clicks
    (via JavaScript) up to five ``div`` elements whose text contains
    'See More' or 'Voir plus'. Expanding posts is best-effort: a failure to
    look up or click a button is printed and does not stop the scrolling.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
        scroll_count: Number of scroll-to-bottom iterations to perform.
    """
    for i in range(scroll_count):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(random.uniform(5, 8))
        print(f"Scrolled {i+1}/{scroll_count} times")

        try:
            see_more_buttons = driver.find_elements(By.XPATH, "//div[contains(text(), 'See More') or contains(text(), 'Voir plus')]")
        except Exception as e:
            print(f"Could not look up 'See More' buttons: {e}")
            continue

        for button in see_more_buttons[:5]:
            try:
                driver.execute_script("arguments[0].click();", button)
            except Exception as e:
                # One element that cannot be clicked must not prevent the
                # remaining buttons from being expanded.
                print(f"Could not expand a 'See More' button: {e}")
                continue
            time.sleep(1)

In [None]:
def search_hashtag(driver, hashtag, scroll_count=20, load_wait=15):
    """Open a Facebook hashtag page, scroll through it and parse its posts.

    Args:
        driver: Selenium WebDriver used to load the page.
        hashtag: Raw hashtag text, with or without a leading ``#``. It is
            percent-encoded before being placed in the URL, so pass it
            unescaped.
        scroll_count: Number of scroll iterations passed to ``scroll_page``.
        load_wait: Seconds to sleep after requesting the page, before
            scrolling starts.

    Returns:
        The list returned by ``parse_posts``, or an empty list if any step
        raised an ``Exception`` (the error is printed, not re-raised).
    """
    try:
        # A leading '#' would be read as a URL fragment and load the wrong page.
        tag = hashtag.strip().lstrip('#')
        driver.get(f"https://www.facebook.com/hashtag/{quote(tag, safe='')}")
        time.sleep(load_wait)
        scroll_page(driver, scroll_count)
        return parse_posts(driver)
    except Exception as e:
        print(f"Error searching hashtag: {str(e)}")
        return []

In [None]:
def _text_or_default(element, default):
    """Return the element's text, or ``default`` when no element was found."""
    return element.get_text() if element is not None else default


def parse_posts(driver):
    """Extract author, date, text and URL for each post on the current page.

    The driver's page source is parsed with BeautifulSoup and every ``div``
    with class ``x1yztbdb`` is treated as one post. Fields that cannot be
    located get a "No ... found" placeholder instead of being omitted.

    Args:
        driver: Selenium WebDriver whose ``page_source`` is parsed.

    Returns:
        A list of dicts with the keys ``author``, ``date``, ``text`` and
        ``url``. Posts whose parsing raises are reported and skipped.
    """
    posts = []
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # NOTE(review): the class names used below look like generated CSS classes
    # and may stop matching when the site markup changes - confirm periodically.
    post_elements = soup.find_all('div', {'class': 'x1yztbdb'})

    for post in post_elements:
        try:
            post_text = _text_or_default(
                post.find('div', {'data-ad-preview': 'message'}), "No text found")
            post_date = _text_or_default(
                post.find('span', {'class': 'x4k7w5x'}), "No date found")
            author = _text_or_default(
                post.find('span', {'class': 'x3nfvp2'}), "No author found")

            link_element = post.find('a', {'class': 'x1i10hfl'})
            # An anchor without an href makes .get() return None; fall back to
            # the placeholder instead of failing on None.startswith() and
            # losing the whole post.
            href = link_element.get('href') if link_element is not None else None
            post_url = href or "No URL found"
            if post_url.startswith('/'):
                post_url = 'https://www.facebook.com' + post_url

            posts.append({
                'author': author,
                'date': post_date,
                'text': post_text,
                'url': post_url
            })
        except Exception as e:
            print(f"Error parsing post: {str(e)}")
            continue

    print(f"Found {len(posts)} posts")
    return posts

In [None]:
def save_to_csv(posts, filename="harcelement_posts.csv"):
    """Write the scraped posts to a UTF-8 encoded CSV file.

    The file is overwritten and starts with a header row containing the
    columns ``author``, ``date``, ``text`` and ``url``.

    Args:
        posts: Sequence of dicts keyed by the four column names.
        filename: Destination path of the CSV file.

    Returns:
        None. Any exception raised while writing is printed, not re-raised.
    """
    columns = ['author', 'date', 'text', 'url']
    try:
        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='', encoding='utf-8') as out_file:
            csv_writer = csv.DictWriter(out_file, fieldnames=columns)
            csv_writer.writeheader()
            csv_writer.writerows(posts)
        print(f"Successfully saved {len(posts)} posts to {filename}")
    except Exception as err:
        print(f"Error saving to CSV: {err}")

In [None]:
# Credentials must never be stored in the notebook. They are read from the
# FB_EMAIL / FB_PASSWORD environment variables, with an interactive prompt as
# a fallback; getpass keeps the password out of the cell output.
email = os.environ.get("FB_EMAIL") or input("Facebook email: ")
password = os.environ.get("FB_PASSWORD") or getpass.getpass("Facebook password: ")

driver = setup_driver()


In [None]:
# Attempt the login; the boolean returned by login() is shown as the cell output.
login(driver, email, password)

In [None]:
# Scrape posts for the hashtag; search_hashtag() returns [] if it hits an error.
posts = search_hashtag(driver, "harcèlement")

In [None]:
# Preview the first two scraped posts.
posts[:2]

In [None]:
# Write the results to save_to_csv()'s default file, "harcelement_posts.csv".
save_to_csv(posts)

In [None]:
# Shut down the browser session once scraping is finished.
driver.quit()