In [35]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
import getpass
from datetime import datetime
import os
import pandas as pd
import re


In [36]:
class FacebookGroupMemberCrawl:
    def __init__(self, username, password, group_id, scroll_count):
        print("\n====== Facebook Group Member Scraper ======")
        self.email = username
        self.password = password
        self.group_id = group_id
        self.scroll_count = scroll_count
        self.setup_driver()

    def setup_driver(self):
        try:
            self.driver = webdriver.Chrome()
            self.driver.maximize_window()
        except Exception as e:
            print(f"Error: {e}")

    def login(self):
        try:
            self.driver.get("https://www.facebook.com/")
            self.driver.implicitly_wait(10)
            self.driver.find_element(By.ID, "email").send_keys(self.email)
            self.driver.find_element(By.ID, "pass").send_keys(self.password)
            self.driver.find_element(By.NAME, "login").click()
            time.sleep(10)
            print('Login success')
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False

    def get_group_members(self):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/members")
            time.sleep(5)
            members = set()
            for i in range(self.scroll_count):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3)
                print(f"Scroll {i+1}/{self.scroll_count}")
                user_elements = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='/user/']")
                print(len(user_elements))
                for user in user_elements:
                    try:
                        href = user.get_attribute("href")
                        if '/user/' in href:
                            user_id = href.split('/user/')[1].split('?')[0].strip('/')
                            name = user.text
                            members.add((user_id, name))
                            print(f"Member: {user_id} - {name}")
                    except Exception as e:
                        continue
            return list(members)
        
        except Exception as e:
            print(f"Error: {e}")

    def save_to_excel(self, members):
        try:
            file_name = f"all_group_members.xlsx"
            df = pd.DataFrame(members, columns=["User ID", "Name"])
            df_clean = df[df['Name'].str.strip() != '']
            df_clean.to_excel(file_name, index=False)
            print(f"Data saved to {file_name}")
        except Exception as e:
            print(f"Error: {e}")


In [37]:
class FacebookGroupPostCrawl:
    def __init__(self, username, password, group_id, scroll_count):
        print("\n====== Facebook Group post Scraper ======")
        self.email = username
        self.password = password
        self.group_id = group_id
        self.scroll_count = scroll_count
        self.setup_driver()

    def setup_driver(self):
        try:
            self.driver = webdriver.Chrome()
            self.driver.maximize_window()
        except Exception as e:
            print(f"Error: {e}")

    def login(self):
        try:
            self.driver.get("https://www.facebook.com/")
            self.driver.implicitly_wait(10)
            self.driver.find_element(By.ID, "email").send_keys(self.email)
            self.driver.find_element(By.ID, "pass").send_keys(self.password)
            self.driver.find_element(By.NAME, "login").click()
            time.sleep(10)
            print('Login success')
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False
    def get_group_posts(self):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/?sorting_setting=TOP_POSTS")
            time.sleep(5)
            postlist = set()
            last_height = self.driver.execute_script("return document.body.scrollHeight")

            for i in range(self.scroll_count):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3) 
                post_elements = self.driver.find_elements(By.XPATH, "//div[contains(@role, 'article')]")
                print(f"Found {len(post_elements)} post elements on scroll {i+1}")
                for post in post_elements:
                    try:
                        content = post.text.strip()
                        post_user = content.split('\n')[0]
                        post_content = content.split('\n')[1]
                        post_id_element = post.find_element(By.XPATH, ".//a[contains(@href, '/posts/')]")
                        post_id = post_id_element.get_attribute("href").split("/")[-2]
                        postlist.add((post_id,post_user,post_content))
                    except Exception as e:
                        continue
            return list(postlist)
        except Exception as e:
            print(f"{e}")
    def get_post_reactions(self,post_id):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/posts/{post_id}/") 
            time.sleep(10)
            last_height = self.driver.execute_script("return document.body.scrollHeight")
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3) 
            try:
                xpaths = [
                "//span[@class='xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk']"
                ]
                reaction_button = self.driver.find_element(By.XPATH, '//i[contains(@style, "2yrWPRXYtWq.png")]').click()
                # self.driver.execute_script("arguments[0].click();", reaction_button)
            except Exception as e:
                print(e)
                # print("No reactions found for this post.")
                return []
            reactions = set()
            user_elements = self.driver.find_elements(By.XPATH, "//a[contains(@href, '/user/')]")
            for user in user_elements:
                try:
                    user_name = user.text.strip()
                    user_id = user.get_attribute("href").split("/user/")[1].split("/")[0]
                    reactions.add((user_id,user_name))
                    print(f"Name: {user_name}, User ID: {user_id}")
                except Exception as e:
                    print(f"Error processing reaction: {e}")
                # try:
                #     load_more = self.driver.find_element(By.XPATH, "//a[contains(@class, 'uiMorePagerPrimary')]")
                #     load_more.click()
                #     time.sleep(3)
                # except Exception:
                #     break  # Exit loop if no more reactions to load
            return reactions
        except Exception as e:
            print(f"Error fetching reactions: {e}")
            return []
    def get_post_comment(self, post_id):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/posts/{post_id}/")  # Navigate to the post URL
            time.sleep(5)
            try:
                xpaths = [
                "//span[@class='xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk']",
                "//span[@class='xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk']//span[contains(text(), '2')]"
                ]
                reaction_button = self.driver.find_element(By.XPATH, xpaths)
                self.driver.execute_script("arguments[0].click();", reaction_button)
                time.sleep(5)
            except Exception:
                print("No reactions found for this post.")
                return []

            reactions = []
            while True:
                user_elements = self.driver.find_elements(By.XPATH, "//a[contains(@href, '/user/')]")
                for reaction in user_elements:
                    try:
                        user_url = reaction.get_attribute("href")
                        user_id = user_url.split("/")[-1]  # Extract user ID from URL
                        user_name = reaction.text.strip()
                        if user_id and user_name:
                            reactions.append({"User ID": user_id, "User Name": user_name})
                    except Exception as e:
                        print(f"Error processing reaction: {e}")

                # Scroll to load more reactions if possible
                try:
                    load_more = self.driver.find_element(By.XPATH, "//a[contains(@class, 'uiMorePagerPrimary')]")
                    load_more.click()
                    time.sleep(3)
                except Exception:
                    break  # Exit loop if no more reactions to load

            return reactions
        except Exception as e:
            print(f"Error fetching reactions: {e}")
    def get_detail_each_post(self,postlist):
        detailpost = ()
        for post in postlist:
            post_id =  post[0]
            post_reaction = self.get_post_reactions(post_id)
    def save_post_to_excel(self, postlist):
        try:
            file_name = f"ex_post.xlsx"
            df = pd.DataFrame(postlist, columns=["post_id", "post_user",'post_content'])
            df.to_excel(file_name, index=False)
            print(f"Post data saved to {file_name}")
        except Exception as e:
            print(f"Error saving posts to Excel: {e}")
        

In [38]:
username = 'phanmthihoa05@gmail.com'
password = 'Hung26082003'
group_id = '811896080494851'
scroll_count = 1
try:
    scraper = FacebookGroupPostCrawl(username= username, password = password, group_id = group_id, scroll_count= scroll_count)
    if scraper.login():
        print('-----------------')
        postlist = scraper.get_group_posts()
        scraper.get_detail_each_post(postlist)
        time.sleep(10)
except Exception as e:
    pass


Login success
-----------------
Found 7 post elements on scroll 1
Message: element not interactable
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x008533E3+25059]
	(No symbol) [0x007DCDE4]
	(No symbol) [0x006BBD39]
	(No symbol) [0x00700D3B]
	(No symbol) [0x006F5FD7]
	(No symbol) [0x00721EFC]
	(No symbol) [0x006F5A24]
	(No symbol) [0x00722194]
	(No symbol) [0x0073B51E]
	(No symbol) [0x00721C96]
	(No symbol) [0x006F3FAC]
	(No symbol) [0x006F4F3D]
	GetHandleVerifier [0x00B45543+3113795]
	GetHandleVerifier [0x00B5A20A+3198986]
	GetHandleVerifier [0x00B529E2+3168226]
	GetHandleVerifier [0x008F3250+680016]
	(No symbol) [0x007E572D]
	(No symbol) [0x007E29D8]
	(No symbol) [0x007E2B75]
	(No symbol) [0x007D57D0]
	BaseThreadInitThunk [0x75475D49+25]
	RtlInitializeExceptionChain [0x7734CEBB+107]
	RtlGetAppContainerNamedObjectPath [0x7734CE41+561]

Message: element not interactable
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x008533E3+25059]


In [39]:
# def save_to_excel(members):
#             file_name = f"ex_post.xlsx"
#             df = pd.DataFrame(members, columns=["Post ID", "type"])
#             df.to_excel(file_name, index=False)
#             print(f"Data saved to {file_name}")
# save_to_excel(all_post)

In [40]:
driver.get("https://www.facebook.com/groups/1280400922659065/posts/1319622732070217/")

xpaths = [
    "//span[@class='xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk']",
    "//span[@class='xrbpyxo x6ikm8r x10wlt62 xlyipyv x1exxlbk']//span[contains(text(), '2')]"
]

for xpath in xpaths:
    try:
        reaction_button = driver.find_element(By.XPATH, xpath)
        print("Found element with XPath:", xpath)
        driver.execute_script("arguments[0].click();", reaction_button)
        sleep(5)
        break
    except Exception as e:
        print(f"Failed for XPath: {xpath}, Error: {e}")

# Lấy danh sách các phần tử chứa thông tin người đã tương tác
user_elements = driver.find_elements(By.XPATH, "//a[contains(@href, '/user/')]")

# Duyệt qua các phần tử và trích xuất thông tin
user_data = []
for user in user_elements:
    try:
        user_name = user.text.strip()  # Lấy tên
        user_id = user.get_attribute("href").split("/user/")[1].split("/")[0]  # Lấy ID
        user_data.append({"Name": user_name, "User ID": user_id})
        print(f"Name: {user_name}, User ID: {user_id}")
    except Exception as e:
        print(f"Error processing user: {e}")

# # Đóng trình duyệt
# driver.quit()

# Lưu thông tin vào file Excel
import pandas as pd
df = pd.DataFrame(user_data)
df.to_excel("reactions_info.xlsx", index=False)
print("Data saved to reactions_info.xlsx")


NameError: name 'driver' is not defined