In [37]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
import getpass
from datetime import datetime
import os
import pandas as pd
import re


In [38]:
class FacebookGroupMemberCrawl:
    def __init__(self, username, password, group_id, scroll_count):
        print("\n====== Facebook Group Member Scraper ======")
        self.email = username
        self.password = password
        self.group_id = group_id
        self.scroll_count = scroll_count
        self.setup_driver()

    def setup_driver(self):
        try:
            self.driver = webdriver.Chrome()
            self.driver.maximize_window()
        except Exception as e:
            print(f"Error: {e}")

    def login(self):
        try:
            self.driver.get("https://www.facebook.com/")
            self.driver.implicitly_wait(10)
            self.driver.find_element(By.ID, "email").send_keys(self.email)
            self.driver.find_element(By.ID, "pass").send_keys(self.password)
            self.driver.find_element(By.NAME, "login").click()
            time.sleep(10)
            print('Login success')
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False

    def get_group_members(self):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/members")
            time.sleep(5)
            members = set()
            for i in range(self.scroll_count):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3)
                print(f"Scroll {i+1}/{self.scroll_count}")
                user_elements = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='/user/']")
                print(len(user_elements))
                for user in user_elements:
                    try:
                        href = user.get_attribute("href")
                        if '/user/' in href:
                            user_id = href.split('/user/')[1].split('?')[0].strip('/')
                            name = user.text
                            members.add((user_id, name))
                            print(f"Member: {user_id} - {name}")
                    except Exception as e:
                        continue
            return list(members)
        
        except Exception as e:
            print(f"Error: {e}")

    def save_to_excel(self, members):
        try:
            file_name = f"all_group_members.xlsx"
            df = pd.DataFrame(members, columns=["User ID", "Name"])
            df_clean = df[df['Name'].str.strip() != '']
            df_clean.to_excel(file_name, index=False)
            print(f"Data saved to {file_name}")
        except Exception as e:
            print(f"Error: {e}")


In [39]:
class FacebookGroupPostCrawl:
    def __init__(self, username, password, group_id, scroll_count):
        print("\n====== Facebook Group post Scraper ======")
        self.email = username
        self.password = password
        self.group_id = group_id
        self.scroll_count = scroll_count
        self.setup_driver()

    def setup_driver(self):
        try:
            self.driver = webdriver.Chrome()
            self.driver.maximize_window()
        except Exception as e:
            print(f"Error: {e}")

    def login(self):
        try:
            self.driver.get("https://www.facebook.com/")
            self.driver.implicitly_wait(10)
            self.driver.find_element(By.ID, "email").send_keys(self.email)
            self.driver.find_element(By.ID, "pass").send_keys(self.password)
            self.driver.find_element(By.NAME, "login").click()
            time.sleep(10)
            print('Login success')
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False
    def get_post_image(self):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/?sorting_setting=TOP_POSTS")
            time.sleep(5)
            listpost = set()
            for i in range(self.scroll_count):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3)
                print(f"Scroll {i+1}/{self.scroll_count}")
                likeBtn = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='/photo/']")
                print(len(likeBtn))
                for user in likeBtn:
                    try:
                        href = user.get_attribute("href")
                        if '/photo/' in href:
                            post_id_raw = href.split('/photo/')[1]
                            clean_post_id = re.sub(r'(idorvanity=\d+).*', r'\1', post_id_raw)
                            listpost.add((clean_post_id,'photo'))
                            print(f"ID_post: {clean_post_id}")
                    except Exception as e:
                        continue
            print(listpost)
            return listpost
        except Exception as e:
            print(f"Error: {e}")

    def get_post_video(self):
        try:
            self.driver.get(f"https://www.facebook.com/groups/{self.group_id}/?sorting_setting=TOP_POSTS")
            time.sleep(5)
            listpost = set()
            for i in range(self.scroll_count):
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3)
                print(f"Scroll {i+1}/{self.scroll_count}")
                likeBtn = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='/videos/']")
                print(len(likeBtn))
                for user in likeBtn:
                    try:
                        href = user.get_attribute("href")
                        if '/videos/' in href:
                            post_id_raw = href.split('/videos/')[1]
                            clean_post_id = re.sub(r'(idorvanity=\d+).*', r'\1', post_id_raw)
                            # name = user.text
                            listpost.add((clean_post_id,'video'))
                            print(f"ID_post: {clean_post_id}")
                    except Exception as e:
                        continue
            print(listpost)
            return listpost
        except Exception as e:
            print(f"Error: {e}")

In [40]:
username = 'phanmthihoa05@gmail.com'
password = 'Hung26082003'
group_id = '811896080494851'
scroll_count = 5
try:
    scraper = FacebookGroupPostCrawl(username= username, password = password, group_id = group_id, scroll_count= scroll_count)
    if scraper.login():
        print('-----------------')
        imagelist = scraper.get_post_image()
        videolist = scraper.get_post_video()
        time.sleep(10)
except Exception as e:
    pass


Login success
-----------------
Scroll 1/5
6
ID_post: ?fbid=122099049674336077&set=p.122099049674336077
ID_post: ?fbid=576717238433491&set=g.811896080494851
ID_post: ?fbid=632310112460152&set=g.811896080494851
ID_post: ?fbid=632310075793489&set=g.811896080494851
ID_post: ?fbid=632310032460160&set=g.811896080494851
ID_post: ?fbid=576717238433491&set=gm.940359197648538&idorvanity=811896080494851
Scroll 2/5
6
ID_post: ?fbid=122099049674336077&set=p.122099049674336077
ID_post: ?fbid=576717238433491&set=g.811896080494851
ID_post: ?fbid=632310112460152&set=g.811896080494851
ID_post: ?fbid=632310075793489&set=g.811896080494851
ID_post: ?fbid=632310032460160&set=g.811896080494851
ID_post: ?fbid=576717238433491&set=gm.940359197648538&idorvanity=811896080494851
Scroll 3/5
10
ID_post: ?fbid=122099049674336077&set=p.122099049674336077
ID_post: ?fbid=576717238433491&set=g.811896080494851
ID_post: ?fbid=632310112460152&set=g.811896080494851
ID_post: ?fbid=632310075793489&set=g.811896080494851
ID_po

In [42]:
all_post = imagelist.union(videolist)
print(all_post)
all_post = list(all_post)

{('?fbid=532166123079873&set=gm.899560911728367&idorvanity=811896080494851', 'photo'), ('?fbid=122099049674336077&set=p.122099049674336077', 'photo'), ('?fbid=122196906338211717&set=gm.936338714717253&idorvanity=811896080494851', 'photo'), ('?fbid=122161421954282461&set=gm.936123088072149&idorvanity=811896080494851', 'photo'), ('1318747156208084/?idorvanity=811896080494851', 'video'), ('953768903480315/?idorvanity=811896080494851', 'video'), ('?fbid=632310032460160&set=pcb.938181804532944&__cft__[0]=AZUYUueVDiAz67FajrzYO5uWQvyT_D0x8NkdsU6_7ao4CfUCHvJNiIcI98myUYLBaaKXHhFoPHLJvzNKAk6DNEfbEoM7SwybNIZvzqN6J9tANj7NbD9Bu420RapRhMKnvvc2JW7BGcANf-VIw0F5xGx_-XyZHsZw3yfnyA_spPmXezuC4CJbBvGaQWV0eitpAuF-rCCdJ0VFnl_AEfs4uEpK&__tn__=*bH-R', 'photo'), ('?fbid=576717238433491&set=g.811896080494851', 'photo'), ('?fbid=632310032460160&set=g.811896080494851', 'photo'), ('?fbid=632310112460152&set=g.811896080494851', 'photo'), ('?fbid=576717238433491&set=gm.940359197648538&idorvanity=811896080494851', 'ph

In [43]:
print(all_post)

[('?fbid=532166123079873&set=gm.899560911728367&idorvanity=811896080494851', 'photo'), ('?fbid=122099049674336077&set=p.122099049674336077', 'photo'), ('?fbid=122196906338211717&set=gm.936338714717253&idorvanity=811896080494851', 'photo'), ('?fbid=122161421954282461&set=gm.936123088072149&idorvanity=811896080494851', 'photo'), ('1318747156208084/?idorvanity=811896080494851', 'video'), ('953768903480315/?idorvanity=811896080494851', 'video'), ('?fbid=632310032460160&set=pcb.938181804532944&__cft__[0]=AZUYUueVDiAz67FajrzYO5uWQvyT_D0x8NkdsU6_7ao4CfUCHvJNiIcI98myUYLBaaKXHhFoPHLJvzNKAk6DNEfbEoM7SwybNIZvzqN6J9tANj7NbD9Bu420RapRhMKnvvc2JW7BGcANf-VIw0F5xGx_-XyZHsZw3yfnyA_spPmXezuC4CJbBvGaQWV0eitpAuF-rCCdJ0VFnl_AEfs4uEpK&__tn__=*bH-R', 'photo'), ('?fbid=576717238433491&set=g.811896080494851', 'photo'), ('?fbid=632310032460160&set=g.811896080494851', 'photo'), ('?fbid=632310112460152&set=g.811896080494851', 'photo'), ('?fbid=576717238433491&set=gm.940359197648538&idorvanity=811896080494851', 'ph

In [None]:
duma