In [None]:
import re
import time
import random
import csv
import pyperclip
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

PROFILE_PATH = "G:/LinkedIn_Web_Scraper/chrome-profile"
CHROMEDRIVER_PATH = "G:/LinkedIn_Web_Scraper/chromedriver-win64/chromedriver.exe"
CSV_INPUT = "profiles.csv"
CSV_OUTPUT = "linkedin_posts.csv"


In [2]:
def setup_driver():
    options = Options()
    options.add_argument(f"user-data-dir={PROFILE_PATH}")
    options.add_argument("--start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)

    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": '''
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
        '''
    })

    return driver


In [3]:
driver = setup_driver()
wait = WebDriverWait(driver, 20)

In [4]:
def human_delay(min_sec=3, max_sec=6):
    time.sleep(random.uniform(min_sec, max_sec))

def scroll_down(driver, times=3):
    for _ in range(times):
        driver.execute_script("window.scrollBy({ top: 800, behavior: 'smooth' });")
        human_delay(2, 4)


In [1]:
def extract_post_links(driver, profile_url):
    posts_data = []
    driver.get(profile_url)
    print(f"➡️ Visiting profile: {profile_url}")

    # Dynamic scrolling to load all posts
    scroll_down(driver)

    posts = driver.find_elements(By.CSS_SELECTOR, "div.feed-shared-update-v2")
    print(f"🔍 Found {len(posts)} posts")

    # Extract username from profile URL for filename
    username = re.search(r"in/([^/]+)/", profile_url).group(1) if re.search(r"in/([^/]+)/", profile_url) else "unknown_user"
    output_file = f"{username}.csv"

    if not posts:
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            f.write("Message\nThis user has no posts yet.")
        print(f"ℹ️ This user ({username}) has no posts yet.")
        return posts_data

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=["Post URL", "Likes"])
        writer.writeheader()

        for i, post in enumerate(posts, 1):
            try:
                print(f"➡️ Processing post {i}")
                driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", post)
                human_delay(0.5, 1)

                # Extract likes
                likes = 0
                try:
                    likes_element = post.find_element(By.CSS_SELECTOR, "button[data-reaction-details] .social-details-social-counts__reactions-count")
                    likes_text = likes_element.text.strip()
                    likes = int(likes_text.replace(',', '')) if likes_text.isdigit() or likes_text.replace(',', '').isdigit() else 0
                    print(f"✅ Extracted likes: {likes}")
                except Exception as e:
                    print(f"⚠️ Failed to extract likes for post {i}: {str(e)}")

                # Extract post URL
                post_url = None
                try:
                    link_elements = post.find_elements(By.CSS_SELECTOR, "a[href*='linkedin.com/posts'], a[href*='activity-'], [data-urn*='urn:li:activity:'], [data-urn*='activity:'], .update-components-text a")
                    for element in link_elements:
                        if element.tag_name == "a":
                            post_url = element.get_attribute("href")
                        else:
                            urn = element.get_attribute("data-urn")
                            if urn and ("urn:li:activity:" in urn or "activity:" in urn):
                                activity_id = urn.split(":")[-1]
                                post_url = f"https://www.linkedin.com/posts/activity-{activity_id}"
                        if post_url:
                            print(f"✅ Extracted post URL directly: {post_url}")
                            break
                    if not post_url:
                        raise Exception("No valid link or urn found")
                except Exception as e:
                    print(f"⚠️ Failed to extract post URL directly for post {i}: {str(e)}")
                    # Fallback to clipboard method
                    try:
                        menu_button = post.find_element(By.CSS_SELECTOR, "div.feed-shared-update-v2__control-menu-container button[aria-expanded]")
                        actions = ActionChains(driver)
                        actions.move_to_element(menu_button).pause(0.5).click().perform()
                        human_delay(1, 2)
                        menu = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.artdeco-dropdown__content-inner")))
                        menu_items = menu.find_elements(By.CSS_SELECTOR, "li.feed-shared-control-menu__item")
                        for item in menu_items:
                            item_text = item.find_element(By.CSS_SELECTOR, "div.feed-shared-control-menu__dropdown-item").text.strip().lower()
                            if "copy link to post" in item_text:
                                pyperclip.copy("")
                                actions.move_to_element(item.find_element(By.CSS_SELECTOR, "div.feed-shared-control-menu__dropdown-item")).click().perform()
                                human_delay(2, 3)
                                break
                        else:
                            raise Exception("Copy link to post not found")
                        for _ in range(3):
                            post_link = pyperclip.paste().strip()
                            if "linkedin.com" in post_link or post_link.startswith("http"):
                                post_url = post_link
                                print(f"✅ Got post link: {post_url}")
                                break
                            human_delay(1)
                        else:
                            raise Exception("Failed to get valid post link from clipboard")
                    except Exception as e:
                        print(f"⚠️ Fallback failed for post {i}: {str(e)}")
                        continue

                # Save data
                writer.writerow({"Post URL": post_url, "Likes": likes})
                posts_data.append({"url": post_url, "likes": likes})

            except Exception as e:
                print(f"❌ Error processing post {i}: {str(e)}")
            finally:
                human_delay(0.5, 1)

    return posts_data

In [24]:
def scrape_profiles():
    with open(CSV_INPUT, newline="", encoding="utf-8") as infile, \
         open(CSV_OUTPUT, "w", newline="", encoding="utf-8") as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        writer.writerow(["Profile URL", "Post URL"])

        for row in reader:
            profile_url = row[0].strip()
            if not profile_url.startswith("http"):
                continue

            print(f"\n➡️ Visiting profile: {profile_url}")
            try:
                driver.get(profile_url)
                wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
                human_delay(6, 10)

                scroll_down(driver, times=random.randint(2, 4))
                post_links = extract_post_links(driver)
            except Exception as e:
                print(f"❌ Failed to scrape profile {profile_url}: {e}")


In [25]:
scrape_profiles()



➡️ Visiting profile: https://www.linkedin.com/in/shanzaywasim/recent-activity/all/
🔍 Found 7 posts
➡️ Processing post 1
✅ Extracted likes: 631
✅ Menu found for post 1
🧾 Found 3 menu items:
🧾 Menu item: Save
🧾 Menu item: Copy link to post
🧾 Menu item: Report post
✅ Extracted post URL directly: https://www.linkedin.com/company/university-of-engineering-and-technology-lahore/
📋 Attempt 1/5: Raw clipboard content: https://www.linkedin.com/posts/shanzaywasim_%F0%9D%90%80%F0%9D%90%A5%F0%9D%90%A1%F0%9D%90%9A%F0%9D%90%A6%F0%9D%90%9D%F0%9D%90%AE%F0%9D%90%A5%F0%9D%90%A2%F0%9D%90%A5%F0%9D%90%A5%F0%9D%90%9A%F0%9D%90%A1-%F0%9D%90%96%F0%9D%90%9E-%F0%9D%90%9D%F0%9D%90%A2%F0%9D%90%9D-activity-7334593370264121345-_pgE?utm_source=share&utm_medium=member_desktop&rcm=ACoAAFvAQg0BXzGljwy6sM6JsCqdhoXDUT4E_zA
✅ Got post link: https://www.linkedin.com/posts/shanzaywasim_%F0%9D%90%80%F0%9D%90%A5%F0%9D%90%A1%F0%9D%90%9A%F0%9D%90%A6%F0%9D%90%9D%F0%9D%90%AE%F0%9D%90%A5%F0%9D%90%A2%F0%9D%90%A5%F0%9D%90%A5%F0%9D%9

In [None]:
# driver.quit()  # Uncomment this to close browser after scraping
