In [None]:
!pip install -q selenium undetected-chromedriver pandas

In [None]:
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import undetected_chromedriver as uc

# ---------------- SETUP: START BROWSER ----------------
# Build the Chrome options shared by the whole session.
options = webdriver.ChromeOptions()
for chrome_flag in (
    "--headless",  # Drop this flag to watch the browser while debugging
    "--disable-blink-features=AutomationControlled",
):
    options.add_argument(chrome_flag)

# Download (or reuse) a chromedriver binary, then start the single browser
# instance that every function below drives through the global `driver`.
chromedriver_path = ChromeDriverManager().install()
driver = uc.Chrome(service=Service(chromedriver_path), options=options)

# ---------------- LOGIN FUNCTION ----------------
def linkedin_login(email, password):
    """Submit the LinkedIn login form using the global ``driver``.

    Opens the login page, types ``email`` and ``password`` into the fields
    with ids ``username`` and ``password``, then presses Return in the
    password field. Randomized pauses follow the page load and the submit.
    The function does not check whether the login actually succeeded.
    """
    driver.get("https://www.linkedin.com/login")
    time.sleep(random.uniform(3, 5))

    # Locate both inputs before typing anything, in form order.
    form_fields = {
        field_id: driver.find_element(By.ID, field_id)
        for field_id in ("username", "password")
    }
    form_fields["username"].send_keys(email)
    form_fields["password"].send_keys(password)
    form_fields["password"].send_keys(Keys.RETURN)

    # Give the post-login redirect time to finish.
    time.sleep(random.uniform(5, 8))

# ---------------- SEARCH FOR PEOPLE BY JOB TITLE ----------------
def search_profiles(job_title, num_pages=2):
    """Collect profile URLs from LinkedIn's people search for a job title.

    Args:
        job_title: Free-text search keywords; percent-encoded before being
            placed in the query string.
        num_pages: Maximum number of result pages to read.

    Returns:
        A list of unique profile URLs in first-seen order. Query strings,
        fragments and trailing slashes are stripped so the same profile is
        not returned twice under different link decorations.
    """
    from urllib.parse import quote, urlsplit, urlunsplit

    from selenium.common.exceptions import WebDriverException

    # quote() encodes spaces as %20 (as before) and also escapes characters
    # such as '&', '+' and '#' that would otherwise corrupt the query string.
    search_url = (
        "https://www.linkedin.com/search/results/people/"
        f"?keywords={quote(job_title, safe='')}"
    )
    driver.get(search_url)
    time.sleep(random.uniform(4, 7))

    # A dict is used as an ordered set: keys keep first-seen order.
    seen = {}
    for page_index in range(num_pages):
        for link in driver.find_elements(By.XPATH, "//a[contains(@href, '/in/')]"):
            try:
                href = link.get_attribute("href")
            except WebDriverException:
                # The element went stale while the page was re-rendering.
                continue
            if not href or "linkedin.com/in/" not in href:
                continue
            parts = urlsplit(href)
            canonical = urlunsplit(
                (parts.scheme, parts.netloc, parts.path.rstrip("/"), "", "")
            )
            seen.setdefault(canonical, None)

        # Nothing left to read: skip the extra click and the pause after it.
        if page_index == num_pages - 1:
            break

        try:
            next_button = driver.find_element(By.XPATH, "//button[@aria-label='Next']")
            driver.execute_script("arguments[0].click();", next_button)
        except WebDriverException:
            break  # No "Next" button (or it could not be clicked): last page
        time.sleep(random.uniform(5, 10))

    return list(seen)

# ---------------- SCRAPE LINKEDIN PROFILE ----------------
def scrape_profile(profile_url):
    """Scrape name, jobs, education and skills from one LinkedIn profile.

    Extraction is best-effort: any section that cannot be located is left
    empty (or ``"Unknown"`` for the name) instead of raising. The initial
    page load is not guarded, so a navigation failure still propagates.

    Args:
        profile_url: URL of the profile page to open with the global driver.

    Returns:
        A dict with the keys ``"Name"``, ``"Jobs"``, ``"Education"``,
        ``"Skills"`` and ``"Profile URL"``. The list-like fields are joined
        with ``"; "``; ``"Profile URL"`` is ``profile_url`` as passed in.
    """
    from urllib.parse import urlsplit, urlunsplit

    from selenium.common.exceptions import WebDriverException

    driver.get(profile_url)
    time.sleep(random.uniform(4, 7))

    # Extract Name
    try:
        name = driver.find_element(By.TAG_NAME, "h1").text
    except WebDriverException:
        name = "Unknown"

    # Extract Job Positions
    jobs = []
    try:
        job_elements = driver.find_elements(
            By.XPATH, "//div[contains(@class,'experience__list-item')]"
        )
    except WebDriverException:
        job_elements = []
    for job in job_elements:
        try:
            job_title = job.find_element(By.TAG_NAME, "h3").text
            company = job.find_element(By.TAG_NAME, "p").text
        except WebDriverException:
            # Skip only the malformed entry; keep reading the rest.
            continue
        jobs.append(f"{job_title} at {company}")

    # Extract Education
    education = []
    try:
        edu_elements = driver.find_elements(
            By.XPATH, "//section[contains(@id,'education-section')]//li"
        )
    except WebDriverException:
        edu_elements = []
    for edu in edu_elements:
        try:
            education.append(edu.find_element(By.TAG_NAME, "h3").text)
        except WebDriverException:
            continue

    # Extract Skills
    # Build the skills URL from the path alone, so a trailing slash or a
    # query string on profile_url cannot produce a malformed address.
    parts = urlsplit(profile_url)
    skills_url = urlunsplit(
        (parts.scheme, parts.netloc, parts.path.rstrip("/") + "/details/skills/", "", "")
    )
    skills = []
    try:
        driver.get(skills_url)  # Navigate to skills section
        time.sleep(random.uniform(4, 6))
        for skill in driver.find_elements(
            By.XPATH, "//span[contains(@class, 'skill-name')]"
        ):
            skills.append(skill.text)
    except WebDriverException:
        # Best effort: keep whatever skills were collected before the failure.
        pass

    return {
        "Name": name,
        "Jobs": "; ".join(jobs),
        "Education": "; ".join(education),
        "Skills": "; ".join(skills),
        "Profile URL": profile_url,
    }

# ---------------- MAIN SCRIPT ----------------
if __name__ == "__main__":
    import os

    # Prefer credentials from the environment so real secrets never need to
    # be written into the notebook; the placeholders remain as a fallback.
    EMAIL = os.environ.get("LINKEDIN_EMAIL", "your-email@example.com")
    PASSWORD = os.environ.get("LINKEDIN_PASSWORD", "your-password")

    # Upper bound on profile pages visited per run, to avoid bans.
    MAX_PROFILES = 10

    job_titles = [
        "Software Developer",
        "Software Engineer",
        "Data Scientist",
        "AI Engineer",
        "Machine Learning Engineer",
        "Backend Developer",
        "Frontend Developer",
        "Devops Engineer",
        "QA Engineer",
        "IT Administrator",
        "System Administrator",
        "Data Analyst",
        "Full Stack Developer",
        "Web Developer",
        "System Architect",
        "Cyber Security",
        "SRE",
        "Security Analyst",
        "Cloud Architect",
        "AI researcher",
        "Automation Engineer",
    ]

    # The browser is closed in `finally` so a failure anywhere in the run
    # does not leave a headless Chrome process behind.
    try:
        linkedin_login(EMAIL, PASSWORD)

        all_profiles = []
        for job in job_titles:
            print(f"🔍 Searching for: {job}")
            all_profiles.extend(search_profiles(job, num_pages=4))
            time.sleep(random.uniform(5, 10))

        # Remove duplicates while keeping first-seen order, so the subset
        # scraped below is the same from run to run for the same results.
        all_profiles = list(dict.fromkeys(all_profiles))

        # Scrape each profile
        scraped_data = []
        for profile_url in all_profiles[:MAX_PROFILES]:
            data = scrape_profile(profile_url)
            scraped_data.append(data)
            print(f"✅ Scraped: {data['Name']} - {data['Jobs']}")
            time.sleep(random.uniform(5, 10))

        # Save to CSV
        df = pd.DataFrame(scraped_data)
        df.to_csv("linkedin_profiles.csv", index=False)
        print("✅ Data saved to linkedin_profiles.csv")
    finally:
        driver.quit()
