## Profile Scraper
### Requirements loading


In [1]:
import csv
import os
from time import sleep
from urllib.parse import quote

from bs4 import BeautifulSoup
from dotenv import load_dotenv
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

### Load Environment Variables

In [2]:
# Load variables from a .env file (located by python-dotenv's default search)
# into the process environment. Without this call os.getenv() only sees
# variables that were already set when the kernel started.
load_dotenv()

# LinkedIn credentials, read from the environment so they never appear in the notebook
EMAIL = os.getenv('EMAIL')
PASSWORD = os.getenv('PASSWORD')

# Fail fast with a clear message instead of a confusing error later, when the
# login cell would try to type a missing value into the form.
_missing_vars = [name for name, value in (('EMAIL', EMAIL), ('PASSWORD', PASSWORD)) if not value]
if _missing_vars:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(_missing_vars)
        + ". Define them in a .env file or export them before running this notebook."
    )

### Initialize the Browser Driver


In [3]:
# Launch a Chrome session through Selenium; the cells below drive this browser.
# The (default) options object is created explicitly so browser flags have one obvious home.
chrome_options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=chrome_options)


### Open LinkedIn Login Page

In [4]:
# Open the sign-in page, then pause briefly so the form has time to render
LOGIN_URL = 'https://www.linkedin.com/login'
driver.get(LOGIN_URL)
sleep(2)

### Login to LinkedIn

In [5]:
# Locate each sign-in input by its element id and type the matching credential,
# username first and password second.
form_fields = {}
for field_id, credential in (('username', EMAIL), ('password', PASSWORD)):
    form_fields[field_id] = driver.find_element(By.ID, field_id)
    form_fields[field_id].send_keys(credential)

email_field, password_field = form_fields['username'], form_fields['password']

# Submitting from the password input sends the enclosing form; then wait for the redirect
password_field.submit()
sleep(3)

### Take Input for Search Query

In [6]:
# Prompt for the person to look up: first name, then last name
first_name, last_name = input("Enter first name: "), input("Enter last name: ")



### Navigate to LinkedIn Search Page

In [7]:
# Build the people-search URL from the entered name.
# Empty parts are dropped and the result is percent-encoded (spaces become %20)
# so characters such as '&', '#', '+' or non-ASCII letters cannot break or
# alter the query string.
name_parts = [part.strip() for part in (first_name, last_name) if part.strip()]
search_keywords = quote(" ".join(name_parts), safe='')
search_url = f"https://www.linkedin.com/search/results/people/?keywords={search_keywords}"

driver.get(search_url)
sleep(3)  # give the results list time to render


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=132.0.6834.84)
Stacktrace:
	GetHandleVerifier [0x00007FF67698CC05+28741]
	(No symbol) [0x00007FF6768EFED0]
	(No symbol) [0x00007FF676784FBA]
	(No symbol) [0x00007FF67675ED55]
	(No symbol) [0x00007FF676806DE7]
	(No symbol) [0x00007FF67681F8B2]
	(No symbol) [0x00007FF6767FFBD3]
	(No symbol) [0x00007FF6767C9F43]
	(No symbol) [0x00007FF6767CB2D1]
	GetHandleVerifier [0x00007FF676CBDE3D+3375741]
	GetHandleVerifier [0x00007FF676CD096A+3452330]
	GetHandleVerifier [0x00007FF676CC671D+3410781]
	GetHandleVerifier [0x00007FF676A5653B+854395]
	(No symbol) [0x00007FF6768FBE0F]
	(No symbol) [0x00007FF6768F7714]
	(No symbol) [0x00007FF6768F78AD]
	(No symbol) [0x00007FF6768E6189]
	BaseThreadInitThunk [0x00007FFAB975E8D7+23]
	RtlUserThreadStart [0x00007FFABB31FBCC+44]


### Extract Profile Data

In [8]:
# Accumulator that will hold one dict per scraped profile
profile_data = []

# Collect every <li> that sits directly under a <ul> whose class contains 'list-style-none'
RESULT_ITEMS_XPATH = ".//ul[contains(@class, 'list-style-none')]/li"
profiles = driver.find_elements(By.XPATH, RESULT_ITEMS_XPATH)

### Loop Through Profiles and Extract Details

In [10]:
# Number of result rows to read. The XPaths below are absolute, so rows are
# addressed purely by position: li[1] .. li[MAX_RESULTS].
MAX_RESULTS = 10

# Absolute XPath of the text column of the result at position `index`; the
# name, headline, location and profile link are all located beneath it.
RESULT_XPATH = (
    "/html/body/div[6]/div[3]/div[2]/div/div[1]/main/div/div/div[2]/div/ul"
    "/li[{index}]/div/div/div/div[2]/div[1]"
)
# Path from the text column down to the <a> element that wraps the name
NAME_LINK_SUFFIX = "/div[1]/div/span[1]/span/a"


def _read_or_na(xpath, attribute=None):
    """Return the stripped text of the element at `xpath`, or 'N/A' if it is absent.

    When `attribute` is given, that attribute's value is returned instead of
    the element text (unstripped, exactly as Selenium reports it).
    """
    try:
        element = driver.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return 'N/A'
    if attribute is not None:
        return element.get_attribute(attribute)
    return element.text.strip()


def _extract_profile(index):
    """Build the record for the search result at 1-based position `index`."""
    base = RESULT_XPATH.format(index=index)
    name_link = base + NAME_LINK_SUFFIX
    return {
        'Name': _read_or_na(name_link + "/span/span[1]"),
        'Headline': _read_or_na(base + "/div[2]"),
        'Location': _read_or_na(base + "/div[3]"),
        'Public Profile URL': _read_or_na(name_link, attribute='href'),
    }


# Rebuild the list from scratch so re-running this cell does not append duplicate rows
profile_data = [_extract_profile(i) for i in range(1, MAX_RESULTS + 1)]

# Output the extracted profile data
profile_data


[{'Name': 'Adarsh Kumar',
  'Headline': 'B.Tech (CSE-H) (AI&ML) (IBM) at CHANDIGARH UNIVERSITY',
  'Location': 'Chandigarh, India',
  'Public Profile URL': 'https://www.linkedin.com/in/adarsh-kr-kumar?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAAEZzxAcBwkYbgOwh8qsTchnBYuBGZOVKvjw'},
 {'Name': 'Adarsh Kumar',
  'Headline': "IIT BHU '26 | Competitive Programming | Software Development",
  'Location': 'Varanasi',
  'Public Profile URL': 'https://www.linkedin.com/in/adarsh--?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAAD1-uKABrDLSbZ2b7B34G6ypF-b5Yk9na34'},
 {'Name': 'Adarsh Kumar',
  'Headline': 'Upcoming Intern @Oracle Netsuite || Training and Placement Cell Representative, IIT BHU || Competitive Programming',
  'Location': 'Kolkata',
  'Public Profile URL': 'https://www.linkedin.com/in/adarsh-kumar-2415b4242?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAADw-s1EBKpjK2wiuEsb31P0rk24o-XiwmKk'},
 {'Name': 'Adarsh Kumar',
  'Headline': 'Data Engineer 2 @Accordion',
  'Location': 'Patna

In [11]:
# Export settings; the columns are the keys of each profile record.
# (csv is already imported in the first cell, so it is not re-imported here.)
OUTPUT_CSV = 'linkedin_user_data.csv'
CSV_COLUMNS = ['Name', 'Headline', 'Location', 'Public Profile URL']

try:
    # newline='' leaves line-ending handling to the csv module
    with open(OUTPUT_CSV, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=CSV_COLUMNS)
        writer.writeheader()
        writer.writerows(profile_data)

    # Confirm the file was saved successfully
    print(f"CSV file '{OUTPUT_CSV}' saved successfully.")
finally:
    # Close the browser and end the WebDriver session, even if the export failed
    driver.quit()

CSV file 'linkedin_user_data.csv' saved successfully.
