In [4]:
import warnings
warnings.filterwarnings("ignore")
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from dotenv import load_dotenv
import time
from time import sleep

load_dotenv()

url = "https://www.linkedin.com/in/laxmimerit"  # profile to scrape
driver = webdriver.Chrome()  # start a new window with chrome web browser
wait = WebDriverWait(driver, 10)  # WebDriverWait instance with a 10-second timeout
profile_data = {}  # dictionary to store profile data

In [5]:
def login():
    driver.get('https://www.linkedin.com/login')
    try:
        wait.until(EC.title_contains("LinkedIn Login"))
    except TimeoutException:
        print("Error: Not on login page")
        return

    email = wait.until(EC.presence_of_element_located((By.ID, 'username')))
    email.send_keys(os.environ['EMAIL'])

    password = driver.find_element(By.ID, 'password')
    password.send_keys(os.environ['PASSWORD'])
    password.submit()

    try:
        wait.until(EC.url_contains("/feed/"))
    except TimeoutException:
        print("Error: Login failed or page not redirected")

def scrape_name_headline():
    driver.get(url)
    sleep(4)
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")))
    except TimeoutException:
        print("Error: Could not load profile page or invalid URL")
        return

    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    try:
        name_element = driver.find_element(By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")
        name = name_element.text.strip()
        profile_data['name'] = name
    except Exception as e:
        print(e)
        profile_data['name'] = ""

    profile_data['url'] = url

    try:
        headline = soup.find('div', {'class': 'text-body-medium break-words'})
        profile_data['headline'] = headline.get_text().strip() if headline else ""
        print("Headline fetched ", profile_data)
    except Exception as e:
        print(e)
        profile_data['headline'] = ""

def get_all_sections_list():
    try:
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'lxml')
        sleep(1)
        return soup.find_all('section', {'class': 'artdeco-card pv-profile-card break-words mt2'})
    except Exception as e:
        print(e)
        return

def scrape_about(sections):
    try:
        about_section = next((sec for sec in sections if sec.find('div', {'id': 'about'})), None)
        #sleep(1)
        if not about_section:
            print("No About section found")
            profile_data['about'] = ""
            return
        try:
            about = about_section.find('div', class_='display-flex ph5 pv3')
            profile_data['about'] = about.get_text().strip() if about else ""
            print('ABOUT DONE                  = \n', profile_data['about'])
        except Exception as e:
            print(e)
            profile_data['about'] = ""
    except Exception as e:
        print(e)
        profile_data['about'] = ""
    
def get_exp(exp):
    exp_dict = {}

    # Extract company name
    try:
        name_container = exp.find('div', {'class': 'display-flex flex-wrap align-items-center full-height'})
        name = name_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if name_container else ""
    except AttributeError:
        name = ""
    exp_dict['company_name'] = name

    # Extract duration
    try:
        duration_container = exp.find('span', {'class': 't-14 t-normal'})
        duration = duration_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if duration_container else ""
    except AttributeError:
        duration = ""
    exp_dict['duration'] = duration

    # Extract designations
    designations = exp.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk yCpXOOXwXcJnFOsCoYTsmMdAvLcplVbgNCBU'}) or []
    item_list = []

    for position in designations:
        spans = position.find_all('span', {'class': 'visually-hidden'})
        item_dict = {
            'designation': spans[0].get_text().strip() if len(spans) > 0 else "",
            'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
            'location': spans[2].get_text().strip() if len(spans) > 2 else "",
            'projects': spans[3].get_text().strip() if len(spans) > 3 else ""
        }
        item_list.append(item_dict)

    exp_dict['designations'] = item_list

    return exp_dict


def scrape_experience(sections):
    try:
        experience_section = next((sec for sec in sections if sec.find('div', {'id': 'experience'})), None)
        sleep(1)
        if not experience_section:
            print("No Experience section found")
            profile_data['experience'] = []
            return

        try:
            experiences = experience_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'})
            profile_data['experience'] = [get_exp(exp) for exp in experiences]
            print('EXPERIENCE DONE                   =\n',profile_data['experience'])
        except Exception as e:
            print(e)
            profile_data['experience'] = []
    except Exception as e:
        print(e)
        profile_data['experience'] = []


def get_project(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})

    item_dict = {
        'project_name': spans[0].get_text().strip() if len(spans) > 0 else "",
        'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
        'description': spans[2].get_text().strip() if len(spans) > 2 else "",
    }
    return item_dict


def scrape_projects():
    try:
        # Wait for the 'See All Projects' button and click it
        view_all_projects_button = wait.until(EC.element_to_be_clickable((By.ID, "navigation-index-see-all-projects")))
        view_all_projects_button.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find the 'See All Projects' button.")
        profile_data['projects'] = []
        return

    try:
        # Wait for the projects section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the projects section.")
        profile_data['projects'] = []
        return

    # Parse the page source for project details
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    projects_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = projects_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if projects_section else []

    profile_data['projects'] = [get_project(item) for item in items]
    print('PROJECT DONE                    =\n',profile_data['projects'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)
    #wait.until(EC.url_to_be(url))  # Ensure the navigation is complete


def get_skills(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})
    return spans[0].get_text().strip() if spans else ""


def scrape_skills():
    try:
        # Wait for the 'Show All Skills' link and click it
        view_all_skills_link = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@id, 'navigation-index-Show-all-') and contains(@id, '-skills')]"))
        )
        view_all_skills_link.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find or click the 'Show All Skills' link.")
        profile_data['skills'] = []
        return

    try:
        # Wait for the skills section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the skills section.")
        profile_data['skills'] = []
        return

    # Parse the page source for skills
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    skills_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = skills_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if skills_section else []

    profile_data['skills'] = [get_skills(item) for item in items]
    print('Skills done             =\n',profile_data['skills'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)
    #wait.until(EC.url_to_be(url))  # Ensure the navigation is complete

import json
def save_profile_data_to_json(profile_data, file_name="profile_data.json"):
    try:
        # Convert Python dictionary to a JSON string and save it to a file
        with open(file_name, 'w', encoding='utf-8') as json_file:
            json.dump(profile_data, json_file, indent=4, ensure_ascii=False)
        print(f"Profile data saved to {file_name}")
    except Exception as e:
        print(f"An error occurred while saving JSON: {e}")



# Main execution flow
sections = None

login()
scrape_name_headline()
sections = get_all_sections_list()
#print('sections = ',sections)
scrape_about(sections)
scrape_experience(sections)
scrape_projects()
scrape_skills()
print("Profile_data = ", profile_data)
driver.quit()

save_profile_data_to_json(profile_data)

Headline fetched  {'name': 'Laxmi Kant', 'url': 'https://www.linkedin.com/in/laxmimerit', 'headline': 'Gen AI in Finance & Investment Services | Data Scientist | IIT Kharagpur | Asset Management | AI-Driven Financial Modeling | Search Ranking | NLP Python BERT AWS Elasticsearch GNN SQL LLM | AI in Investment Strategies'}
ABOUT DONE                  = 
 Demonstrated 8+ years of expertise in advanced analytics as an AVP in Data Science, showcasing dynamic and impactful contributions. Seeking to leverage expertise in customer behavior modeling, personalized marketing, product discovery & search optimization, and recommendations to drive impactful solutions in the fields of Data Science, Machine Learning, and Artificial Intelligence.👉 Significant Achievements Across Career▪ Strategically led impactful initiatives in Customer Behavior Modeling, resulting in a substantial 30% increase in customer retention and a 10% reduction in advertising spending.▪ Pioneered the development and implementa

####################################################################################################################################################################################################################################################################################################################################################################

In [8]:
import warnings
warnings.filterwarnings("ignore")
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from dotenv import load_dotenv
import json
import sys
from time import sleep



#url = "https://www.linkedin.com/in/laxmimerit"  # profile to scrape
global driver
global wait
global url
global profile_data

def login():
    driver.get('https://www.linkedin.com/login')
    try:
        wait.until(EC.title_contains("LinkedIn Login"))
    except TimeoutException:
        print("Error: Not on login page")
        return

    email = wait.until(EC.presence_of_element_located((By.ID, 'username')))
    email.send_keys(os.environ['EMAIL'])

    password = driver.find_element(By.ID, 'password')
    password.send_keys(os.environ['PASSWORD'])
    password.submit()

    try:
        wait.until(EC.url_contains("/feed/"))
    except TimeoutException:
        print("Error: Login failed or page not redirected")

def scrape_name_headline():
    driver.get(url)
    sleep(4)
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")))
    except TimeoutException:
        print("Error: Could not load profile page or invalid URL")

    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    try:
        name_element = driver.find_element(By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")
        name = name_element.text.strip()
        profile_data['name'] = name
    except Exception as e:
        print(e)
        profile_data['name'] = ""

    profile_data['url'] = url

    try:
        headline = soup.find('div', {'class': 'text-body-medium break-words'})
        profile_data['headline'] = headline.get_text().strip() if headline else ""
        print("Headline fetched ", profile_data)
    except Exception as e:
        print(e)
        profile_data['headline'] = ""

def scrape_pfp_banner():
    try:
        page_source = driver.page_source  # gets all the html code
        soup = BeautifulSoup(page_source, 'lxml') # parse
    except Exception as e:
        print(e)

    try:
        image_div = soup.find('div', {'class': 'ph5 pb5'})
        image_tag = image_div.find('img', {'class' : 'gYwGeQHVKFihyyWibCvmHZFDQZfKneaBo pv-top-card-profile-picture__image--show evi-image ember-view'})
        pfp_uri = image_tag.get('src')
    except Exception as e:
        print(e)
        pfp_uri=""
    profile_data['pfp'] = pfp_uri

    try:
        image_tag = soup.find('img', {'id': "profile-background-image-target-image"})
        banner_uri = image_tag.get('src')
    except Exception as e:
        print(e)
        banner_uri = ""
    profile_data['banner'] = banner_uri
    

def get_all_sections_list():
    try:
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'lxml')
        sleep(1)
        return soup.find_all('section', {'class': 'artdeco-card pv-profile-card break-words mt2'})
    except Exception as e:
        print(e)
        return

def scrape_about(sections):
    try:
        about_section = next((sec for sec in sections if sec.find('div', {'id': 'about'})), None)
        sleep(1)
        if not about_section:
            print("No About section found")
            profile_data['about'] = ""
            return
        try:
            about = about_section.find('div', class_='display-flex ph5 pv3')
            profile_data['about'] = about.get_text().strip() if about else ""
            print('ABOUT DONE                  = \n', profile_data['about'])
        except Exception as e:
            print(e)
            profile_data['about'] = ""
    except Exception as e:
        print(e)
        profile_data['about'] = ""
    
def get_exp(exp):
    exp_dict = {}

    # Extract company name
    try:
        name_container = exp.find('div', {'class': 'display-flex flex-wrap align-items-center full-height'})
        name = name_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if name_container else ""
    except AttributeError:
        name = ""
    exp_dict['company_name'] = name

    # Extract duration
    try:
        duration_container = exp.find('span', {'class': 't-14 t-normal'})
        duration = duration_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if duration_container else ""
    except AttributeError:
        duration = ""
    exp_dict['duration'] = duration

    # get the company logo
    try:
        image_tag = exp.find('img', {'class': 'ivm-view-attr__img--centered EntityPhoto-square-3 evi-image lazy-image ember-view'})
        logo = image_tag.get('src')
    except:
        logo = ""
    exp_dict['logo'] = logo  

    # Extract designations
    designations = exp.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk yCpXOOXwXcJnFOsCoYTsmMdAvLcplVbgNCBU'}) or []
    item_list = []

    for position in designations:
        spans = position.find_all('span', {'class': 'visually-hidden'})
        item_dict = {
            'designation': spans[0].get_text().strip() if len(spans) > 0 else "",
            'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
            'location': spans[2].get_text().strip() if len(spans) > 2 else "",
            'projects': spans[3].get_text().strip() if len(spans) > 3 else ""
        }
        item_list.append(item_dict)

    exp_dict['designations'] = item_list

    return exp_dict


def scrape_experience(sections):
    try:
        experience_section = next((sec for sec in sections if sec.find('div', {'id': 'experience'})), None)
        sleep(1)
        if not experience_section:
            print("No Experience section found")
            profile_data['experience'] = []
            return

        try:
            experiences = experience_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'})
            profile_data['experience'] = [get_exp(exp) for exp in experiences]
            print('EXPERIENCE DONE                   =\n',profile_data['experience'])
        except Exception as e:
            print(e)
            profile_data['experience'] = []
    except Exception as e:
        print(e)
        profile_data['experience'] = []


def get_project(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})

    item_dict = {
        'project_name': spans[0].get_text().strip() if len(spans) > 0 else "",
        'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
        'description': spans[2].get_text().strip() if len(spans) > 2 else "",
    }
    return item_dict


def scrape_projects():
    try:
        # Wait for the 'See All Projects' button and click it
        view_all_projects_button = wait.until(EC.element_to_be_clickable((By.ID, "navigation-index-see-all-projects")))
        view_all_projects_button.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find the 'See All Projects' button.")
        profile_data['projects'] = []
        return

    try:
        # Wait for the projects section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the projects section.")
        profile_data['projects'] = []
        return

    # Parse the page source for project details
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    projects_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = projects_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if projects_section else []

    profile_data['projects'] = [get_project(item) for item in items]
    print('PROJECT DONE                    =\n',profile_data['projects'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)


def get_skills(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})
    return spans[0].get_text().strip() if spans else ""


def scrape_skills():
    try:
        # Wait for the 'Show All Skills' link and click it
        view_all_skills_link = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@id, 'navigation-index-Show-all-') and contains(@id, '-skills')]"))
        )
        view_all_skills_link.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find or click the 'Show All Skills' link.")
        profile_data['skills'] = []
        return

    try:
        # Wait for the skills section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the skills section.")
        profile_data['skills'] = []
        return

    # Parse the page source for skills
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    skills_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = skills_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if skills_section else []

    profile_data['skills'] = [get_skills(item) for item in items]
    print('Skills done             =\n',profile_data['skills'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)



def save_profile_data_to_json(profile_data, file_name="profile_data.json"):
    try:
        # Convert Python dictionary to a JSON string and save it to a file
        with open(file_name, 'w', encoding='utf-8') as json_file:
            json.dump(profile_data, json_file, indent=4, ensure_ascii=False)
        print(f"Profile data saved to {file_name}")
    except Exception as e:
        print(f"An error occurred while saving JSON: {e}")


def main():
    # if len(sys.argv) < 2:
    #     print("Usage: python scrapper.py <LinkedIn_Profile_URL>")
    #     sys.exit(1)

    load_dotenv()
    global driver
    global wait
    global profile_data
    global url

    #url = sys.argv[1]
    url = "https://www.linkedin.com/in/chandrasiddhartha/"

    driver = webdriver.Chrome()  # start a new window with chrome web browser
    wait = WebDriverWait(driver, 10)  # WebDriverWait instance with a 10-second timeout
    profile_data = {}  # dictionary to store profile data
    sections = None # sections in linkedin (eg about section, experience section, etc)
    try:
        login()
        scrape_name_headline()
        scrape_pfp_banner()
        sections = get_all_sections_list()
        #print('sections = ',sections)
        scrape_about(sections)
        scrape_experience(sections)
        scrape_projects()
        scrape_skills()
        print("Profile_data = ", profile_data)
        save_profile_data_to_json(profile_data)  # saves the json file in same directory
    except TimeoutException:
        print("Timed out waiting for a page element. Ensure that the provided URL is correct or maybe a wild captcha appeared.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()
        #return json.dump(profile_data)

if __name__ == "__main__":
    main()




Headline fetched  {'name': 'Siddhartha Chandra', 'url': 'https://www.linkedin.com/in/chandrasiddhartha/', 'headline': 'Technologist | Startup & Student Mentor | EdTech Innovator | Educator & Community Builder'}
ABOUT DONE                  = 
 Seasoned developer in Data Science, with a diverse experience of solving problems in several US-based AI start-ups. I'm passionate about solving problems collaboratively by building resourceful solutions on robust and flexible architectural foundations, which can scale organically accruing minimal tech debt.Experience: 8+ years in Data Science11+ years in EngineeringIn my past roles, I have owned and built multiple test-backed systems from ground-up taking them from prototype to production, transitioned legacy systems, created insightful monitoring systems, authored internal libraries and data connection layers, written and maintained data pipelines in Airflow and CI/CD systems in Jenkins, and mentored developers.Areas of Expertise:Programming: Py

In [10]:
import warnings
warnings.filterwarnings("ignore")
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from dotenv import load_dotenv
import json
import sys
from time import sleep



#url = "https://www.linkedin.com/in/laxmimerit"  # profile to scrape
global driver
global wait
global url
global profile_data

def login():
    driver.get('https://www.linkedin.com/login')
    try:
        wait.until(EC.title_contains("LinkedIn Login"))
    except TimeoutException:
        print("Error: Not on login page")
        return

    email = wait.until(EC.presence_of_element_located((By.ID, 'username')))
    email.send_keys(os.environ['EMAIL'])

    password = driver.find_element(By.ID, 'password')
    password.send_keys(os.environ['PASSWORD'])
    password.submit()

    try:
        wait.until(EC.url_contains("/feed/"))
    except TimeoutException:
        print("Error: Login failed or page not redirected")

def scrape_name_headline():
    driver.get(url)
    sleep(4)
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")))
    except TimeoutException:
        print("Error: Could not load profile page or invalid URL")

    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    try:
        name_element = driver.find_element(By.XPATH, "//h1[contains(@class, 'inline') and contains(@class, 't-24') and contains(@class, 'v-align-middle')]")
        name = name_element.text.strip()
        profile_data['name'] = name
    except Exception as e:
        print(e)
        profile_data['name'] = ""

    profile_data['url'] = url

    try:
        headline = soup.find('div', {'class': 'text-body-medium break-words'})
        profile_data['headline'] = headline.get_text().strip() if headline else ""
        print("Headline fetched ", profile_data)
    except Exception as e:
        print(e)
        profile_data['headline'] = ""

def scrape_pfp_banner():
    try:
        page_source = driver.page_source  # gets all the html code
        soup = BeautifulSoup(page_source, 'lxml') # parse
    except Exception as e:
        print(e)

    try:
        image_div = soup.find('div', {'class': 'ph5 pb5'})
        image_tag = image_div.find('img', {'class' : 'gYwGeQHVKFihyyWibCvmHZFDQZfKneaBo pv-top-card-profile-picture__image--show evi-image ember-view'})
        pfp_uri = image_tag.get('src')
    except Exception as e:
        print(e)
        pfp_uri=""
    profile_data['pfp'] = pfp_uri

    try:
        image_tag = soup.find('img', {'id': "profile-background-image-target-image"})
        banner_uri = image_tag.get('src')
    except Exception as e:
        print(e)
        banner_uri = ""
    profile_data['banner'] = banner_uri
    

def get_all_sections_list():
    try:
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'lxml')
        sleep(1)
        return soup.find_all('section', {'class': 'artdeco-card pv-profile-card break-words mt2'})
    except Exception as e:
        print(e)
        return

def scrape_about(sections):
    try:
        about_section = next((sec for sec in sections if sec.find('div', {'id': 'about'})), None)
        sleep(1)
        if not about_section:
            print("No About section found")
            profile_data['about'] = ""
            return
        try:
            about = about_section.find('div', class_='display-flex ph5 pv3')
            profile_data['about'] = about.get_text().strip() if about else ""
            print('ABOUT DONE                  = \n', profile_data['about'])
        except Exception as e:
            print(e)
            profile_data['about'] = ""
    except Exception as e:
        print(e)
        profile_data['about'] = ""
    
def get_exp(exp):
    exp_dict = {}

    # Extract company name
    try:
        name_container = exp.find('div', {'class': 'display-flex flex-wrap align-items-center full-height'})
        name = name_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if name_container else ""
    except AttributeError:
        name = ""
    exp_dict['company_name'] = name

    # Extract duration
    try:
        duration_container = exp.find('span', {'class': 't-14 t-normal'})
        duration = duration_container.find('span', {'class': 'visually-hidden'}).get_text().strip() if duration_container else ""
    except AttributeError:
        duration = ""
    exp_dict['duration'] = duration

    # get the company logo
    try:
        image_tag = exp.find('img', {'class': 'ivm-view-attr__img--centered EntityPhoto-square-3 evi-image lazy-image ember-view'})
        logo = image_tag.get('src')
    except:
        logo = ""
    exp_dict['logo'] = logo  

    # Extract designations
    designations = exp.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk yCpXOOXwXcJnFOsCoYTsmMdAvLcplVbgNCBU'}) or []
    item_list = []

    for position in designations:
        spans = position.find_all('span', {'class': 'visually-hidden'})
        item_dict = {
            'designation': spans[0].get_text().strip() if len(spans) > 0 else "",
            'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
            'location': spans[2].get_text().strip() if len(spans) > 2 else "",
            'projects': spans[3].get_text().strip() if len(spans) > 3 else ""
        }
        item_list.append(item_dict)

    exp_dict['designations'] = item_list

    return exp_dict


def scrape_experience(sections):
    try:
        experience_section = next((sec for sec in sections if sec.find('div', {'id': 'experience'})), None)
        sleep(1)
        if not experience_section:
            print("No Experience section found")
            profile_data['experience'] = []
            return

        try:
            experiences = experience_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'})
            profile_data['experience'] = [get_exp(exp) for exp in experiences]
            print('EXPERIENCE DONE                   =\n',profile_data['experience'])
        except Exception as e:
            print(e)
            profile_data['experience'] = []
    except Exception as e:
        print(e)
        profile_data['experience'] = []


def get_project(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})

    item_dict = {
        'project_name': spans[0].get_text().strip() if len(spans) > 0 else "",
        'duration': spans[1].get_text().strip() if len(spans) > 1 else "",
        'description': spans[2].get_text().strip() if len(spans) > 2 else "",
    }
    return item_dict


def scrape_projects():
    try:
        # Wait for the 'See All Projects' button and click it
        view_all_projects_button = wait.until(EC.element_to_be_clickable((By.ID, "navigation-index-see-all-projects")))
        view_all_projects_button.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find the 'See All Projects' button.")
        profile_data['projects'] = []
        return

    try:
        # Wait for the projects section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the projects section.")
        profile_data['projects'] = []
        return

    # Parse the page source for project details
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    projects_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = projects_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if projects_section else []

    profile_data['projects'] = [get_project(item) for item in items]
    print('PROJECT DONE                    =\n',profile_data['projects'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)


def get_skills(item):
    spans = item.find_all('span', {'class': 'visually-hidden'})
    return spans[0].get_text().strip() if spans else ""


def scrape_skills():
    try:
        # Wait for the 'Show All Skills' link and click it
        view_all_skills_link = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@id, 'navigation-index-Show-all-') and contains(@id, '-skills')]"))
        )
        view_all_skills_link.click()
        sleep(4)
    except TimeoutException:
        print("Failed to find or click the 'Show All Skills' link.")
        profile_data['skills'] = []
        return

    try:
        # Wait for the skills section to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.artdeco-card.pb3')))
    except TimeoutException:
        print("Failed to load the skills section.")
        profile_data['skills'] = []
        return

    # Parse the page source for skills
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'lxml')

    skills_section = soup.find('section', {'class': 'artdeco-card pb3'})
    items = skills_section.find_all('div', {'class': 'fPLNkfiTqBJivqMiXLaRuObcmlUMZsDPkIAVk SeRILEEOfWLelfcuceiLywOjAamlMoEkmnTdFk itGgYIXPpfAaqNrUDXHQVkGSUXMzldwQtdzM'}) if skills_section else []

    profile_data['skills'] = [get_skills(item) for item in items]
    print('Skills done             =\n',profile_data['skills'])

    # Navigate back to the main profile page
    driver.back()
    sleep(4)



def save_profile_data_to_json(profile_data, file_name="profile_data.json"):
    try:
        # Convert Python dictionary to a JSON string and save it to a file
        with open(file_name, 'w', encoding='utf-8') as json_file:
            json.dump(profile_data, json_file, indent=4, ensure_ascii=False)
        print(f"Profile data saved to {file_name}")
    except Exception as e:
        print(f"An error occurred while saving JSON: {e}")

def scrape_linkedin_profile(profile_url):
    load_dotenv()
    global driver
    global wait
    global profile_data
    global url

    url = profile_url

    driver = webdriver.Chrome()  # start a new window with chrome web browser
    wait = WebDriverWait(driver, 10)  # WebDriverWait instance with a 10-second timeout
    profile_data = {}  # dictionary to store profile data
    sections = None # sections in linkedin (eg about section, experience section, etc)
    try:
        login()
        scrape_name_headline()
        scrape_pfp_banner()
        sections = get_all_sections_list()
        #print('sections = ',sections)
        scrape_about(sections)
        scrape_experience(sections)
        scrape_projects()
        scrape_skills()
        print("Profile_data = ", profile_data)
    except TimeoutException:
        print("Timed out waiting for a page element. Ensure that the provided URL is correct or maybe a wild captcha appeared.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()
        return save_profile_data_to_json(profile_data)  # Saves the JSON file in the same directory

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python scraper.py <LinkedIn_Profile_URL>")
        sys.exit(1)
    profile_url = sys.argv[1]
    scrape_linkedin_profile(profile_url)

An error occurred: Message: invalid argument
  (Session info: chrome=131.0.6778.109)
Stacktrace:
	GetHandleVerifier [0x00007FF6186C6CF5+28821]
	(No symbol) [0x00007FF618633880]
	(No symbol) [0x00007FF6184D55B9]
	(No symbol) [0x00007FF6184C3051]
	(No symbol) [0x00007FF6184C12FD]
	(No symbol) [0x00007FF6184C1B3C]
	(No symbol) [0x00007FF6184D885A]
	(No symbol) [0x00007FF6185701FE]
	(No symbol) [0x00007FF61854F2FA]
	(No symbol) [0x00007FF61856F412]
	(No symbol) [0x00007FF61854F0A3]
	(No symbol) [0x00007FF61851A778]
	(No symbol) [0x00007FF61851B8E1]
	GetHandleVerifier [0x00007FF6189FFCED+3408013]
	GetHandleVerifier [0x00007FF618A1745F+3504127]
	GetHandleVerifier [0x00007FF618A0B63D+3455453]
	GetHandleVerifier [0x00007FF61878BDFB+835995]
	(No symbol) [0x00007FF61863EB9F]
	(No symbol) [0x00007FF61863A854]
	(No symbol) [0x00007FF61863A9ED]
	(No symbol) [0x00007FF61862A1D9]
	BaseThreadInitThunk [0x00007FFBA654259D+29]
	RtlUserThreadStart [0x00007FFBA70CAF38+40]

Profile data saved to profile_da