# Import Libraries

In [14]:
import pandas as pd     
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from setups import get_local_safe_setup
import os
import time
import pickle

## Login Manually & Save Cookies

In [15]:
# One-time manual login helper, deliberately disabled by wrapping it in a
# string literal (evaluating this cell only echoes the string back).
# To create or refresh the saved session, run the enclosed statements as real
# code: they open Chrome on the LinkedIn login page, wait 30 seconds so the
# login can be completed by hand, then pickle the browser cookies to
# cookies.pkl and close the browser.
# NOTE(review): the enclosed pickle.dump(...) is given an open() handle that is
# never explicitly closed; wrap it in `with open(...)` if this is re-enabled.
'''
# Create a new WebDriver instance
driver = webdriver.Chrome()

# Navigate to the login page
driver.get('https://www.linkedin.com/login')

# Manually log in to the website
time.sleep(30)

pickle.dump( driver.get_cookies() , open("cookies.pkl","wb"))

# Close the browser
driver.quit()

'''

'\n# Create a new WebDriver instance\ndriver = webdriver.Chrome()\n\n# Navigate to the login page\ndriver.get(\'https://www.linkedin.com/login\')\n\n# Manually log in to the website\ntime.sleep(30)\n\npickle.dump( driver.get_cookies() , open("cookies.pkl","wb"))\n\n# Close the browser\ndriver.quit()\n\n'

## Login by Cookies

In [16]:
# Start a browser session and authenticate it by replaying previously saved cookies.
# Create local setup like here: https://gist.github.com/theDestI/aa21a0e721b06a74bd58a0a391d96e8f
driver = get_local_safe_setup()

# Open a LinkedIn page first: Selenium only accepts cookies for the domain
# that is currently loaded in the browser.
driver.get('https://www.linkedin.com/login')
time.sleep(2)

# Load the cookies from the file; the context manager guarantees the file
# handle is closed.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load a cookies.pkl that you created yourself.
with open("cookies.pkl", "rb") as cookie_file:
    cookies = pickle.load(cookie_file)

for cookie in cookies:
    driver.add_cookie(cookie)

# Navigate to a page that requires authentication
driver.get('https://www.linkedin.com/jobs/search/?currentJobId=3601104124&geoId=106155005&keywords=data%20analyst&location=Egypt&refresh=true')



## Get Job Offer Links

In [17]:
# Collect the unique job-offer links from the first PAGES_TO_COLLECT pages of
# the search results currently open in `driver`.
PAGES_TO_COLLECT = 8
JOB_VIEW_PREFIX = "https://www.linkedin.com/jobs/view"

links = []           # unique job links, in first-seen order
seen_links = set()   # mirrors `links` for constant-time duplicate checks

print('Links are being collected now.')
try:
    for page in range(1, PAGES_TO_COLLECT + 1):
        time.sleep(2)
        jobs_block = driver.find_element(By.CLASS_NAME, 'scaffold-layout__list-container')
        jobs_list = jobs_block.find_elements(By.CSS_SELECTOR, '.jobs-search-results__list-item')

        for job in jobs_list:
            for anchor in job.find_elements(By.TAG_NAME, 'a'):
                # Read the attribute once; every get_attribute call is a
                # separate request to the browser.
                href = anchor.get_attribute('href')
                if href and href.startswith(JOB_VIEW_PREFIX) and href not in seen_links:
                    seen_links.add(href)
                    links.append(href)
            # scroll down for each job element
            driver.execute_script("arguments[0].scrollIntoView();", job)

        print(f'Collecting the links in the page: {page}')

        # Go to the next page, unless this was the last page to collect
        # (clicking further would load a page that is never scraped).
        if page < PAGES_TO_COLLECT:
            driver.find_element(By.XPATH, f"//button[@aria-label='Page {page + 1}']").click()
            time.sleep(3)

except Exception as e:
    # Best effort: report the problem and keep the links gathered so far
    # (e.g. when the search has fewer result pages than PAGES_TO_COLLECT).
    print(f"An error occurred: {e}")

print('Found ' + str(len(links)) + ' links for job offers')



Links are being collected now.
Collecting the links in the page: 1
Collecting the links in the page: 2
Collecting the links in the page: 3
Collecting the links in the page: 4
Collecting the links in the page: 5
Collecting the links in the page: 6
Collecting the links in the page: 7
Collecting the links in the page: 8
Found 200 links for job offers


## Scrape Job Offers

In [None]:

# Scrape every collected job-offer page into parallel per-column lists.
# Exactly one value per column is appended for every entry in `links`, so the
# columns always have the same length as `links` and each row stays aligned
# with its 'Job Link' when the DataFrame is built.
job_titles = []
company_names = []
company_locations = []
work_methods = []
post_dates = []
work_times = []
job_desc = []


def _text_or_none(parent, by, value):
    """Return the text of the first element under `parent` matching (by, value).

    Returns None when `parent` is None or when the lookup fails (for example
    because the element is missing on this page), so that one missing field
    never aborts the whole scrape.
    """
    if parent is None:
        return None
    try:
        return parent.find_element(by, value).text
    except Exception:
        return None


# Target list and locator for each general-information field of an offer.
FIELD_LOCATORS = [
    (job_titles, By.TAG_NAME, "h1"),
    (company_names, By.CLASS_NAME, "jobs-unified-top-card__company-name"),
    (company_locations, By.CLASS_NAME, "jobs-unified-top-card__bullet"),
    (work_methods, By.CLASS_NAME, "jobs-unified-top-card__workplace-type"),
    (post_dates, By.CLASS_NAME, "jobs-unified-top-card__posted-date"),
    (work_times, By.CLASS_NAME, "jobs-unified-top-card__job-insight"),
]

# Visit each link one by one to scrape the information
print('Visiting the links and collecting information just started.')

for offer, link in enumerate(links, start=1):
    # Navigation is handled separately from the "See more" click: if the page
    # itself cannot be loaded, the browser still shows the previous offer and
    # scraping it would attach the wrong data to this link.
    page_loaded = True
    try:
        driver.get(link)
        time.sleep(2)
    except Exception as e:
        print(f"An error occurred: {e}")
        page_loaded = False

    top_card = None
    if page_loaded:
        try:
            # Click See more.
            driver.find_element(By.CLASS_NAME, "artdeco-card__actions").click()
            time.sleep(2)
        except Exception as e:
            # The button is optional; carry on with whatever is visible.
            print(f"An error occurred: {e}")

        # Find the general information of the job offer. Only the first
        # matching container is used so that each link yields a single row.
        containers = driver.find_elements(By.CLASS_NAME, 'p5')
        if containers:
            top_card = containers[0]

    # One append per column per link; None marks a field that was not found.
    for column, by, value in FIELD_LOCATORS:
        column.append(_text_or_none(top_card, by, value))

    if top_card is not None:
        print(f'Scraping the Job Offer {offer} DONE.')
    else:
        print(f'Scraping the Job Offer {offer} FAILED: no offer details found.')
    time.sleep(2)

    # Scraping the job description
    if page_loaded:
        for description in driver.find_elements(By.CLASS_NAME, 'jobs-description__content'):
            job_text = _text_or_none(description, By.CLASS_NAME, "jobs-box__html-content")
            if job_text is not None:
                job_desc.append(job_text)
                print(f'Scraping the Job Description {offer}')
            time.sleep(2)

df = pd.DataFrame({
    'Job Title': job_titles,
    'Company Name': company_names,
    'Company Location': company_locations,
    'Work Method': work_methods,
    'Post Dates': post_dates,
    'Work Times': work_times,
    'Job Link': links,
})

df.to_csv('job_offers.csv', index=False)


# Output job descriptions to txt file
with open('job_descriptions.txt', 'w', encoding="utf-8") as f:
    for desc in job_desc:
        f.write(desc)
        f.write('\n' + '-'*100 + '\n')


In [1]:
# Show the scraped job offers: a bare `df` as the last expression of the cell
# is rendered as a table by the notebook.
# NOTE(review): the saved output below contains an 'Unnamed: 0' column, which
# presumably means it was produced from a frame re-read from job_offers.csv
# in another session; confirm and re-run so the output matches this notebook.
df

Unnamed: 0,Job Title,Company Name,Company Location,Work Method,Post Dates,Work Times,Job Link
0,Cloud Data Analyst _VOIS,_VOIS,"Cairo, Cairo, Egypt",Hybrid,5 days ago,Full-time,https://www.linkedin.com/jobs/view/3531284710/...
1,LV Sales Specialist- Water & Waste water,ABB,"Cairo, Cairo, Egypt",On-site,1 day ago,Full-time,https://www.linkedin.com/jobs/view/3531284710/...
2,System Analyst (Business Analyst),Agility,"Suez, As Suways, Egypt",On-site,1 week ago,Full-time · Associate,https://www.linkedin.com/jobs/view/3531284710/...
3,"Data Analyst (Bangkok Based, Relocation Provided)",Agoda,"Cairo, Cairo, Egypt",,3 days ago,Full-time · Associate,https://www.linkedin.com/jobs/view/3531284710/...
4,"Customer Insights Analyst (Bangkok Based, Relo...",Agoda,"Giza, Al Jizah, Egypt",,3 days ago,Full-time · Associate,https://www.linkedin.com/jobs/view/3531284710/...
...,...,...,...,...,...,...,...
145,CBI Associate,"UNHCR, the UN Refugee Agency","Cairo, Cairo, Egypt",,1 week ago,Full-time · Entry level,https://www.linkedin.com/jobs/view/3531284710/...
146,Ultralogistics Transport Planner,Unilever,"6th of October, Al Jizah, Egypt",On-site,4 days ago,Full-time · Mid-Senior level,https://www.linkedin.com/jobs/view/3531284710/...
147,,,,,,,https://www.linkedin.com/jobs/view/3531284710/...
148,,,,,,,https://www.linkedin.com/jobs/view/3531284710/...
