# The Scraper Bot

In this notebook, we create a bot that will automatically collect job postings from LinkedIn.

Shoutout to Matan Freedman for the inspiration and the initial code structure — see the
<a href="https://medium.com/nerd-for-tech/linked-in-web-scraper-using-selenium-15189959b3ba">original article</a>.


In [None]:
import sys
sys.path.append('..')

import time
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from src.bot.ScraperNew import ScraperNew
from src.database.csv import save_to_csv

## Initialisation

In [19]:
# Relative path of the folder handed to save_to_csv for the raw per-title CSV files.
RAW_FOLDER = "../data/raw/"

In [30]:
# Create the scraper object that every later cell (and collect_job_data) uses
# through the notebook-level name `scraper`.
# `delay=1` is forwarded to ScraperNew as-is — presumably a wait in seconds
# between browser actions; confirm in src/bot/ScraperNew.
scraper = ScraperNew(delay=1)

# Log in to LinkedIn. No credentials are passed from the notebook, so ScraperNew
# has to obtain them itself.
# NOTE(review): confirm they come from the environment / a config file and are
# not hardcoded in the module.
scraper.login()

Logging in...


## Search jobs

### Debugging

In [5]:
# Search jobs by title and location (manual one-off run used to debug the scraper)
job_title = 'data scientist'
job_location = 'canada'
scraper.search_jobs(job_title, job_location)

# Get the pagination buttons.
# The fixed 2-second pause comes first — presumably to let the results page
# finish rendering before the buttons are looked up.
time.sleep(2)
pagination_buttons = scraper.get_pagination_buttons()

Searching for jobs: data scientist in canada
Found 3 search bars
Location search bar not found.
Getting pagination buttons...
Found 4 pagination buttons


In [None]:
# Scrape the jobs listed on the results page currently open in the browser.
current_page_jobs = scraper.get_current_page_jobs()

In [8]:
# Sanity check: how many jobs were returned for the current page.
len(current_page_jobs)

25

In [11]:
# Fetch the pagination buttons again and click the one at index 1
# (presumably the button for page 2 — confirm the ordering returned by
# get_pagination_buttons).
pagination_buttons = scraper.get_pagination_buttons()
pagination_buttons[1].click()

Getting pagination buttons...
Found 4 pagination buttons


### Collect Job Data

Search for jobs, then loop through every results page and collect the postings found on each one.

In [31]:
def collect_job_data(job_title, job_location, delay=3):
    """Search for a job title/location and scrape every results page.

    Relies on the notebook-level ``scraper`` object, which must already exist
    when this function is called.

    Args:
        job_title: Search term passed to ``scraper.search_jobs``.
        job_location: Location passed to ``scraper.search_jobs``.
        delay: Seconds to sleep after the search and before each page is
            scraped.

    Returns:
        A flat list made of whatever ``scraper.get_current_page_jobs`` returned
        for each visited page, in page order. Errors raised while scraping a
        page or while moving to the next one are printed, not raised.
    """
    # Perform the initial search
    scraper.search_jobs(job_title, job_location)
    time.sleep(delay)

    # Get the pagination buttons. When the results fit on a single page there
    # is no pagination bar, so an empty result must still count as one page --
    # otherwise the loop below never runs and the visible page is never scraped.
    pagination_buttons = scraper.get_pagination_buttons() or []
    num_pages = max(len(pagination_buttons), 1)
    print(f"{num_pages} pages found")

    all_jobs = []

    # Loop through each page: scrape it, then click the button of the next one.
    for page_idx in range(num_pages):
        print(f"--- Page {page_idx + 1}/{num_pages} ---")
        time.sleep(delay)

        # -- scrape this page --
        # Kept in its own try block so a scraping error on one page does not
        # prevent moving on to the following page.
        try:
            current_page_jobs = scraper.get_current_page_jobs()
            all_jobs.extend(current_page_jobs)
        except StaleElementReferenceException as e:
            print(f"StaleElementReferenceException on page {page_idx+1}: {e}")
        except Exception as e:
            print(f"Error on page {page_idx+1}: {e}")

        # Last page: nothing left to click.
        if page_idx >= num_pages - 1:
            break

        # -- move to the next page --
        try:
            # Re-grab buttons to avoid StaleElementReference
            pagination_buttons = scraper.get_pagination_buttons()
            next_button = pagination_buttons[page_idx + 1]
            next_button.click()
        except StaleElementReferenceException as e:
            print(f"StaleElementReferenceException on page {page_idx+1}: {e}")
            break
        except Exception as e:
            # If the click failed the browser is still on the current page;
            # continuing would scrape it again and append duplicate jobs.
            print(f"Error on page {page_idx+1}: {e}")
            break

    return all_jobs


### Run the search

In [None]:
# Search terms, processed in this order by the run loop; each title is also
# used as the filename passed to save_to_csv.
job_titles = [
    "data scientist", "machine learning engineer", "deep learning engineer",
    "mlops engineer", "llm engineer", "ai engineer", "ai researcher",
    "generative ai engineer", "prompt engineer", "NLP engineer",
    "computer vision engineer", "applied scientist", "AI software engineer",
]

# Single location applied to every search above.
job_location = "canada"

In [33]:
# Column names handed to save_to_csv for every exported file.
columns = ["Job Title", "Company", "City", "Work Mode", "Description", "Skills"]

# Running collection of the jobs gathered across all titles.
all_jobs_titles = []

for title in job_titles:
    print(f"\n=== Collecting data for: {title.title()} ===")
    try:
        # Scrape every results page for this title, keep the rows in memory,
        # then write them to a CSV named after the title.
        jobs = collect_job_data(title, job_location, delay=3)
        all_jobs_titles.extend(jobs)
        save_to_csv(data=jobs, folder=RAW_FOLDER, filename=title, colnames=columns)
    except Exception as exc:
        # One failing title must not abort the remaining searches.
        print(f"An error occurred while collecting data for {title}: {exc}")


=== Collecting data for: Ai Software Engineer ===
Searching for jobs: AI software engineer in canada
Found 3 search bars
Location search bar not found.
Getting pagination buttons...
Found 4 pagination buttons
4 pages found
--- Page 1/4 ---
Getting current page jobs…
Scrolling all job listings...
Layout detected: AI-powered
  → 25 cards loaded so far…
  → 25 cards loaded so far…
Reached bottom of job list.
Layout detected: AI-powered
Found 25 job cards.
Gathering job 0 information...
Gathering job 1 information...
Gathering job 2 information...
Gathering job 3 information...
Gathering job 4 information...
Gathering job 5 information...
Gathering job 6 information...
Gathering job 7 information...
Gathering job 8 information...
Gathering job 9 information...
Gathering job 10 information...
Gathering job 11 information...
Gathering job 12 information...
Gathering job 13 information...
Gathering job 14 information...
Gathering job 15 information...
Gathering job 16 information...
Gatherin

In [35]:
# Release the browser once scraping is finished (close_driver is provided by
# ScraperNew; presumably it quits the underlying Selenium WebDriver — confirm).
scraper.close_driver()