In [1]:

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# =========================
# CONFIG
# =========================
# Search-results URL template; the single `{}` placeholder is filled with the
# page number via BASE_URL.format(page).
BASE_URL = "https://www.ibm.com/careers/search?field_keyword_05[0]=United%20States&p={}"

# Maps each label read from a job-detail page to the column name used for it
# in the scraped rows. Pair order defines the column order of each row dict.
TARGET_FIELDS = dict(
    [
        ("Job Title", "job_title"),
        ("Job ID", "job_id"),
        ("Date posted", "date_posted"),
        ("State / Province", "state_province"),
        ("Area of work", "area_of_work"),
        ("Projected Minimum Salary per year", "min_salary"),
        ("Projected Maximum Salary per year", "max_salary"),
        ("Position type", "position_type"),
        ("Required education", "required_education"),
        ("Preferred education", "preferred_education"),
        (
            "Preferred technical and professional experience",
            "preferred_technical_experience",
        ),
    ]
)

# =========================
# DRIVER
# =========================
# Headless Chrome configuration; flags are applied in the order listed.
chrome_options = Options()
for _chrome_flag in ("--headless=new", "--disable-gpu", "--window-size=1920,1080"):
    chrome_options.add_argument(_chrome_flag)

# One browser session and one shared explicit wait (10 s timeout) for the run.
driver = webdriver.Chrome(options=chrome_options)
wait = WebDriverWait(driver, 10)

# =========================
# HELPERS
# =========================
def scrape_job_detail(driver, timeout=10):
    """Read the labelled fields from the job-detail page loaded in ``driver``.

    Waits for the elements with class ``article__content__view__field`` to be
    present, reads each one's label and value text, and keeps only the labels
    listed in ``TARGET_FIELDS``.

    Args:
        driver: WebDriver whose current window shows a job-detail page.
        timeout: Seconds to wait for the field elements to appear.

    Returns:
        dict: One entry per ``TARGET_FIELDS`` value (the column name), holding
        the stripped value text, or ``None`` when the page had no such label.

    Raises:
        selenium.common.exceptions.TimeoutException: if no field element is
            present within ``timeout`` seconds.
    """
    # Wait on the driver that was passed in rather than the module-level
    # ``wait`` object, so the function really operates on its argument.
    fields = WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "article__content__view__field")
        )
    )

    raw = {}
    for field in fields:
        try:
            label = field.find_element(
                By.CLASS_NAME,
                "article__content__view__field__label"
            ).text.strip()

            value = field.find_element(
                By.CLASS_NAME,
                "article__content__view__field__value"
            ).text.strip()
        except (NoSuchElementException, StaleElementReferenceException):
            # A field without a label/value pair (or one that was re-rendered
            # while being read) is skipped; other errors propagate to the caller.
            continue

        raw[label] = value

    return {column: raw.get(label) for label, column in TARGET_FIELDS.items()}

# =========================
# SCRAPE ALL PAGES
# =========================
# Accumulates one dict per successfully scraped job-detail page.
rows = []
page = 1

# try/finally guarantees the browser is shut down even if the loop is
# interrupted or an unexpected error escapes.
try:
    while True:
        driver.get(BASE_URL.format(page))

        try:
            cards = wait.until(
                EC.presence_of_all_elements_located(
                    (By.CSS_SELECTOR, 'div.bx--card-group__cards__col[role="region"]')
                )
            )
        except TimeoutException:
            # No job cards appeared within the wait window: stop paginating.
            break
        except WebDriverException as exc:
            # Still stop (best-effort), but make the reason visible.
            print(f"Stopping at page {page}: {type(exc).__name__}")
            break

        job_links = []
        for card in cards:
            try:
                link = card.find_element(By.TAG_NAME, "a").get_attribute("href")
            except (NoSuchElementException, StaleElementReferenceException):
                continue
            # get_attribute returns None when the anchor has no href.
            if link:
                job_links.append(link)

        if not job_links:
            break

        search_tab = driver.current_window_handle
        for link in job_links:
            # Open each job in a new tab so the search-results page stays loaded.
            driver.execute_script("window.open(arguments[0]);", link)
            driver.switch_to.window(driver.window_handles[-1])

            try:
                rows.append(scrape_job_detail(driver))
            except WebDriverException as exc:
                # Skip this job but report it instead of dropping it silently.
                print(f"Skipping {link}: {type(exc).__name__}")
            finally:
                # Always close the detail tab and return to the results tab.
                driver.close()
                driver.switch_to.window(search_tab)

        print(f"Finished scraping page {page}")
        page += 1
finally:
    driver.quit()

# =========================
# DATAFRAME + SAVE
# =========================
# Pin the column list so the frame (and any file written from it) has the same
# schema and column order even when no rows were scraped.
df = pd.DataFrame(rows, columns=list(TARGET_FIELDS.values()))

df.head()


Finished scraping page 1
Finished scraping page 2
Finished scraping page 3
Finished scraping page 4
Finished scraping page 5
Finished scraping page 6
Finished scraping page 7
Finished scraping page 8
Finished scraping page 9
Finished scraping page 10
Finished scraping page 11
Finished scraping page 12
Finished scraping page 13
Finished scraping page 14
Finished scraping page 15
Finished scraping page 16
Finished scraping page 17


Unnamed: 0,job_title,job_id,date_posted,state_province,area_of_work,min_salary,max_salary,position_type,required_education,preferred_education,preferred_technical_experience
0,Software Developer Intern 2026: SVL,75759,30-Jan-2026,California,Software Engineering,90720.0,166320.0,Internship,High School Diploma/GED,Bachelor's Degree,"Exposure to cloud platforms (IBM Cloud, AWS, o..."
1,SAP Technology Architect,72352,26-Nov-2025,"Illinois, Texas, Indiana, New Jersey",Software Engineering,144000.0,248000.0,Professional,Bachelor's Degree,,"SAP\nS/4HANA, SAP BTP (CAP, RAP, Event Mesh, I..."
2,"Senior Software Engineer, (Boundary) - HashiCorp",82844,03-Feb-2026,"Texas, Massachusetts, California",Software Engineering,131000.0,226000.0,Professional,High School Diploma/GED,Bachelor's Degree,What's nice to have (preferred qualifications)...
3,DevOps Engineer - AWS,80597,19-Dec-2025,Texas,Infrastructure & Technology,99875.0,129250.0,Professional,Bachelor's Degree,Master's Degree,
4,Federal Senior Compliance Lead - CAS / FAR SME,86235,29-Jan-2026,"Minnesota, New York, Virginia, Maryland, Distr...",Enterprise Operations,147000.0,254000.0,Professional,Bachelor's Degree,Master's Degree,Direct experience with the Defense Contract Au...


In [2]:

# Write the scraped table to disk as UTF-8 CSV, omitting the index column.
df.to_csv(
    "ibm_jobs.csv",
    encoding="utf-8",
    index=False,
)

# Report the number of rows in the frame.
print(f"\nTotal jobs scraped: {len(df)}")



Total jobs scraped: 478
