In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import json

def scrape_workday_jobs(url):
    service = Service("/usr/local/bin/chromedriver")
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36")

    driver = webdriver.Chrome(service=service, options=options)
    wait = WebDriverWait(driver, 30)
    all_jobs = []

    try:
        driver.get(url)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "section[data-automation-id='jobResults']")))
        job_elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.css-1q2dra3 a[data-automation-id='jobTitle']")))

        for job in job_elements:
            title = job.text
            link = job.get_attribute("href")
            all_jobs.append({"title": title, "link": link})

        #Investigate the network requests.
        network_logs = driver.execute_script("return window.performance.getEntries();")
        for log in network_logs:
          if "graphql" in log['name']:
            print(f"Network Request: {log['name']}")

        for job in all_jobs:
            print(f"Title: {job['title']}, Link: {job['link']}")

        print(f"Found {len(all_jobs)} job listings")

    except TimeoutException:
        print("Timeout occurred while waiting for elements.")
    except NoSuchElementException:
        print("Element not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()

url = "https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs"
scrape_workday_jobs(url)

An error occurred: Message: unknown error: Unable to resolve weakLocalObjectReference=1
  (Session info: chrome=134.0.6998.165)
Stacktrace:
#0 0x55971ffedffa <unknown>
#1 0x55971faac970 <unknown>
#2 0x55971fab3b4a <unknown>
#3 0x55971fab6037 <unknown>
#4 0x55971fb4b81b <unknown>
#5 0x55971fb24292 <unknown>
#6 0x55971fb4a70c <unknown>
#7 0x55971fb24063 <unknown>
#8 0x55971faf0328 <unknown>
#9 0x55971faf1491 <unknown>
#10 0x55971ffb542b <unknown>
#11 0x55971ffb92ec <unknown>
#12 0x55971ff9ca22 <unknown>
#13 0x55971ffb9e64 <unknown>
#14 0x55971ff80bef <unknown>
#15 0x55971ffdc558 <unknown>
#16 0x55971ffdc736 <unknown>
#17 0x55971ffece76 <unknown>
#18 0x7f8be4564ac3 <unknown>



In [58]:
import requests
import json
from bs4 import BeautifulSoup
import time
from urllib.parse import quote

def extract_job_details_from_html(html_content):
    """Extracts job details from JSON within <script type="application/ld+json"> in the <head>."""
    soup = BeautifulSoup(html_content, 'html.parser')

    head_tag = soup.find('head')
    if head_tag:
        script_tag = head_tag.find('script', {'type': 'application/ld+json'})
        if script_tag:
            try:
                json_data = json.loads(script_tag.string)

                address_locality = json_data.get("jobLocation", {}).get("address", {}).get("addressLocality", "Not Available")
                date_posted = json_data.get("datePosted", "Not Available")
                employment_type = json_data.get("employmentType", "Not Available")
                description = json_data.get("description", "Not Available")
                title = json_data.get("title", "Not Available")
                

                return {
                    "title": title,
                    "addressLocality": address_locality,
                    "datePosted": date_posted,
                    "employmentType": employment_type,
                    "description": description,
                }
            except json.JSONDecodeError:
                return {"error": "Invalid JSON in script tag"}
        else:
            return {"error": "Script tag not found in head"}
    else:
        return {"error": "Head tag not found"}

url = "https://uva.wd1.myworkdayjobs.com/wday/cxs/uva/UVAStudentJobs/jobs"
payload = {
    "limit": 20,
    "offset": 0,
    "searchText": "",
    "appliedFacets": {}
}

all_jobs = []

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

while True:
    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        try:
            data = response.json()
            if 'jobPostings' in data:
                for job in data['jobPostings']:
                    external_path = job['externalPath']
                    detail_url = "https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs" + external_path #Corrected line.
                    print(detail_url)
                    detail_response = requests.get(detail_url, headers=headers)
                    if detail_response.status_code == 200:
                        job_data = extract_job_details_from_html(detail_response.content)
                        all_jobs.append(job_data)
                    else:
                        print(f"Error getting detail page: {detail_response.status_code}")
                        print(f"Response text: {detail_response.text}")
                        all_jobs.append({"error":f"error getting detail page: {detail_response.status_code}"})
                    time.sleep(2)

                if len(data['jobPostings']) < 20:
                    break

                payload["offset"] += 20
            else:
                break;
        except json.JSONDecodeError as e:
            print(f"JSON Decode Error: {e}")
            print(f"Response Text: {response.text}")
            print(f"Response Headers: {response.headers}")
            break
    else:
        print(f"Error: {response.status_code}")
        break

for job in all_jobs:
    print(json.dumps(job, indent=2))

print(f"Total Jobs: {len(all_jobs)}")

https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Facility-Operations-Assistant---Outdoor-Support--Student-Wage-_R0071193
https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Karsh-Institute-of-Democracy-Research-Assistant---Hybrid-Position--Student-Wage-_R0070934
https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Office-Assistant--Student-Wage--Federal-Work-Study_R0063567
https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Student-Instructor-Assistant--SIA---ARCH-GSVS-2150-GLOBAL-SUSTAINABILITY--Student-Wage-_R0064175
https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Student-Data-Science-Analyst--Federal-Work-Study--Student-Wage-_R0068540-1
https://uva.wd1.myworkdayjobs.com/en-US/UVAStudentJobs/job/Charlottesville-VA/Community-Resilience-Research-Assistant--Student-Wage---Federal-Work-Study-Only-_R0070433
https://uva.wd1.myworkdayjobs.com/en-US/UVAStude

In [59]:
import pandas as pd
df = pd.DataFrame(all_jobs)

In [60]:
df.head(15)

Unnamed: 0,title,addressLocality,datePosted,employmentType,description,error
0,Facility Operations Assistant – Outdoor Suppor...,Memorial Gymnasium,2025-03-28,PART_TIME,University of Virginia Department of Recreatio...,
1,Karsh Institute of Democracy Research Assistan...,Bond House,2025-03-26,PART_TIME,The University of Virginia’s Karsh Institute o...,
2,Office Assistant (Student Wage) Federal Work S...,Peabody Hall,2025-03-25,PART_TIME,We are looking for enthusiastic students to jo...,
3,"Student Instructor Assistant (SIA), ARCH/GSVS ...",Bryan Hall,2025-03-25,PART_TIME,This is an excellent opportunity for graduate ...,
4,Student Data Science Analyst (Federal Work Stu...,Booker House,2025-03-25,PART_TIME,The Enrollment Intelligence and Analytics depa...,
5,Community Resilience Research Assistant (Stude...,2400 Old Ivy Road,2025-03-25,PART_TIME,The Institute for Engagement + Negotiation (IE...,
6,IEN Student Research Assistant (SRA) - Chesape...,2400 Old Ivy Road,2025-03-24,PART_TIME,The Institute for Engagement + Negotiation (IE...,
7,Undergraduate Student Teaching Consultants (St...,Hotel D,2025-03-23,PART_TIME,The Center for Teaching Excellence is hiring u...,
8,Office Assistant (Student Wage),Monroe Hall,2025-03-21,PART_TIME,The Office for the Undergraduate College of A&...,
9,Darden Graduate Statistical Research Assistant...,Darden Faculty,2025-03-19,PART_TIME,The graduate statistical research assistant wi...,


In [66]:
df = df.drop(index=21)

In [None]:
df = df.drop(columns = ['error'])

KeyError: "['error'] not found in axis"

In [79]:
df.to_csv('uvajobsdata.csv')