Skip to content

Commit

Permalink
chore: id added for JobPost schema (#152)
Browse files Browse the repository at this point in the history
  • Loading branch information
fasihhussain00 committed May 20, 2024
1 parent 1ffdb17 commit 08d63a8
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/jobspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def worker(site):

# Desired column order
desired_order = [
"id",
"site",
"job_url_hyper" if hyperlinks else "job_url",
"job_url_direct",
Expand Down
1 change: 1 addition & 0 deletions src/jobspy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class DescriptionFormat(Enum):


class JobPost(BaseModel):
id: str | None = None
title: str
company_name: str | None
job_url: str
Expand Down
1 change: 1 addition & 0 deletions src/jobspy/scrapers/glassdoor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def _process_job(self, job_data):
description = None
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
return JobPost(
id=str(job_id),
title=title,
company_url=company_url if company_id else None,
company_name=company_name,
Expand Down
1 change: 1 addition & 0 deletions src/jobspy/scrapers/indeed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def _process_job(self, job: dict) -> JobPost | None:
employer_details = employer.get("employerDetails", {}) if employer else {}
rel_url = job["employer"]["relativeCompanyPageUrl"] if job["employer"] else None
return JobPost(
id=str(job["key"]),
title=job["title"],
description=description,
company_name=job["employer"].get("name") if job.get("employer") else None,
Expand Down
11 changes: 11 additions & 0 deletions src/jobspy/scrapers/linkedin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def _process_job(
job_details = self._get_job_details(job_url)

return JobPost(
id=self._get_id(job_url),
title=title,
company_name=company,
company_url=company_url,
Expand All @@ -223,6 +224,16 @@ def _process_job(
logo_photo_url=job_details.get("logo_photo_url"),
)

def _get_id(self, url: str):
"""
Extracts the job id from the job url
:param url:
:return: str
"""
if not url:
return None
return url.split("/")[-1]

def _get_job_details(self, job_page_url: str) -> dict:
"""
Retrieves job description and other job details by going to the job page url
Expand Down
1 change: 1 addition & 0 deletions src/jobspy/scrapers/ziprecruiter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _process_job(self, job: dict) -> JobPost | None:
comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
comp_currency = job.get("compensation_currency")
return JobPost(
id=str(job['listing_key']),
title=title,
company_name=company,
location=location,
Expand Down

0 comments on commit 08d63a8

Please sign in to comment.