Skip to content

Commit

Permalink
chore: version
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed May 28, 2024
1 parent 7f6271b commit 6439f71
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs = scrape_jobs(
country_indeed='USA', # only needed for indeed / glassdoor

# linkedin_fetch_description=True # get full description and direct job url for linkedin (slower)
- # proxies=["Efb5EA8OIk0BQb:wifi;us;@proxy.soax.com:9000", "localhost"],
+ # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],

)
print(f"Found {len(jobs)} jobs")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
- version = "1.1.54"
+ version = "1.1.55"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy"
Expand Down
11 changes: 4 additions & 7 deletions src/jobspy/scrapers/linkedin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import Optional
from datetime import datetime

- from threading import Lock
from bs4.element import Tag
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse, unquote
Expand Down Expand Up @@ -71,8 +70,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
self.scraper_input = scraper_input
job_list: list[JobPost] = []
seen_urls = set()
- url_lock = Lock()
- page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0
+ page = scraper_input.offset // 10 * 10 if scraper_input.offset else 0
request_count = 0
seconds_old = (
scraper_input.hours_old * 3600 if scraper_input.hours_old else None
Expand Down Expand Up @@ -142,10 +140,9 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
job_id = href.split("-")[-1]
job_url = f"{self.base_url}/jobs/view/{job_id}"

- with url_lock:
-     if job_url in seen_urls:
-         continue
-     seen_urls.add(job_url)
+ if job_url in seen_urls:
+     continue
+ seen_urls.add(job_url)
try:
fetch_desc = scraper_input.linkedin_fetch_description
job_post = self._process_job(job_card, job_url, fetch_desc)
Expand Down

0 comments on commit 6439f71

Please sign in to comment.