In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
from tqdm import tqdm
from pprint import pprint
import random
import json
from concurrent.futures import ThreadPoolExecutor, as_completed


categories = [
    "https://www.hireitpeople.com/resume-database/70-oracle-developers-resumes",
    "https://www.hireitpeople.com/resume-database/71-sap-resumes",
    "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]

categories = [
        "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]




def scrape_resume_links(category_url, max_pages=200):
    """
    Purpose: scrapes resume page URLs from a given category link that
    contains multiple paged resume listings. eg: https://www.hireitpeople.com/resume-database/63-net-developers-architects-resumes

    working logic:
    1. resume_links' to store the extracted URLs.
    2. define HTTP request headers to mimic a real web browser and avoid basic blocking mechanisms.
    3. session object is created for efficiency.
    4. loop runs from page 1 up to 'max_pages'. For each iteration:
         - constructs the page URL by appending '/page/{page}' to the category URL.
         - sends a GET request to that page.
         - if the response code is not 200, it assumes there are no more pages and stops.
         - html is parsed using bs4.
         - looks for resume links using the CSS selector 'table.hit-table h4 a'.
         - extracts the 'href' attribute of each <a> tag and adds it to the list.
         - until no links are found.
         - random delay (from 0.3 to 0.7 seconds) is added to avoid server overload or detection.
    5. returns a list of all collected resume page URLs.
    """
    resume_links = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/116.0 Safari/537.36"
    }

    session = requests.Session()
    session.headers.update(headers)

    for page in range(1, max_pages + 1):
        url = f"{category_url}/page/{page}"
        print(f"[+] Scraping {url}")

        r = session.get(url)
        if r.status_code != 200:
            print(f"[-] Page {page} returned {r.status_code}, stopping.")
            break

        soup = BeautifulSoup(r.text, "html.parser")
        links = [a['href'] for a in soup.select("table.hit-table h4 a")]

        if not links:  # No more resumes
            print(f"[-] No resumes found on page {page}, stopping.")
            break

        resume_links.extend(links)

        # small random delay (just to avoid hammering)
        time.sleep(random.uniform(0.3, 0.7))

    return resume_links

all_links = {}

with ThreadPoolExecutor(max_workers=len(categories)) as executor:
    futures = {executor.submit(scrape_resume_links, cat_url, max_pages=200): cat_url for cat_url in categories}
    for future in as_completed(futures):
        cat_url = futures[future]
        try:
            links = future.result()
            all_links[cat_url] = links
            print(f"[✓] Collected {len(links)} resumes from {cat_url}")
        except Exception as e:
            print(f"[!] Failed to scrape {cat_url}: {e}")

# flatten list
all_resume_links = [link for links in all_links.values() for link in links]


[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/1[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/1

[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/5
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/5
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/6
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle

In [2]:
with open("resume_links.txt", "w") as f:
    f.write("\n".join(all_resume_links))

In [None]:
with open("resume_links.txt", "r") as f:
    all_resume_links = f.readlines()

In [3]:
def scrape_resume_links(category_url, max_pages=200):
    """
    Purpose: scrapes resume page URLs from a given category link that
    contains multiple paged resume listings. eg: https://www.hireitpeople.com/resume-database/63-net-developers-architects-resumes

    working logic:
    1. resume_links' to store the extracted URLs.
    2. define HTTP request headers to mimic a real web browser and avoid basic blocking mechanisms.
    3. session object is created for efficiency.
    4. loop runs from page 1 up to 'max_pages'. For each iteration:
         - constructs the page URL by appending '/page/{page}' to the category URL.
         - sends a GET request to that page.
         - if the response code is not 200, it assumes there are no more pages and stops.
         - html is parsed using bs4.
         - looks for resume links using the CSS selector 'table.hit-table h4 a'.
         - extracts the 'href' attribute of each <a> tag and adds it to the list.
         - until no links are found.
         - random delay (from 0.3 to 0.7 seconds) is added to avoid server overload or detection.
    5. returns a list of all collected resume page URLs.
    """
    resume_links = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/116.0 Safari/537.36"
    }

    session = requests.Session()
    session.headers.update(headers)

    for page in range(1, max_pages + 1):
        url = f"{category_url}/page/{page}"
        print(f"[+] Scraping {url}")

        r = session.get(url)
        if r.status_code != 200:
            print(f"[-] Page {page} returned {r.status_code}, stopping.")
            break

        soup = BeautifulSoup(r.text, "html.parser")
        links = [a['href'] for a in soup.select("table.hit-table h4 a")]

        if not links:  # No more resumes
            print(f"[-] No resumes found on page {page}, stopping.")
            break

        resume_links.extend(links)

        # small random delay (just to avoid hammering)
        time.sleep(random.uniform(0.3, 0.7))

    return resume_links

In [46]:
categories = [
    "https://www.hireitpeople.com/resume-database/70-oracle-developers-resumes",
    "https://www.hireitpeople.com/resume-database/71-sap-resumes",
    "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]

categories = [
        "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]

In [None]:
categories = [
    "https://www.hireitpeople.com/resume-database/66-business-analyst-resumes",
    "https://www.hireitpeople.com/resume-database/70-oracle-developers-resumes",
    "https://www.hireitpeople.com/resume-database/71-sap-resumes",
    "https://www.hireitpeople.com/resume-database/73-datawarehousing-etl-informatica-resumes",
    "https://www.hireitpeople.com/resume-database/74-business-intelligence-business-object-resumes",
    "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
    "https://www.hireitpeople.com/resume-database/80-peoplesoft-resumes",
    "https://www.hireitpeople.com/resume-database/81-project-manager-resumes",
    "https://www.hireitpeople.com/resume-database/82-quality-assurance-resumes",
    "https://www.hireitpeople.com/resume-database/87-sql-developers-resumes"
]


# "https://www.hireitpeople.com/resume-database/63-net-developers-architects-resumes",
# "https://www.hireitpeople.com/resume-database/64-java-developers-architects-resumes",
# "https://www.hireitpeople.com/resume-database/65-informatica-developers-architects-resumes",
# "https://www.hireitpeople.com/resume-database/66-business-analyst-resumes",
# "https://www.hireitpeople.com/resume-database/67-quality-assurance-qa-resumes",
# "https://www.hireitpeople.com/resume-database/68-network-and-systems-administrators-resumes",
# "https://www.hireitpeople.com/resume-database/69-help-desk-support-resumes",
# "https://www.hireitpeople.com/resume-database/70-oracle-developers-resumes",
# "https://www.hireitpeople.com/resume-database/71-sap-resumes",
# "https://www.hireitpeople.com/resume-database/72-web-developer-resumes",
# "https://www.hireitpeople.com/resume-database/73-datawarehousing-etl-informatica-resumes",
# "https://www.hireitpeople.com/resume-database/74-business-intelligence-business-object-resumes",
# "https://www.hireitpeople.com/resume-database/75-mainframe-resumes",
# "https://www.hireitpeople.com/resume-database/76-network-admin-resumes",
# "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
# "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
# "https://www.hireitpeople.com/resume-database/80-peoplesoft-resumes",
# "https://www.hireitpeople.com/resume-database/81-project-manager-resumes",
# "https://www.hireitpeople.com/resume-database/82-quality-assurance-resumes",
# "https://www.hireitpeople.com/resume-database/83-recruiter-resumes",
# "https://www.hireitpeople.com/resume-database/85-sas-resumes",
# "https://www.hireitpeople.com/resume-database/86-sharepoint-resumes",
# "https://www.hireitpeople.com/resume-database/87-sql-developers-resumes",
# "https://www.hireitpeople.com/resume-database/88-technical-writers-resumes",
# "https://www.hireitpeople.com/resume-database/89-websphere-resumes",

num_pages = 500    # each page contains 10 resumes

In [47]:

def scrape_resume_links(category_url, max_pages=200):
    """
    Purpose: scrapes resume page URLs from a given category link that
    contains multiple paged resume listings. eg: https://www.hireitpeople.com/resume-database/63-net-developers-architects-resumes

    working logic:
    1. resume_links' to store the extracted URLs.
    2. define HTTP request headers to mimic a real web browser and avoid basic blocking mechanisms.
    3. session object is created for efficiency.
    4. loop runs from page 1 up to 'max_pages'. For each iteration:
         - constructs the page URL by appending '/page/{page}' to the category URL.
         - sends a GET request to that page.
         - if the response code is not 200, it assumes there are no more pages and stops.
         - html is parsed using bs4.
         - looks for resume links using the CSS selector 'table.hit-table h4 a'.
         - extracts the 'href' attribute of each <a> tag and adds it to the list.
         - until no links are found.
         - random delay (from 0.3 to 0.7 seconds) is added to avoid server overload or detection.
    5. returns a list of all collected resume page URLs.
    """
    resume_links = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/116.0 Safari/537.36"
    }

    session = requests.Session()
    session.headers.update(headers)

    for page in range(1, max_pages + 1):
        url = f"{category_url}/page/{page}"
        print(f"[+] Scraping {url}")

        r = session.get(url)
        if r.status_code != 200:
            print(f"[-] Page {page} returned {r.status_code}, stopping.")
            break

        soup = BeautifulSoup(r.text, "html.parser")
        links = [a['href'] for a in soup.select("table.hit-table h4 a")]

        if not links:  # No more resumes
            print(f"[-] No resumes found on page {page}, stopping.")
            break

        resume_links.extend(links)

        # small random delay (just to avoid hammering)
        time.sleep(random.uniform(0.3, 0.7))

    return resume_links

all_links = {}

with ThreadPoolExecutor(max_workers=len(categories)) as executor:
    futures = {executor.submit(scrape_resume_links, cat_url, max_pages=200): cat_url for cat_url in categories}
    for future in as_completed(futures):
        cat_url = futures[future]
        try:
            links = future.result()
            all_links[cat_url] = links
            print(f"[✓] Collected {len(links)} resumes from {cat_url}")
        except Exception as e:
            print(f"[!] Failed to scrape {cat_url}: {e}")

# flatten list
all_resume_links = [link for links in all_links.values() for link in links]

[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/1
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/1
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/5
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/5
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/6
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle

In [50]:
all_links

{'https://www.hireitpeople.com/resume-database/77-oracle-resumes': ['https://www.hireitpeople.com/resume-database/77-oracle-resumes/628150-oracle-pl-sql-developer-resume-deerfield-il-1',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/628148-oracle-pl-sql-developer-resume-deerfield-il',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627839-oracle-pl-sql-developer-resume-san-antonio-tx-50',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627837-oracle-pl-sql-developer-resume-san-antonio-tx-48',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627789-oracle-database-administrator-resume-dubuque-iowa-2',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627754-oracle-pl-sql-developer-resume-san-antonio-tx-47',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627582-oracle-pl-sql-developer-resume-san-antonio-tx-42',
  'https://www.hireitpeople.com/resume-database/77-oracle-resumes/62758

In [5]:
session = requests.Session()
session.headers.update({"User-Agent": "Mozilla/5.0 ..."})

org_resume_dict = {}

print(f"[+] Scraping {len(all_resume_links)} resumes in parallel...")

with ThreadPoolExecutor(max_workers=12) as executor:  # adjust workers
    futures = {executor.submit(scrape_resume, url, session): url for url in all_resume_links}
    
    # tqdm progress bar with total count
    for future in tqdm(as_completed(futures), total=len(futures), desc="Scraping resumes", unit="resume"):
        url = futures[future]
        try:
            data = future.result()
            org_resume_dict[url] = data
        except Exception as e:
            print(f"[!] Failed {url}: {e}")

print(f"[✔] Completed scraping {len(org_resume_dict)} resumes.")

[+] Scraping 49730 resumes in parallel...


Scraping resumes:   0%|                             | 8/49730 [00:06<8:29:42,  1.63resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/627034-lead-oracle-database-administrator-resume-highlands-ranch-co-1: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/626958-account-tech-subject-matter-expert-tech-lead-resume-5: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                            | 40/49730 [00:18<5:59:19,  2.30resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/624509-oracle-financial-functional-consultant-resume-chicago-il-1: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                            | 42/49730 [00:20<7:24:50,  1.86resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/624383-oracle-dba-resume-atlanta-ga-12: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                            | 80/49730 [00:33<4:55:24,  2.80resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/614190-oracle-finance-functional-lead-consultant-resume-albany-ny-1: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                            | 83/49730 [00:34<3:59:12,  3.46resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/615269-soa-consultant-oracle-fusion-developer-resume-harrisburg-pa-1: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                            | 88/49730 [00:35<3:22:46,  4.08resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/612206-soa-admin-sme-resume-sacramento-ca: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                           | 110/49730 [00:42<3:18:07,  4.17resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/608077-sr-oracle-dba-resume-nc-3: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                           | 114/49730 [00:44<4:43:27,  2.92resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/608092-sr-oracle-developer-resume-nc-5: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                           | 116/49730 [00:46<6:12:36,  2.22resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/604485-oracle-developer-resume-albany-ny-4: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes:   0%|                           | 156/49730 [01:03<4:41:35,  2.93resume/s]

[!] Failed https://www.hireitpeople.com/resume-database/77-oracle-resumes/588260-oracle-fusion-middleware-administrator-resume-sacramento-california-3: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Scraping resumes: 100%|███████████████████████████| 49730/49730 [37:46<00:00, 21.94resume/s]

[✔] Completed scraping 49719 resumes.





In [6]:
with open("resumes.json", "w", encoding="utf-8") as f:
    json.dump(org_resume_dict, f, ensure_ascii=False, indent=4)

In [7]:
df = pd.DataFrame.from_dict(org_resume_dict, orient="index").reset_index()
df.rename(columns={"index": "url"}, inplace=True)

# Convert any dict/list column into a JSON string
for col in df.columns:
    df[col] = df[col].apply(
        lambda x: json.dumps(x, ensure_ascii=False) if isinstance(x, (dict, list)) else x
    )

# Save to Parquet
df.to_parquet("resumes.parquet", engine="pyarrow", index=False)
print("✅ Saved to resumes.parquet")


✅ Saved to resumes.parquet


In [23]:
# all_links = {}
# for cat_url in categories:
#     links = scrape_resume_links(cat_url, max_pages=1)
#     all_links[cat_url] = links
#     print(f"[✓] Collected {len(links)} resumes from {cat_url}")


# with open("resume_links.txt", "w") as f:
#     for cat, links in all_links.items():
#         f.write(f"# {cat}\n")
#         for link in links:
#             f.write(link + "\n")



# org_resume_dict = {}

# for domain_link in all_links.keys():
#     domain_resume_list = []
#     for resume_link in all_links[domain_link]: 
#         resume_dict = scrape_resume(resume_link)
#         domain_resume_list.append(resume_dict)
#         print(f"[✓] {resume_link}")
#     org_resume_dict[domain_link] = domain_resume_list
#     print(f"DOMAIN COMPLETED: {domain_link}")
    

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
from tqdm import tqdm
from pprint import pprint
import random
import json
from concurrent.futures import ThreadPoolExecutor, as_completed


categories = [
    "https://www.hireitpeople.com/resume-database/70-oracle-developers-resumes",
    "https://www.hireitpeople.com/resume-database/71-sap-resumes",
    "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]

categories = [
        "https://www.hireitpeople.com/resume-database/77-oracle-resumes",
    "https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes",
]




def scrape_resume_links(category_url, max_pages=200):
    """
    Purpose: scrapes resume page URLs from a given category link that
    contains multiple paged resume listings. eg: https://www.hireitpeople.com/resume-database/63-net-developers-architects-resumes

    working logic:
    1. resume_links' to store the extracted URLs.
    2. define HTTP request headers to mimic a real web browser and avoid basic blocking mechanisms.
    3. session object is created for efficiency.
    4. loop runs from page 1 up to 'max_pages'. For each iteration:
         - constructs the page URL by appending '/page/{page}' to the category URL.
         - sends a GET request to that page.
         - if the response code is not 200, it assumes there are no more pages and stops.
         - html is parsed using bs4.
         - looks for resume links using the CSS selector 'table.hit-table h4 a'.
         - extracts the 'href' attribute of each <a> tag and adds it to the list.
         - until no links are found.
         - random delay (from 0.3 to 0.7 seconds) is added to avoid server overload or detection.
    5. returns a list of all collected resume page URLs.
    """
    resume_links = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/116.0 Safari/537.36"
    }

    session = requests.Session()
    session.headers.update(headers)

    for page in range(1, max_pages + 1):
        url = f"{category_url}/page/{page}"
        print(f"[+] Scraping {url}")

        r = session.get(url)
        if r.status_code != 200:
            print(f"[-] Page {page} returned {r.status_code}, stopping.")
            break

        soup = BeautifulSoup(r.text, "html.parser")
        links = [a['href'] for a in soup.select("table.hit-table h4 a")]

        if not links:  # No more resumes
            print(f"[-] No resumes found on page {page}, stopping.")
            break

        resume_links.extend(links)

        # small random delay (just to avoid hammering)
        time.sleep(random.uniform(0.3, 0.7))

    return resume_links

all_links = {}

with ThreadPoolExecutor(max_workers=len(categories)) as executor:
    futures = {executor.submit(scrape_resume_links, cat_url, max_pages=200): cat_url for cat_url in categories}
    for future in as_completed(futures):
        cat_url = futures[future]
        try:
            links = future.result()
            all_links[cat_url] = links
            print(f"[✓] Collected {len(links)} resumes from {cat_url}")
        except Exception as e:
            print(f"[!] Failed to scrape {cat_url}: {e}")

# flatten list
all_resume_links = [link for links in all_links.values() for link in links]


[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/1[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/1

[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/2
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/3
[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/4
[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/5[+] Scraping https://www.hireitpeople.com/resume-database/77-oracle-resumes/page/5

[+] Scraping https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/page/6
[+] Scraping https://www.hireitpeople.com/resume-database/77-or

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import re
import requests
from typing import List, Optional, Tuple
from pydantic import BaseModel
from tqdm import tqdm


class Experience(BaseModel):
    job_role: str
    responsibilities: List[str]
    environment: Optional[str] = None


class Resume(BaseModel):
    job_role: str
    professional_summary: List[str]
    technical_skills: List[str]
    experiences: List[Experience]


class PostExtractionResult(BaseModel):
    """Result model for post extraction step."""
    job_role: Optional[str]
    structured_content: List[dict]
    full_text: str
    container_text: str
    missing_excerpt: str
    skipped_blocks: List[str]
    warnings: List[str]
    
def normalize_breaks(soup):
    """Convert <br> tags to newline text nodes so .get_text() uses them."""
    for br in soup.find_all("br"):
        br.replace_with("\n")

def clean_whitespace(text):
    lines = [ln.strip() for ln in text.splitlines()]
    while lines and lines[0] == "":
        lines.pop(0)
    while lines and lines[-1] == "":
        lines.pop()
    return "\n".join([re.sub(r'\s+', ' ', ln) for ln in lines])

def extract_job_role(soup):
    media_bodies = soup.find_all("div", class_=re.compile(r"media-body"))
    if media_bodies:
        media_body = media_bodies[0]
        job_title_tag = media_body.find("h3")
        if job_title_tag:
            job_role = job_title_tag.get_text(strip=True)
            if job_role:
                return job_role
    return None

def extract_post_body_safe(
    url: str,
    target_class: Optional[str] = None,
    class_regex: Optional[str] = None,
    allow_fallback: bool = True,
    debug: bool = False,
    min_word_threshold: int = 120,
    retries: int = 3,
) -> PostExtractionResult:
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
    
    for attempt in range(retries):
        try:
            with requests.Session() as session:
                session.headers.update(headers)
                resp = session.get(url, timeout=20)
                resp.raise_for_status()
                if len(resp.text) < 1000:
                    raise ValueError("Response too short.")
                break
        except Exception as e:
            if attempt == retries - 1:
                raise
            time.sleep(random.uniform(1, 3))
    soup = BeautifulSoup(resp.text, "html.parser")

    normalize_breaks(soup)

    # Identify container
    container = None
    if target_class:
        container = soup.find("div", class_=target_class)
    if not container and class_regex:
        container = soup.find("div", class_=re.compile(class_regex))
    if not container and allow_fallback:
        divs = soup.find_all("div")
        if divs:
            container = max(divs, key=lambda d: len(d.get_text(strip=True)))
    if not container:
        raise ValueError("Could not find a suitable container.")

    raw_container_text = container.get_text(separator="\n", strip=True)
    container_text = clean_whitespace(raw_container_text)

    structured_content = []
    skipped_blocks = []  # 🔹 Track skipped text blocks

    resume_job_role = extract_job_role(soup)

    # 2️⃣ Handle normal paragraphs and lists not under media-body
    for element in container.find_all(["p", "ul"], recursive=True):
        if element.find_parent("div", class_=re.compile(r"media-body")):
            continue  # already captured above

        if element.name == "p":
            text = clean_whitespace(" ".join(element.stripped_strings))
            # 🚫 Skip overly long text blocks
            if len(text.split()) > min_word_threshold:
                skipped_blocks.append(text[:120] + "...")
                continue
            if text:
                structured_content.append({"type": "p", "text": text})

        elif element.name == "ul":
            items = []
            for li in element.find_all("li", recursive=False):
                li_text = clean_whitespace(" ".join(li.stripped_strings))
                # 🚫 Skip overly long list items
                if len(li_text.split()) > min_word_threshold:
                    skipped_blocks.append(li_text[:120] + "...")
                    continue
                if li_text:
                    items.append(li_text)
            if items:
                structured_content.append({"type": "ul", "items": items})

    # Join all paragraph text for convenience
    joined_p = "\n\n".join(
        [b["text"] for b in structured_content if b.get("type") == "p"]
    )

    container_words = len(container_text.split())
    joined_words = len(joined_p.split()) if joined_p else 0

    warnings = []
    missing_excerpt = ""
    if container_words > joined_words + 20:
        temp = container_text
        for block in structured_content:
            if block.get("type") == "p":
                temp = temp.replace(block["text"], "")
            elif block.get("type") == "ul":
                for item in block["items"]:
                    temp = temp.replace(item, "")
            elif "job_role" in block:
                temp = temp.replace(block["job_role"], "")
                for sub in block.get("content", []):
                    if sub.get("type") == "p":
                        temp = temp.replace(sub["text"], "")
                    elif sub.get("type") == "ul":
                        for item in sub["items"]:
                            temp = temp.replace(item, "")
        missing_excerpt = temp.strip()[:800]
        if missing_excerpt:
            warnings.append("Container has additional text not captured by structured tags.")

    if "<script" in resp.text.lower() and (container_words == 0 or joined_words == 0):
        warnings.append("Page might be JS-rendered.")

    if debug:
        print("===== DEBUG INFO =====")
        print("Container classes:", container.get("class"))
        print("Job roles found:", sum(1 for b in structured_content if "job_role" in b))
        print("Paragraphs:", sum(1 for b in structured_content if b.get("type") == "p"))
        print("Lists:", sum(1 for b in structured_content if b.get("type") == "ul"))
        print("Skipped blocks:", len(skipped_blocks))
        print("Warnings:", warnings)
        print("======================")

    return PostExtractionResult(
        job_role=resume_job_role,
        structured_content=structured_content,
        full_text=joined_p,
        container_text=container_text,
        missing_excerpt=missing_excerpt,
        skipped_blocks=skipped_blocks,
        warnings=warnings,
    )

def parse_resume(json_data):
    # Initialize the resume structure
    resume = {
        "job_role": json_data.get("job_role", ""),
        "professional_summary": [],
        "technical_skills": [],
        "experiences": []
    }
    
    structured_content = json_data.get("structured_content", [])
    
    # Flags to track current section
    in_summary = False
    in_technical_skills = False
    in_professional_experience = False
    
    # Variables for experience parsing
    current_experience = None
    current_job_role = None
    current_responsibilities = []
    current_environment = None
    experience_started = False
    
    # Iterate through each element in structured_content
    i = 0
    while i < len(structured_content):
        element = structured_content[i]
        
        # Check if we're entering SUMMARY section
        if element["type"] == "p" and "SUMMARY" in element["text"]:
            in_summary = True
            in_technical_skills = False
            in_professional_experience = False
            i += 1
            continue
        
        # Check if we're entering TECHNICAL SKILLS section
        elif element["type"] == "p" and "TECHNICAL SKILLS" in element["text"]:
            in_summary = False
            in_technical_skills = True
            in_professional_experience = False
            i += 1
            continue
        
        # Check if we're entering PROFESSIONAL EXPERIENCE section
        elif element["type"] == "p" and "PROFESSIONAL EXPERIENCE" in element["text"]:
            in_summary = False
            in_technical_skills = False
            in_professional_experience = True
            i += 1
            continue
        
        # Process SUMMARY section
        elif in_summary:
            if element["type"] == "ul":
                resume["professional_summary"].extend(element["items"])
            i += 1
            continue
        
        # Process TECHNICAL SKILLS section
        elif in_technical_skills:
            if element["type"] == "p":
                # Skip the "TECHNICAL SKILLS:" header itself
                if "TECHNICAL SKILLS" not in element["text"]:
                    resume["technical_skills"].append(element["text"])
            i += 1
            continue
        
        # Process PROFESSIONAL EXPERIENCE section
        elif in_professional_experience:
            # Check for "Confidential" P tags to identify new experiences
            if element["type"] == "p" and "Confidential" in element["text"]:
                # Save previous experience if it exists and has data
                if experience_started and current_job_role:
                    experience_data = {
                        "job_role": current_job_role,
                        "responsibilities": current_responsibilities.copy()
                    }
                    if current_environment:
                        experience_data["environment"] = current_environment
                    resume["experiences"].append(experience_data)
                
                # Reset for new experience
                current_job_role = None
                current_responsibilities = []
                current_environment = None
                experience_started = True
                
                # The next P tag after "Confidential" should be the job role
                if i + 1 < len(structured_content):
                    next_element = structured_content[i + 1]
                    if next_element["type"] == "p":
                        current_job_role = next_element["text"]
                        i += 2  # Skip both confidential and job role
                    else:
                        i += 1
                else:
                    i += 1
                continue
            
            # Check for UL tags (responsibilities)
            elif element["type"] == "ul" and current_job_role:
                current_responsibilities.extend(element["items"])
                i += 1
                continue
            
            # Check for "Environment" P tags
            elif element["type"] == "p" and "Environment:" in element["text"]:
                current_environment = element["text"].replace("Environment:", "").strip()
                i += 1
                continue
            
            # Check for "Environment" without colon (some entries might have different formatting)
            elif element["type"] == "p" and "Environment" in element["text"] and current_job_role:
                current_environment = element["text"].replace("Environment", "").strip()
                if current_environment.startswith(":"):
                    current_environment = current_environment[1:].strip()
                i += 1
                continue
            
            # Regular P tag in experience section (might be environment or other info)
            elif element["type"] == "p":
                # If we don't have a job role yet but we're in an experience, this might be it
                if not current_job_role and "Confidential" not in element["text"] and "PROFESSIONAL EXPERIENCE" not in element["text"]:
                    current_job_role = element["text"]
                i += 1
                continue
            
            else:
                i += 1
                continue
        
        else:
            i += 1
            continue
    
    # Don't forget to add the last experience if it exists
    if experience_started and current_job_role:
        experience_data = {
            "job_role": current_job_role,
            "responsibilities": current_responsibilities.copy()
        }
        if current_environment:
            experience_data["environment"] = current_environment
        resume["experiences"].append(experience_data)
    
    return resume

import re
import json

def validate_structured_resume(json_data):
    structured_content = json_data.get("structured_content", [])
    errors = []
    i = 0
    n = len(structured_content)
    section_order = []

    def expect_type(index, expected_type, context):
        """Helper: validate type field"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        if structured_content[index].get("type") != expected_type:
            errors.append(f"Expected type '{expected_type}' at {context}, got '{structured_content[index].get('type')}'")
            return False
        return True

    def expect_text(index, expected_text, context, case_sensitive=True, startswith=False):
        """Helper: validate text content"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        text = structured_content[index].get("text", "")
        cmp_text = text if case_sensitive else text.upper()
        cmp_expected = expected_text if case_sensitive else expected_text.upper()

        if startswith:
            if not cmp_text.startswith(cmp_expected):
                errors.append(f"Expected text starting with '{expected_text}' at {context}, got '{text}'")
                return False
        elif cmp_text != cmp_expected:
            errors.append(f"Expected text '{expected_text}' at {context}, got '{text}'")
            return False
        return True

    # --- 1. OBJECTIVE (optional) ---
    if i < n and structured_content[i].get("text", "").strip().upper() == "OBJECTIVE":
        section_order.append("OBJECTIVE")
        expect_type(i, "p", "OBJECTIVE header")
        i += 1
        if i < n and structured_content[i].get("type") == "ul":
            if not isinstance(structured_content[i].get("items"), list):
                errors.append("OBJECTIVE ul should have a list of items")
            i += 1

    # --- 2. SUMMARY ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "SUMMARY":
        errors.append("Missing SUMMARY section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("SUMMARY")
    expect_type(i, "p", "SUMMARY header")
    i += 1

    expect_type(i, "ul", "SUMMARY bullets")
    if not isinstance(structured_content[i].get("items"), list):
        errors.append("SUMMARY ul must contain list of strings")
    i += 1

    # --- 3. TECHNICAL SKILLS ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "TECHNICAL SKILLS":
        errors.append("Missing TECHNICAL SKILLS section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("TECHNICAL SKILLS")
    expect_type(i, "p", "TECHNICAL SKILLS header")
    i += 1

    while i < n and structured_content[i].get("type") == "p" and structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        expect_type(i, "p", "TECHNICAL SKILL item")
        i += 1

    # --- 4. PROFESSIONAL EXPERIENCE ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        errors.append("Missing PROFESSIONAL EXPERIENCE section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("PROFESSIONAL EXPERIENCE")
    expect_type(i, "p", "PROFESSIONAL EXPERIENCE header")
    i += 1

    # --- 5. Validate experience blocks ---
    while i < n:
        # Company line
        if not (structured_content[i].get("type") == "p" and structured_content[i].get("text", "").startswith("Confidential")):
            errors.append(f"Expected company line starting with 'Confidential' at index {i}")
            break
        i += 1

        # Job title
        expect_type(i, "p", "Job Role")
        i += 1

        # Responsibilities
        if not (structured_content[i].get("type") == "p" and re.match(r"(?i)^responsibilities", structured_content[i].get("text", "").strip())):
            errors.append(f"Expected 'Responsibilities:' line at index {i}")
            break
        i += 1

        # Responsibilities list
        expect_type(i, "ul", "Responsibilities UL")
        if not isinstance(structured_content[i].get("items"), list):
            errors.append(f"Responsibilities UL at index {i} must contain a list of strings")
        i += 1

        # Environment line (optional)
        if i < n and structured_content[i].get("type") == "p" and structured_content[i].get("text", "").startswith("Environment"):
            i += 1

    return {"is_valid": len(errors) == 0, "errors": errors, "sections_found": section_order}

# Alternative version that returns a Resume object
def parse_resume_to_object(url: str) -> Tuple[Optional[Resume], Optional[str]]:
    """
    Returns (Resume object, failed_url_if_any).
    """
    try:
        scraped_data = extract_post_body_safe(
            url,
            class_regex=r"(single-post-body|post-content|entry-content|article-body)",
            allow_fallback=True
        )

        # Validate structure
        valid_check = validate_structured_resume(scraped_data.model_dump())
        if not valid_check["is_valid"]:
            return None, url  # Invalid resume structure

        parsed_data = parse_resume(scraped_data.model_dump())
        resume_obj = Resume(**parsed_data)
        return resume_obj, None  # success

    except Exception as e:
        # Any parsing/scraping failure
        return None, url



# --- Run concurrent scraping and merge results ---
def scrape_and_parse_all(all_resume_links: List[str], max_workers: int = 12):
    org_resume_dict = {}
    failed_urls = []

    print(f"[+] Scraping {len(all_resume_links)} resumes in parallel...")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(parse_resume_to_object, url): url for url in all_resume_links}

        for future in tqdm(as_completed(futures), total=len(futures), desc="Scraping resumes", unit="resume"):
            url = futures[future]
            try:
                resume_obj, failed_url = future.result()
                if resume_obj:
                    org_resume_dict[url] = resume_obj.model_dump()  # or keep as object
                if failed_url:
                    failed_urls.append(failed_url)
            except Exception as e:
                print(f"[!] Failed {url}: {e}")
                failed_urls.append(url)

    return org_resume_dict, failed_urls


# --- Example usage ---
# all_resume_links = [...]  # your list of resume URLs
with open("resume_links.txt", "r") as f:
    all_resume_links = f.readlines()

org_resume_dict, failed_urls = scrape_and_parse_all(all_resume_links)



print(f"\n✅ Successfully parsed {len(org_resume_dict)} resumes")
print(f"❌ Failed to parse {len(failed_urls)} resumes")



[+] Scraping 4000 resumes in parallel...


Scraping resumes: 100%|██████████| 4000/4000 [08:19<00:00,  8.01resume/s]  


✅ Successfully parsed 1096 resumes
❌ Failed to parse 2904 resumes





In [58]:
org_resume_dict_2, failed_urls_2 = scrape_and_parse_all(failed_urls)



print(f"\n✅ Successfully parsed {len(org_resume_dict_2)} resumes")
print(f"❌ Failed to parse {len(failed_urls_2)} resumes")

[+] Scraping 2904 resumes in parallel...


Scraping resumes: 100%|██████████| 2904/2904 [05:26<00:00,  8.90resume/s]


✅ Successfully parsed 0 resumes
❌ Failed to parse 2904 resumes





In [59]:
failed_urls_2

['https://www.hireitpeople.com/resume-database/77-oracle-resumes/626615-oracle-pl-sql-developer-resume-san-antonio-tx-33',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/626617-oracle-pl-sql-developer-resume-san-antonio-tx-35',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/625471-oracle-applications-functional-consultant-resume-moosic-pa',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627452-java-oracle-programmer-resume-ca',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627505-java-oracle-programmer-resume-palo-alto',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627267-oracle-dba-resume-minneapolis-mn-10',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627034-lead-oracle-database-administrator-resume-highlands-ranch-co-1',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/626958-account-tech-subject-matter-expert-tech-lead-resume-5',
 'https://www.hireitpeopl

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import re
import requests
from typing import List, Optional, Tuple
from pydantic import BaseModel
from tqdm import tqdm


class Experience(BaseModel):
    job_role: str
    responsibilities: List[str]
    environment: Optional[str] = None


class Resume(BaseModel):
    job_role: str
    professional_summary: List[str]
    technical_skills: List[str]
    experiences: List[Experience]


class PostExtractionResult(BaseModel):
    """Result model for post extraction step."""
    job_role: Optional[str]
    structured_content: List[dict]
    full_text: str
    container_text: str
    missing_excerpt: str
    skipped_blocks: List[str]
    warnings: List[str]
    
def normalize_breaks(soup):
    """Convert <br> tags to newline text nodes so .get_text() uses them."""
    for br in soup.find_all("br"):
        br.replace_with("\n")

def clean_whitespace(text):
    lines = [ln.strip() for ln in text.splitlines()]
    while lines and lines[0] == "":
        lines.pop(0)
    while lines and lines[-1] == "":
        lines.pop()
    return "\n".join([re.sub(r'\s+', ' ', ln) for ln in lines])

def extract_job_role(soup):
    media_bodies = soup.find_all("div", class_=re.compile(r"media-body"))
    if media_bodies:
        media_body = media_bodies[0]
        job_title_tag = media_body.find("h3")
        if job_title_tag:
            job_role = job_title_tag.get_text(strip=True)
            if job_role:
                return job_role
    return None

def extract_post_body_safe(
    url: str,
    target_class: Optional[str] = None,
    class_regex: Optional[str] = None,
    allow_fallback: bool = True,
    debug: bool = False,
    min_word_threshold: int = 120,
    retries: int = 3,
) -> PostExtractionResult:
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
    
    for attempt in range(retries):
        try:
            with requests.Session() as session:
                session.headers.update(headers)
                resp = session.get(url, timeout=20)
                resp.raise_for_status()
                if len(resp.text) < 1000:
                    raise ValueError("Response too short.")
                break
        except Exception as e:
            if attempt == retries - 1:
                raise
            time.sleep(random.uniform(1, 3))
    soup = BeautifulSoup(resp.text, "html.parser")

    normalize_breaks(soup)

    # Identify container
    container = None
    if target_class:
        container = soup.find("div", class_=target_class)
    if not container and class_regex:
        container = soup.find("div", class_=re.compile(class_regex))
    if not container and allow_fallback:
        divs = soup.find_all("div")
        if divs:
            container = max(divs, key=lambda d: len(d.get_text(strip=True)))
    if not container:
        raise ValueError("Could not find a suitable container.")

    raw_container_text = container.get_text(separator="\n", strip=True)
    container_text = clean_whitespace(raw_container_text)

    structured_content = []
    skipped_blocks = []  # 🔹 Track skipped text blocks

    resume_job_role = extract_job_role(soup)

    # 2️⃣ Handle normal paragraphs and lists not under media-body
    for element in container.find_all(["p", "ul"], recursive=True):
        if element.find_parent("div", class_=re.compile(r"media-body")):
            continue  # already captured above

        if element.name == "p":
            text = clean_whitespace(" ".join(element.stripped_strings))
            # 🚫 Skip overly long text blocks
            if len(text.split()) > min_word_threshold:
                skipped_blocks.append(text[:120] + "...")
                continue
            if text:
                structured_content.append({"type": "p", "text": text})

        elif element.name == "ul":
            items = []
            for li in element.find_all("li", recursive=False):
                li_text = clean_whitespace(" ".join(li.stripped_strings))
                # 🚫 Skip overly long list items
                if len(li_text.split()) > min_word_threshold:
                    skipped_blocks.append(li_text[:120] + "...")
                    continue
                if li_text:
                    items.append(li_text)
            if items:
                structured_content.append({"type": "ul", "items": items})

    # Join all paragraph text for convenience
    joined_p = "\n\n".join(
        [b["text"] for b in structured_content if b.get("type") == "p"]
    )

    container_words = len(container_text.split())
    joined_words = len(joined_p.split()) if joined_p else 0

    warnings = []
    missing_excerpt = ""
    if container_words > joined_words + 20:
        temp = container_text
        for block in structured_content:
            if block.get("type") == "p":
                temp = temp.replace(block["text"], "")
            elif block.get("type") == "ul":
                for item in block["items"]:
                    temp = temp.replace(item, "")
            elif "job_role" in block:
                temp = temp.replace(block["job_role"], "")
                for sub in block.get("content", []):
                    if sub.get("type") == "p":
                        temp = temp.replace(sub["text"], "")
                    elif sub.get("type") == "ul":
                        for item in sub["items"]:
                            temp = temp.replace(item, "")
        missing_excerpt = temp.strip()[:800]
        if missing_excerpt:
            warnings.append("Container has additional text not captured by structured tags.")

    if "<script" in resp.text.lower() and (container_words == 0 or joined_words == 0):
        warnings.append("Page might be JS-rendered.")

    if debug:
        print("===== DEBUG INFO =====")
        print("Container classes:", container.get("class"))
        print("Job roles found:", sum(1 for b in structured_content if "job_role" in b))
        print("Paragraphs:", sum(1 for b in structured_content if b.get("type") == "p"))
        print("Lists:", sum(1 for b in structured_content if b.get("type") == "ul"))
        print("Skipped blocks:", len(skipped_blocks))
        print("Warnings:", warnings)
        print("======================")

    return PostExtractionResult(
        job_role=resume_job_role,
        structured_content=structured_content,
        full_text=joined_p,
        container_text=container_text,
        missing_excerpt=missing_excerpt,
        skipped_blocks=skipped_blocks,
        warnings=warnings,
    )

def parse_resume(json_data):
    # Initialize the resume structure
    resume = {
        "job_role": json_data.get("job_role", ""),
        "professional_summary": [],
        "technical_skills": [],
        "experiences": []
    }
    
    structured_content = json_data.get("structured_content", [])
    
    # Flags to track current section
    in_summary = False
    in_technical_skills = False
    in_professional_experience = False
    
    # Variables for experience parsing
    current_experience = None
    current_job_role = None
    current_responsibilities = []
    current_environment = None
    experience_started = False
    
    # Iterate through each element in structured_content
    i = 0
    while i < len(structured_content):
        element = structured_content[i]
        
        # Check if we're entering SUMMARY section
        if element["type"] == "p" and "SUMMARY" in element["text"]:
            in_summary = True
            in_technical_skills = False
            in_professional_experience = False
            i += 1
            continue
        
        # Check if we're entering TECHNICAL SKILLS section
        elif element["type"] == "p" and "TECHNICAL SKILLS" in element["text"]:
            in_summary = False
            in_technical_skills = True
            in_professional_experience = False
            i += 1
            continue
        
        # Check if we're entering PROFESSIONAL EXPERIENCE section
        elif element["type"] == "p" and "PROFESSIONAL EXPERIENCE" in element["text"]:
            in_summary = False
            in_technical_skills = False
            in_professional_experience = True
            i += 1
            continue
        
        # Process SUMMARY section
        elif in_summary:
            if element["type"] == "ul":
                resume["professional_summary"].extend(element["items"])
            i += 1
            continue
        
        # Process TECHNICAL SKILLS section
        elif in_technical_skills:
            if element["type"] == "p":
                # Skip the "TECHNICAL SKILLS:" header itself
                if "TECHNICAL SKILLS" not in element["text"]:
                    resume["technical_skills"].append(element["text"])
            i += 1
            continue
        
        # Process PROFESSIONAL EXPERIENCE section
        elif in_professional_experience:
            # Check for "Confidential" P tags to identify new experiences
            if element["type"] == "p" and "Confidential" in element["text"]:
                # Save previous experience if it exists and has data
                if experience_started and current_job_role:
                    experience_data = {
                        "job_role": current_job_role,
                        "responsibilities": current_responsibilities.copy()
                    }
                    if current_environment:
                        experience_data["environment"] = current_environment
                    resume["experiences"].append(experience_data)
                
                # Reset for new experience
                current_job_role = None
                current_responsibilities = []
                current_environment = None
                experience_started = True
                
                # The next P tag after "Confidential" should be the job role
                if i + 1 < len(structured_content):
                    next_element = structured_content[i + 1]
                    if next_element["type"] == "p":
                        current_job_role = next_element["text"]
                        i += 2  # Skip both confidential and job role
                    else:
                        i += 1
                else:
                    i += 1
                continue
            
            # Check for UL tags (responsibilities)
            elif element["type"] == "ul" and current_job_role:
                current_responsibilities.extend(element["items"])
                i += 1
                continue
            
            # Check for "Environment" P tags
            elif element["type"] == "p" and "Environment:" in element["text"]:
                current_environment = element["text"].replace("Environment:", "").strip()
                i += 1
                continue
            
            # Check for "Environment" without colon (some entries might have different formatting)
            elif element["type"] == "p" and "Environment" in element["text"] and current_job_role:
                current_environment = element["text"].replace("Environment", "").strip()
                if current_environment.startswith(":"):
                    current_environment = current_environment[1:].strip()
                i += 1
                continue
            
            # Regular P tag in experience section (might be environment or other info)
            elif element["type"] == "p":
                # If we don't have a job role yet but we're in an experience, this might be it
                if not current_job_role and "Confidential" not in element["text"] and "PROFESSIONAL EXPERIENCE" not in element["text"]:
                    current_job_role = element["text"]
                i += 1
                continue
            
            else:
                i += 1
                continue
        
        else:
            i += 1
            continue
    
    # Don't forget to add the last experience if it exists
    if experience_started and current_job_role:
        experience_data = {
            "job_role": current_job_role,
            "responsibilities": current_responsibilities.copy()
        }
        if current_environment:
            experience_data["environment"] = current_environment
        resume["experiences"].append(experience_data)
    
    return resume

import re
import json

def validate_structured_resume(json_data):
    structured_content = json_data.get("structured_content", [])
    errors = []
    i = 0
    n = len(structured_content)
    section_order = []

    def expect_type(index, expected_type, context):
        """Helper: validate type field"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        if structured_content[index].get("type") != expected_type:
            errors.append(f"Expected type '{expected_type}' at {context}, got '{structured_content[index].get('type')}'")
            return False
        return True

    def expect_text(index, expected_text, context, case_sensitive=True, startswith=False):
        """Helper: validate text content"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        text = structured_content[index].get("text", "")
        cmp_text = text if case_sensitive else text.upper()
        cmp_expected = expected_text if case_sensitive else expected_text.upper()

        if startswith:
            if not cmp_text.startswith(cmp_expected):
                errors.append(f"Expected text starting with '{expected_text}' at {context}, got '{text}'")
                return False
        elif cmp_text != cmp_expected:
            errors.append(f"Expected text '{expected_text}' at {context}, got '{text}'")
            return False
        return True

    # --- 1. OBJECTIVE (optional) ---
    if i < n and structured_content[i].get("text", "").strip().upper() == "OBJECTIVE":
        section_order.append("OBJECTIVE")
        expect_type(i, "p", "OBJECTIVE header")
        i += 1
        if i < n and structured_content[i].get("type") == "ul":
            if not isinstance(structured_content[i].get("items"), list):
                errors.append("OBJECTIVE ul should have a list of items")
            i += 1

    # --- 2. SUMMARY ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "SUMMARY":
        errors.append("Missing SUMMARY section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("SUMMARY")
    expect_type(i, "p", "SUMMARY header")
    i += 1

    expect_type(i, "ul", "SUMMARY bullets")
    if not isinstance(structured_content[i].get("items"), list):
        errors.append("SUMMARY ul must contain list of strings")
    i += 1

    # --- 3. TECHNICAL SKILLS ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "TECHNICAL SKILLS":
        errors.append("Missing TECHNICAL SKILLS section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("TECHNICAL SKILLS")
    expect_type(i, "p", "TECHNICAL SKILLS header")
    i += 1

    while i < n and structured_content[i].get("type") == "p" and structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        expect_type(i, "p", "TECHNICAL SKILL item")
        i += 1

    # --- 4. PROFESSIONAL EXPERIENCE ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        errors.append("Missing PROFESSIONAL EXPERIENCE section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("PROFESSIONAL EXPERIENCE")
    expect_type(i, "p", "PROFESSIONAL EXPERIENCE header")
    i += 1

    # --- 5. Validate experience blocks ---
    while i < n:
        # Company line
        if not (structured_content[i].get("type") == "p" and structured_content[i].get("text", "").startswith("Confidential")):
            errors.append(f"Expected company line starting with 'Confidential' at index {i}")
            break
        i += 1

        # Job title
        expect_type(i, "p", "Job Role")
        i += 1

        # Responsibilities
        if not (structured_content[i].get("type") == "p" and re.match(r"(?i)^responsibilities", structured_content[i].get("text", "").strip())):
            errors.append(f"Expected 'Responsibilities:' line at index {i}")
            break
        i += 1

        # Responsibilities list
        expect_type(i, "ul", "Responsibilities UL")
        if not isinstance(structured_content[i].get("items"), list):
            errors.append(f"Responsibilities UL at index {i} must contain a list of strings")
        i += 1

        # Environment line (optional)
        if i < n and structured_content[i].get("type") == "p" and structured_content[i].get("text", "").startswith("Environment"):
            i += 1

    return {"is_valid": len(errors) == 0, "errors": errors, "sections_found": section_order}

# Alternative version that returns a Resume object
def parse_resume_to_object(url: str) -> Tuple[Optional[Resume], Optional[str]]:
    """
    Returns (Resume object, failed_url_if_any).
    """
    try:
        scraped_data = extract_post_body_safe(
            url,
            class_regex=r"(single-post-body|post-content|entry-content|article-body)",
            allow_fallback=True
        )

        # Validate structure
        valid_check = validate_structured_resume(scraped_data.model_dump())
        if not valid_check["is_valid"]:
            return None, url  # Invalid resume structure

        parsed_data = parse_resume(scraped_data.model_dump())
        resume_obj = Resume(**parsed_data)
        return resume_obj, None  # success

    except Exception as e:
        # Any parsing/scraping failure
        return None, url

parsed_data = {}
failed_urls = []
for job in all_links.keys():
    for resume_url in all_links[job]:
        org_resume_dict, failed_urls = scrape_and_parse_all(resume_url)
        if org_resume_dict is not None:
            parsed_data[resume_url] = org_resume_dict
        if failed_urls is not None:
            failed_urls.append(resume_url)


print(f"\n✅ Successfully parsed {len(org_resume_dict)} resumes")
print(f"❌ Failed to parse {len(failed_urls)} resumes")



[+] Sequentially scraping and parsing 115 resumes...


Processing resumes: 100%|██████████| 115/115 [07:46<00:00,  4.05s/resume]


[+] Sequentially scraping and parsing 113 resumes...


Processing resumes: 100%|██████████| 113/113 [07:19<00:00,  3.89s/resume]


[+] Sequentially scraping and parsing 118 resumes...


Processing resumes: 100%|██████████| 118/118 [07:57<00:00,  4.05s/resume]


[+] Sequentially scraping and parsing 118 resumes...


Processing resumes: 100%|██████████| 118/118 [07:44<00:00,  3.94s/resume]


[+] Sequentially scraping and parsing 121 resumes...


Processing resumes: 100%|██████████| 121/121 [08:04<00:00,  4.00s/resume]


[+] Sequentially scraping and parsing 118 resumes...


Processing resumes: 100%|██████████| 118/118 [07:50<00:00,  3.98s/resume]


[+] Sequentially scraping and parsing 118 resumes...


Processing resumes: 100%|██████████| 118/118 [07:46<00:00,  3.96s/resume]


[+] Sequentially scraping and parsing 118 resumes...


Processing resumes: 100%|██████████| 118/118 [07:48<00:00,  3.97s/resume]


[+] Sequentially scraping and parsing 109 resumes...


Processing resumes:  92%|█████████▏| 100/109 [06:42<00:36,  4.03s/resume]


In [None]:
all

In [55]:
for job in all_links.keys():
    print(job)


https://www.hireitpeople.com/resume-database/77-oracle-resumes
https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes


In [49]:
all_resume_links

['https://www.hireitpeople.com/resume-database/77-oracle-resumes/628150-oracle-pl-sql-developer-resume-deerfield-il-1',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/628148-oracle-pl-sql-developer-resume-deerfield-il',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627839-oracle-pl-sql-developer-resume-san-antonio-tx-50',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627837-oracle-pl-sql-developer-resume-san-antonio-tx-48',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627789-oracle-database-administrator-resume-dubuque-iowa-2',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627754-oracle-pl-sql-developer-resume-san-antonio-tx-47',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627582-oracle-pl-sql-developer-resume-san-antonio-tx-42',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627581-oracle-pl-sql-developer-resume-san-antonio-tx-41',
 'https://www.hireitp

In [33]:
org_resume_dict[url] = data

In [34]:
org_resume_dict

{'https://www.hireitpeople.com/resume-database/77-oracle-resumes/628150-oracle-pl-sql-developer-resume-deerfield-il-1': Resume(job_role='Oracle Pl/sql Developer Resume', professional_summary=['Having 7 years of experience as Oracle PL/SQL developer and implementation of applications using Oracle Applications. Strong experience in Oracle RDBMS, PLSQL Development, Oracle 9i,10g,11g, SQL, PL/SQL, Developer 2000/Forms and Report 10g/6i, Java, HTML, UNIX and Workflow.', 'Experience in Design, Development, Implementation and support as Oracle Developer in Production, Development and Test Environments using Oracle Tools.', 'Experience in using Oracle tools like SQL * Loader, Toad, SQL Navigator, SQL Developer and Data Loader.', 'Developed SQL Loader control programs, interfaces and PL/SQL validation scripts to transfer flat file data into oracle database and Oracle Applications base tables.', 'Experienced in RDBMS design, data modeling, data normalization and SQL tuning using Indexes.', 'Good

In [36]:
import json

valid_data = {url: res.model_dump() for url, res in org_resume_dict.items() if res is not None}
failed_urls = [url for url, res in org_resume_dict.items() if res is None]

with open("structured_resumes.json", "w") as f:
    json.dump(valid_data, f, indent=4)

if failed_urls:
    with open("failed_urls.txt", "w") as f:
        f.write("\n".join(failed_urls))
    print(f"[!] {len(failed_urls)} resumes failed and were logged.")

[!] 2902 resumes failed and were logged.


In [31]:
import json

with open("scraped_resumes.json", "w") as f:
    json.dump({url: res.model_dump() for url, res in org_resume_dict.items()}, f, indent=2)

In [18]:
len(org_resume_dict.values())

3998

## Validator

In [None]:
def validate_structured_resume(json_data):
    structured_content = json_data.get("structured_content", [])
    errors = []
    i = 0
    n = len(structured_content)
    section_order = []

    def expect_type(index, expected_type, context):
        """Helper: validate type field"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        if structured_content[index].get("type") != expected_type:
            errors.append(f"Expected type '{expected_type}' at {context}, got '{structured_content[index].get('type')}'")
            return False
        return True

    def expect_text(index, expected_text, context, case_sensitive=True, startswith=False):
        """Helper: validate text content"""
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        text = structured_content[index].get("text", "")
        cmp_text = text if case_sensitive else text.upper()
        cmp_expected = expected_text if case_sensitive else expected_text.upper()

        if startswith:
            if not cmp_text.startswith(cmp_expected):
                errors.append(f"Expected text starting with '{expected_text}' at {context}, got '{text}'")
                return False
        elif cmp_text != cmp_expected:
            errors.append(f"Expected text '{expected_text}' at {context}, got '{text}'")
            return False
        return True

    # --- 1. OBJECTIVE (optional) ---
    if i < n and structured_content[i].get("text", "").strip().upper() == "OBJECTIVE":
        section_order.append("OBJECTIVE")
        expect_type(i, "p", "OBJECTIVE header")
        i += 1
        if i < n and structured_content[i].get("type") == "ul":
            if not isinstance(structured_content[i].get("items"), list):
                errors.append("OBJECTIVE ul should have a list of items")
            i += 1

    # --- 2. SUMMARY ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "SUMMARY":
        errors.append("Missing SUMMARY section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("SUMMARY")
    expect_type(i, "p", "SUMMARY header")
    i += 1

    expect_type(i, "ul", "SUMMARY bullets")
    if not isinstance(structured_content[i].get("items"), list):
        errors.append("SUMMARY ul must contain list of strings")
    i += 1

    # --- 3. TECHNICAL SKILLS ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "TECHNICAL SKILLS":
        errors.append("Missing TECHNICAL SKILLS section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("TECHNICAL SKILLS")
    expect_type(i, "p", "TECHNICAL SKILLS header")
    i += 1

    while i < n and structured_content[i].get("type") == "p" and structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        expect_type(i, "p", "TECHNICAL SKILL item")
        i += 1

    # --- 4. PROFESSIONAL EXPERIENCE ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        errors.append("Missing PROFESSIONAL EXPERIENCE section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("PROFESSIONAL EXPERIENCE")
    expect_type(i, "p", "PROFESSIONAL EXPERIENCE header")
    i += 1

    # --- 5. Validate experience blocks ---
    while i < n:
        # Company line
        if not (structured_content[i].get("type") == "p" and structured_content[i].get("text", "").startswith("Confidential")):
            errors.append(f"Expected company line starting with 'Confidential' at index {i}")
            break
        i += 1

        # Job title
        expect_type(i, "p", "Job Role")
        i += 1

        # Responsibilities
        if not (structured_content[i].get("type") == "p" and re.match(r"(?i)^responsibilities[:\s]*", structured_content[i].get("text", "").strip())):
            errors.append(f"Expected 'Responsibilities:' line at index {i}")
            break
        i += 1

        # Responsibilities list
        expect_type(i, "ul", "Responsibilities UL")
        if not isinstance(structured_content[i].get("items"), list):
            errors.append(f"Responsibilities UL at index {i} must contain a list of strings")
        i += 1

        # Environment line (optional)
        if i < n and structured_content[i].get("type") == "p" and re.match(r"(?i)^environment[:\s]", structured_content[i].get("text", "").strip()):
            i += 1


    return {"is_valid": len(errors) == 0, "errors": errors, "sections_found": section_order}

### New architecture

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import re
import requests
from typing import List, Optional, Tuple
from pydantic import BaseModel
from tqdm import tqdm


class Experience(BaseModel):
    job_role: str
    responsibilities: List[str]
    environment: Optional[str] = None


class Resume(BaseModel):
    job_role: str
    professional_summary: List[str]
    technical_skills: List[str]
    experiences: List[Experience]


class PostExtractionResult(BaseModel):
    """Result model for post extraction step."""
    job_role: Optional[str]
    structured_content: List[dict]
    full_text: str
    container_text: str
    missing_excerpt: str
    skipped_blocks: List[str]
    warnings: List[str]
    
def normalize_breaks(soup):
    """Convert <br> tags to newline text nodes so .get_text() uses them."""
    for br in soup.find_all("br"):
        br.replace_with("\n")

def clean_whitespace(text):
    lines = [ln.strip() for ln in text.splitlines()]
    while lines and lines[0] == "":
        lines.pop(0)
    while lines and lines[-1] == "":
        lines.pop()
    return "\n".join([re.sub(r'\s+', ' ', ln) for ln in lines])

def extract_job_role(soup):
    media_bodies = soup.find_all("div", class_=re.compile(r"media-body"))
    if media_bodies:
        media_body = media_bodies[0]
        job_title_tag = media_body.find("h3")
        if job_title_tag:
            job_role = job_title_tag.get_text(strip=True)
            if job_role:
                return job_role
    return None

def extract_post_body_safe(
    url: str,
    target_class: Optional[str] = None,
    class_regex: Optional[str] = None,
    allow_fallback: bool = True,
    debug: bool = False,
    min_word_threshold: int = 120,
    retries: int = 3,
) -> PostExtractionResult:
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
    
    for attempt in range(retries):
        try:
            with requests.Session() as session:
                session.headers.update(headers)
                resp = session.get(url, timeout=20)
                resp.raise_for_status()
                if len(resp.text) < 1000:
                    raise ValueError("Response too short.")
                break
        except Exception as e:
            if attempt == retries - 1:
                raise
            time.sleep(random.uniform(1, 3))
    soup = BeautifulSoup(resp.text, "html.parser")

    normalize_breaks(soup)

    # Identify container
    container = None
    if target_class:
        container = soup.find("div", class_=target_class)
    if not container and class_regex:
        container = soup.find("div", class_=re.compile(class_regex))
    if not container and allow_fallback:
        divs = soup.find_all("div")
        if divs:
            container = max(divs, key=lambda d: len(d.get_text(strip=True)))
    if not container:
        raise ValueError("Could not find a suitable container.")

    raw_container_text = container.get_text(separator="\n", strip=True)
    container_text = clean_whitespace(raw_container_text)

    structured_content = []
    skipped_blocks = []  # 🔹 Track skipped text blocks

    resume_job_role = extract_job_role(soup)

    # 2️⃣ Handle normal paragraphs and lists not under media-body
    for element in container.find_all(["p", "ul"], recursive=True):
        if element.find_parent("div", class_=re.compile(r"media-body")):
            continue  # already captured above

        if element.name == "p":
            text = clean_whitespace(" ".join(element.stripped_strings))
            # 🚫 Skip overly long text blocks
            if len(text.split()) > min_word_threshold:
                skipped_blocks.append(text[:120] + "...")
                continue
            if text:
                structured_content.append({"type": "p", "text": text})

        elif element.name == "ul":
            items = []
            for li in element.find_all("li", recursive=False):
                li_text = clean_whitespace(" ".join(li.stripped_strings))
                # 🚫 Skip overly long list items
                if len(li_text.split()) > min_word_threshold:
                    skipped_blocks.append(li_text[:120] + "...")
                    continue
                if li_text:
                    items.append(li_text)
            if items:
                structured_content.append({"type": "ul", "items": items})

    # Join all paragraph text for convenience
    joined_p = "\n\n".join(
        [b["text"] for b in structured_content if b.get("type") == "p"]
    )

    container_words = len(container_text.split())
    joined_words = len(joined_p.split()) if joined_p else 0

    warnings = []
    missing_excerpt = ""
    if container_words > joined_words + 20:
        temp = container_text
        for block in structured_content:
            if block.get("type") == "p":
                temp = temp.replace(block["text"], "")
            elif block.get("type") == "ul":
                for item in block["items"]:
                    temp = temp.replace(item, "")
            elif "job_role" in block:
                temp = temp.replace(block["job_role"], "")
                for sub in block.get("content", []):
                    if sub.get("type") == "p":
                        temp = temp.replace(sub["text"], "")
                    elif sub.get("type") == "ul":
                        for item in sub["items"]:
                            temp = temp.replace(item, "")
        missing_excerpt = temp.strip()[:800]
        if missing_excerpt:
            warnings.append("Container has additional text not captured by structured tags.")

    if "<script" in resp.text.lower() and (container_words == 0 or joined_words == 0):
        warnings.append("Page might be JS-rendered.")

    if debug:
        print("===== DEBUG INFO =====")
        print("Container classes:", container.get("class"))
        print("Job roles found:", sum(1 for b in structured_content if "job_role" in b))
        print("Paragraphs:", sum(1 for b in structured_content if b.get("type") == "p"))
        print("Lists:", sum(1 for b in structured_content if b.get("type") == "ul"))
        print("Skipped blocks:", len(skipped_blocks))
        print("Warnings:", warnings)
        print("======================")

    return PostExtractionResult(
        job_role=resume_job_role,
        structured_content=structured_content,
        full_text=joined_p,
        container_text=container_text,
        missing_excerpt=missing_excerpt,
        skipped_blocks=skipped_blocks,
        warnings=warnings,
    )

import re

def parse_resume(json_data):
    resume = {
        "job_role": json_data.get("job_role", ""),
        "professional_summary": [],
        "technical_skills": [],
        "experiences": []
    }

    structured_content = json_data.get("structured_content", [])

    # Section flags
    in_summary = False
    in_technical_skills = False
    in_professional_experience = False

    # Experience tracking
    current_experience = None
    current_job_role = None
    current_responsibilities = []
    current_environment = None
    experience_started = False

    i = 0
    while i < len(structured_content):
        element = structured_content[i]
        text = element.get("text", "").strip()

        # ----------------------------
        # SECTION IDENTIFICATION
        # ----------------------------
        upper_text = text.upper()

        if element["type"] == "p" and "SUMMARY" in upper_text:
            in_summary, in_technical_skills, in_professional_experience = True, False, False
            i += 1
            continue

        elif element["type"] == "p" and (
            "TECHNICAL SKILLS" in upper_text or "AREAS OF EXPERIENCE" in upper_text
        ):
            # ④ Alternate label for technical skills handled here
            in_summary, in_technical_skills, in_professional_experience = False, True, False
            i += 1
            continue

        elif element["type"] == "p" and "PROFESSIONAL EXPERIENCE" in upper_text:
            in_summary, in_technical_skills, in_professional_experience = False, False, True
            i += 1
            continue

        # ----------------------------
        # SUMMARY SECTION
        # ----------------------------
        elif in_summary:
            if element["type"] == "ul":
                resume["professional_summary"].extend(element["items"])
            elif element["type"] == "p" and text:
                resume["professional_summary"].append(text)
            i += 1
            continue

        # ----------------------------
        # TECHNICAL SKILLS SECTION
        # ----------------------------
        elif in_technical_skills:
            # ⑤ Handle both <p> and <ul> tags
            if element["type"] == "p" and "TECHNICAL SKILLS" not in upper_text and "AREAS OF EXPERIENCE" not in upper_text:
                resume["technical_skills"].extend([s.strip() for s in re.split(r"[;,\n]", text) if s.strip()])
            elif element["type"] == "ul":
                resume["technical_skills"].extend(element["items"])
            i += 1
            continue

        # ----------------------------
        # PROFESSIONAL EXPERIENCE SECTION
        # ----------------------------
        elif in_professional_experience:
            # ① Handle “Confidential” text merging
            if element["type"] == "p" and "CONFIDENTIAL" in upper_text:
                # Finish the previous experience if valid
                if experience_started and current_job_role and current_responsibilities:
                    exp = {"job_role": current_job_role, "responsibilities": current_responsibilities.copy()}
                    if current_environment:
                        exp["environment"] = current_environment
                    resume["experiences"].append(exp)

                # Reset
                current_job_role = None
                current_responsibilities = []
                current_environment = None
                experience_started = True

                # Separate confidential if merged with text
                if not re.fullmatch(r"Confidential", text, re.IGNORECASE):
                    # e.g. "Confidential - Software Engineer"
                    parts = re.split(r"Confidential\s*[-:]\s*", text, flags=re.IGNORECASE)
                    if len(parts) > 1:
                        current_job_role = parts[1].strip()
                else:
                    # If purely "Confidential", check the next element for job role
                    if i + 1 < len(structured_content):
                        next_el = structured_content[i + 1]
                        if next_el["type"] in ["p", "strong"]:
                            current_job_role = next_el["text"].strip()
                            i += 1  # skip next
                i += 1
                continue

            # ② Handle <strong> tags for job role
            elif element["type"] == "strong":
                current_job_role = text
                i += 1
                continue

            # ③ Skip incomplete subsections (no job role but responsibilities found)
            elif element["type"] == "ul":
                if current_job_role:
                    current_responsibilities.extend(element["items"])
                i += 1
                continue

            # ⑥ Handle environment variations
            elif element["type"] == "p" and re.search(r"(?i)\bEnvironment\b", text):
                current_environment = re.sub(r"(?i)Environment[:\s]*", "", text).strip()
                i += 1
                continue

            # Handle other paragraphs (possibly job role if none yet)
            elif element["type"] == "p" and not current_job_role and "CONFIDENTIAL" not in upper_text:
                current_job_role = text
                i += 1
                continue

            else:
                i += 1
                continue

        else:
            i += 1
            continue

    # ----------------------------
    # ADD LAST EXPERIENCE IF VALID
    # ----------------------------
    if experience_started and current_job_role and current_responsibilities:
        exp = {"job_role": current_job_role, "responsibilities": current_responsibilities.copy()}
        if current_environment:
            exp["environment"] = current_environment
        resume["experiences"].append(exp)

    # ⑦ Filter out unhealthy experience subsections
    resume["experiences"] = [
        exp for exp in resume["experiences"]
        if exp.get("job_role") and exp.get("responsibilities")
    ]

    return resume


import re
import json

import re

def validate_structured_resume(json_data):
    structured_content = json_data.get("structured_content", [])
    errors = []
    i = 0
    n = len(structured_content)
    section_order = []

    def expect_type(index, expected_type, context):
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        if structured_content[index].get("type") != expected_type:
            errors.append(f"Expected type '{expected_type}' at {context}, got '{structured_content[index].get('type')}'")
            return False
        return True

    def expect_text(index, expected_text, context, case_sensitive=False):
        if index >= n:
            errors.append(f"Missing element at {context}")
            return False
        text = structured_content[index].get("text", "")
        if not case_sensitive:
            if text.strip().upper() != expected_text.upper():
                errors.append(f"Expected text '{expected_text}' at {context}, got '{text}'")
                return False
        else:
            if text.strip() != expected_text:
                errors.append(f"Expected text '{expected_text}' at {context}, got '{text}'")
                return False
        return True

    # --- 1. OBJECTIVE (optional) ---
    if i < n and structured_content[i].get("text", "").strip().upper() == "OBJECTIVE":
        section_order.append("OBJECTIVE")
        expect_type(i, "p", "OBJECTIVE header")
        i += 1
        if i < n and structured_content[i].get("type") == "ul":
            if not isinstance(structured_content[i].get("items"), list):
                errors.append("OBJECTIVE ul should have a list of items")
            i += 1

    # --- 2. SUMMARY ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "SUMMARY":
        errors.append("Missing SUMMARY section (must be 'SUMMARY')")
        return {"is_valid": False, "errors": errors}

    section_order.append("SUMMARY")
    expect_type(i, "p", "SUMMARY header")
    i += 1

    expect_type(i, "ul", "SUMMARY bullets")
    if not isinstance(structured_content[i].get("items"), list):
        errors.append("SUMMARY ul must contain list of strings")
    i += 1

    # --- 3. TECHNICAL SKILLS / AREAS OF EXPERIENCE ---
    tech_header_text = structured_content[i].get("text", "").strip().upper() if i < n else ""
    if tech_header_text not in ["TECHNICAL SKILLS", "AREAS OF EXPERIENCE"]:
        errors.append("Missing TECHNICAL SKILLS or AREAS OF EXPERIENCE section (must be all caps)")
        return {"is_valid": False, "errors": errors}

    section_order.append("TECHNICAL SKILLS")
    expect_type(i, "p", "TECHNICAL SKILLS header")
    i += 1

    # Technical skills may appear as paragraphs until PROFESSIONAL EXPERIENCE
    while i < n:
        text = structured_content[i].get("text", "").strip().upper()
        if text == "PROFESSIONAL EXPERIENCE":
            break
        expect_type(i, "p", f"TECHNICAL SKILL item at index {i}")
        i += 1

    # --- 4. PROFESSIONAL EXPERIENCE ---
    if i >= n or structured_content[i].get("text", "").strip().upper() != "PROFESSIONAL EXPERIENCE":
        errors.append("Missing PROFESSIONAL EXPERIENCE section (must be 'PROFESSIONAL EXPERIENCE')")
        return {"is_valid": False, "errors": errors}

    section_order.append("PROFESSIONAL EXPERIENCE")
    expect_type(i, "p", "PROFESSIONAL EXPERIENCE header")
    i += 1

    # --- 5. Validate each experience block ---
    while i < n:
        text_upper = structured_content[i].get("text", "").strip().upper()

        # Company line must start with 'CONFIDENTIAL'
        if not (structured_content[i].get("type") == "p" and text_upper.startswith("CONFIDENTIAL")):
            errors.append(f"Expected 'Confidential' company line at index {i}, got '{structured_content[i].get('text', '')}'")
            break
        i += 1

        # Job title: can be <p> or <strong>
        if i < n:
            if structured_content[i].get("type") not in ["p", "strong"]:
                errors.append(f"Expected job role (<p> or <strong>) after Confidential at index {i}, got '{structured_content[i].get('type')}'")
                break
            i += 1
        else:
            errors.append("Missing job title after Confidential")
            break

        # Responsibilities line (strict)
        if i >= n or not re.match(r"(?i)^responsibilities", structured_content[i].get("text", "").strip()):
            errors.append(f"Expected 'Responsibilities:' line at index {i}")
            break
        expect_type(i, "p", "Responsibilities label")
        i += 1

        # Responsibilities list
        expect_type(i, "ul", "Responsibilities UL")
        if not isinstance(structured_content[i].get("items"), list):
            errors.append(f"Responsibilities UL at index {i} must contain a list of strings")
        i += 1

        # Optional Environment
        if i < n and structured_content[i].get("type") == "p" and re.match(r"(?i)^environment", structured_content[i].get("text", "").strip()):
            i += 1

    return {"is_valid": len(errors) == 0, "errors": errors, "sections_found": section_order}

# Alternative version that returns a Resume object
def parse_resume_to_object(url: str) -> Tuple[Optional[Resume], Optional[str]]:
    """
    Returns (Resume object, failed_url_if_any).
    """
    try:
        scraped_data = extract_post_body_safe(
            url,
            class_regex=r"(single-post-body|post-content|entry-content|article-body)",
            allow_fallback=True
        )

        # Validate structure
        valid_check = validate_structured_resume(scraped_data.model_dump())
        if not valid_check["is_valid"]:
            return None, url  # Invalid resume structure

        parsed_data = parse_resume(scraped_data.model_dump())
        resume_obj = Resume(**parsed_data)
        return resume_obj, None  # success

    except Exception as e:
        # Any parsing/scraping failure
        return None, url

def scrape_and_parse_all(all_resume_links: List[str], max_workers: int = 12):
    org_resume_dict = {}
    failed_urls = []

    print(f"[+] Scraping {len(all_resume_links)} resumes in parallel...")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(parse_resume_to_object, url): url for url in all_resume_links}

        for future in tqdm(as_completed(futures), total=len(futures), desc="Scraping resumes", unit="resume"):
            url = futures[future]
            try:
                resume_obj, failed_url = future.result()
                if resume_obj:
                    org_resume_dict[url] = resume_obj.model_dump()  # or keep as object
                if failed_url:
                    failed_urls.append(failed_url)
                    print(f"Failed to parse {failed_url}, total failed: {len(failed_urls)}")
            except Exception as e:
                print(f"[!] Failed {url}: {e}")
                failed_urls.append(url)

    return org_resume_dict, failed_urls


# --- Example usage ---
# all_resume_links = [...]  # your list of resume URLs

org_resume_dict, failed_urls = scrape_and_parse_all(all_resume_links)



print(f"\n✅ Successfully parsed {len(org_resume_dict)} resumes")
print(f"❌ Failed to parse {len(failed_urls)} resumes")



print(f"\n✅ Successfully parsed {len(org_resume_dict)} resumes")
print(f"❌ Failed to parse {len(failed_urls)} resumes")



[+] Scraping 4000 resumes in parallel...


Scraping resumes:   0%|          | 2/4000 [00:02<57:34,  1.16resume/s]  

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/628084-oracle-soa-bpel-consultant-resume-wilmington-delaware-1, total failed: 1


Scraping resumes:   0%|          | 20/4000 [00:03<07:09,  9.27resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/627269-oracle-dba-resume-ohio-1, total failed: 2
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/627033-lead-oracle-database-administrator-resume-highlands-ranch-co, total failed: 3
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/627035-lead-oracle-database-administrator-resume-highlands-ranch-co-2, total failed: 4


Scraping resumes:   1%|          | 27/4000 [00:05<10:37,  6.23resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/626059-oracle-dba-resume-wilmington-de-6, total failed: 5
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/626476-senior-oracle-dba-resume-dallas-tx-4, total failed: 6
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/626475-senior-oracle-dba-resume-dallas-tx-3, total failed: 7


Scraping resumes:   1%|          | 33/4000 [00:05<08:01,  8.24resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/625484-oracle-apps-dba-resume-pleasanton-ca, total failed: 8
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/625707-oracle-database-administrator-resume-827, total failed: 9
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/625711-oracle-database-administrator-resume-831, total failed: 10
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/625486-oracle-apps-dba-resume-pleasanton-ca-2, total failed: 11


Scraping resumes:   1%|          | 43/4000 [00:06<07:00,  9.41resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/625432-oracle-vcp-production-support-resume-sidney-oh-1, total failed: 12


Scraping resumes:   1%|▏         | 56/4000 [00:08<06:29, 10.14resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623766-oracle-apps-functional-consultant-resume-la-ca, total failed: 13
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623701-database-dba-developer-resume, total failed: 14
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623660-oracle-dba-resume-pittsburg-pa-3, total failed: 15


Scraping resumes:   1%|▏         | 59/4000 [00:08<05:24, 12.13resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623659-oracle-dba-resume-pittsburg-pa-2, total failed: 16
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623588-oracle-pl-sql-developer-resume-livingston-nj-2, total failed: 17


Scraping resumes:   2%|▏         | 61/4000 [00:08<06:21, 10.32resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623428-oracle-apps-technical-consultant-plsql-developer-resume-columbus-oh, total failed: 18


Scraping resumes:   2%|▏         | 66/4000 [00:09<07:49,  8.38resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623372-oracle-dba-resume-nn, total failed: 19
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623286-sr-oracle-dba-resume-san-francisco-ca-8, total failed: 20
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623418-application-programmer-it-analyst-resume-research-triangle-park-nc-1, total failed: 21
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623284-sr-oracle-dba-resume-san-francisco-ca-8, total failed: 22
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623017-oracle-dba-resume-detroit-mi-14, total failed: 23


Scraping resumes:   2%|▏         | 71/4000 [00:09<06:01, 10.88resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/623015-oracle-dba-resume-detroit-mi-11, total failed: 24
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/622799-sr-oracle-dba-resume-los-angeles-ca-7, total failed: 25


Scraping resumes:   2%|▏         | 82/4000 [00:10<05:27, 11.95resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/621160-sr-oracle-database-admin-resume-1, total failed: 26
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/621613-oracle-database-administrator-resume-columbus-oh-nnn, total failed: 27


Scraping resumes:   2%|▏         | 84/4000 [00:11<05:12, 12.53resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/620327-sr-database-administrator-resume-bloomington-il-3, total failed: 28
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/620326-sr-database-administrator-resume-bloomington-il-2, total failed: 29


Scraping resumes:   2%|▏         | 88/4000 [00:12<08:22,  7.78resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/620047-oracle-dba-resume-austin, total failed: 30


Scraping resumes:   2%|▏         | 95/4000 [00:12<05:53, 11.04resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/619897-sr-oracle-dba-resume-atlanta-ga-22, total failed: 31
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/619445-senior-oracle-functional-consultant-resume-hoboken-nj-1, total failed: 32
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/619383-senior-oracle-functional-consultant-resume-east-greenville-pa-1, total failed: 33


Scraping resumes:   2%|▎         | 100/4000 [00:13<08:35,  7.57resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/619280-oracle-technical-consultant-resume-sanjose-ca, total failed: 34
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/619166-oracle-techno-functional-consultant-resume-sanjose-ca, total failed: 35


Scraping resumes:   3%|▎         | 107/4000 [00:13<06:04, 10.69resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/617103-oracle-dba-resume-itasca-il, total failed: 36
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/616371-senior-dba-resume-bartlesville-oklahoma, total failed: 37


Scraping resumes:   3%|▎         | 110/4000 [00:14<08:14,  7.87resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/615938-oracle-dba-consultant-resume-phoenix-az, total failed: 38


Scraping resumes:   3%|▎         | 112/4000 [00:14<09:04,  7.14resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/615904-oracle-database-administrator-resume-richardson-tx-3, total failed: 39
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/615553-senior-oracle-dba-and-developer-resume-mechanicsburg-pa, total failed: 40
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/615626-sme-implementation-engineer-resume-roseland-nj, total failed: 41
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/615886-system-analyst-resume-422, total failed: 42


Scraping resumes:   3%|▎         | 130/4000 [00:16<05:50, 11.04resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/614415-sr-oracle-sql-dba-middleware-resume-irving-tx, total failed: 43
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/614224-sr-oracle-dba-resume-columbus-ohio-1, total failed: 44
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/614223-sr-oracle-dba-resume-columbus-ohio, total failed: 45


Scraping resumes:   3%|▎         | 136/4000 [00:17<08:14,  7.82resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613778-oracle-database-administrator-resume-dallas-tx-21, total failed: 46
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613614-senior-oracle-database-consultant-resume-piscataway-nj-4, total failed: 47


Scraping resumes:   3%|▎         | 138/4000 [00:17<09:11,  7.01resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613484-sr-consultant-resume-new-york-7, total failed: 48
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613308-sr-oracle-dba-resume-nc-6, total failed: 49
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613240-oracle-database-administrator-resume-816, total failed: 50
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613229-sr-oracle-soa-bpel-osb-b2b-developer-resume-shelton-ct-5, total failed: 51
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613309-sr-oracle-dba-resume-nc-8, total failed: 52
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613238-oracle-database-administrator-resume-814, total failed: 53
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/613199-oracle-dba-resume-houston-tx-25, total 

Scraping resumes:   4%|▎         | 148/4000 [00:18<07:49,  8.20resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612968-hyperion-consultant-resume-bellevue-wa, total failed: 56


Scraping resumes:   4%|▍         | 154/4000 [00:19<07:30,  8.54resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612770-lead-oracle-apps-database-consultant-resume-o-fallon-mo-12, total failed: 57
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612901-oracle-ebs-technical-lead-resume-maumee-oh, total failed: 58
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612767-sr-oracle-dba-resume-ri-2, total failed: 59


Scraping resumes:   4%|▍         | 159/4000 [00:19<05:56, 10.78resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612748-db2-data-base-administrator-resume-columbus-4, total failed: 60


Scraping resumes:   4%|▍         | 163/4000 [00:20<10:17,  6.22resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612255-sr-teradata-consultant-dba-development-performance-tuning-resume, total failed: 61
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/612404-oracle-ebs-senior-techno-functional-consultant-resume-cincinnati-oh-4, total failed: 62
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611682-lead-oracle-apps-database-consultant-resume-o-fallon-mo-9, total failed: 63
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611864-lead-oracle-apps-database-consultant-resume-o-fallon-mo-10, total failed: 64


Scraping resumes:   4%|▍         | 167/4000 [00:21<10:10,  6.28resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611632-db2-assessment-expert-resume-state-of-texas, total failed: 65
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611738-consultant-resume-stamford-ct-2, total failed: 66
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611828-sr-oracle-apps-dba-resume-salem-or-1, total failed: 67


Scraping resumes:   4%|▍         | 172/4000 [00:22<08:47,  7.25resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611587-sr-oracle-dba-resume-louisville-ky-5, total failed: 68
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/611585-sr-oracle-dba-resume-louisville-ky-3, total failed: 69


Scraping resumes:   4%|▍         | 175/4000 [00:23<13:16,  4.80resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/610661-oracle-dba-resume-knoxville-tn-3, total failed: 70
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/610659-oracle-dba-resume-knoxville-tn-1, total failed: 71


Scraping resumes:   5%|▍         | 184/4000 [00:23<07:03,  9.00resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/610053-sr-oracle-administrator-resume-ny-1, total failed: 72
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/609881-oracle-dba-resume-oklahoma-city-oklahoma, total failed: 73
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/609883-oracle-dba-resume-oklahoma-city-oklahoma-2, total failed: 74
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/609788-sr-oracle-dba-resume-louisville-ky-2, total failed: 75


Scraping resumes:   5%|▍         | 186/4000 [00:24<08:57,  7.10resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/609786-sr-oracle-dba-resume-louisville-ky, total failed: 76


Scraping resumes:   5%|▍         | 194/4000 [00:25<06:20, 10.01resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/609015-oracle-database-administrator-resume-alexandria-va-1, total failed: 77
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608962-senior-business-intelligence-consultant-resume-foster-city-ca-2, total failed: 78
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608821-senior-obiee-biapps-consultant-resume-pheonix-az-2, total failed: 79
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608850-senior-obiee-consultant-resume-milpitas-ca-7, total failed: 80
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608960-senior-business-intelligence-consultant-resume-foster-city-ca, total failed: 81


Scraping resumes:   5%|▍         | 199/4000 [00:26<10:07,  6.26resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608734-techno-functional-consultant-resume-mountainview-ca-4, total failed: 82


Scraping resumes:   5%|▌         | 207/4000 [00:26<05:36, 11.28resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608671-lead-oracle-apps-database-consultant-resume-orlando-fl-1, total failed: 83
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608586-lead-oracle-apps-database-consultant-resume-o-fallon-mo-7, total failed: 84
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608546-oracle-dba-resume-1327, total failed: 85


Scraping resumes:   5%|▌         | 212/4000 [00:27<08:30,  7.42resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608079-sr-oracle-dba-resume-nc-5, total failed: 86
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608050-production-oracle-dba-resume-charlotte-nc, total failed: 87
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608078-sr-oracle-dba-resume-nc-3, total failed: 88


Scraping resumes:   6%|▌         | 220/4000 [00:27<05:14, 12.01resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607690-exadata-and-oracle-middleware-administrator-resume-california-1, total failed: 89
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/608049-apps-dba-oracle-dba-resume-houston-texas, total failed: 90
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607912-senior-oracle-dba-mysql-dba-resume-monterey-ca, total failed: 91
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607964-oracle-apps-dba-security-resume, total failed: 92
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607687-sr-oracle-applications-dba-resume-santa-ana-california, total failed: 93


Scraping resumes:   6%|▌         | 226/4000 [00:28<06:28,  9.72resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607481-oracle-ebs-senior-techno-functional-consultant-resume-cincinnati-oh, total failed: 94
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607499-teradata-dba-sql-server-deployment-apps-development-sr-specialist-resume-nashville-tn, total failed: 95
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607277-oracle-dba-resume-1326, total failed: 96


Scraping resumes:   6%|▌         | 230/4000 [00:29<06:49,  9.20resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607107-database-adminitrator-resume-research-triangle-park-nc, total failed: 97
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607055-principal-dba-resume-detroit-mi, total failed: 98
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/607402-data-architect-sr-dba-resume-orlando-fl, total failed: 99
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606751-consultant-database-administrator-resume-fremont-ca, total failed: 100
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606707-sr-oracle-database-administrator-dba-resume-md, total failed: 101


Scraping resumes:   6%|▌         | 238/4000 [00:29<05:52, 10.68resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606674-teradata-dba-resume-rochester-ny, total failed: 102
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606621-oracle-database-administration-dba-resume, total failed: 103
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606653-senior-oracle-dba-microsoft-sql-server-dba-mysql-dba-resume-monterey-ca, total failed: 104


Scraping resumes:   6%|▌         | 240/4000 [00:30<07:28,  8.38resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606539-senior-oracle-dba-resume-aliquippa-pa-3, total failed: 105
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606537-senior-oracle-dba-resume-aliquippa-pa-1, total failed: 106


Scraping resumes:   6%|▌         | 242/4000 [00:30<07:51,  7.97resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606154-epic-cache-dba-resume, total failed: 107
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606534-production-database-administrator-resume-dallas-tx, total failed: 108
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606433-oracle-dba-product-specialist-resume-baltimore-md, total failed: 109
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606436-oracle-dba-mobile-data-admin-resume-arlington-va, total failed: 110
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606536-production-database-administrator-resume-dallas-tx-2, total failed: 111
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/606414-lead-database-architect-resume-birmingham-al, total failed: 112


Scraping resumes:   6%|▌         | 249/4000 [00:31<05:35, 11.16resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605866-db2-database-administrator-resume-tn, total failed: 113
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605855-senior-oracle-apps-dba-resume-tx, total failed: 114


Scraping resumes:   6%|▋         | 251/4000 [00:31<07:33,  8.28resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605485-oracle-database-administrator-resume-aberdeen-proving-grounds-md, total failed: 115
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605846-sr-oracle-dba-resume-ri-1, total failed: 116


Scraping resumes:   6%|▋         | 255/4000 [00:32<07:17,  8.56resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605261-senior-dba-resume-55, total failed: 117
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605454-db2-data-base-administrator-resume-columbus-3, total failed: 118
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605117-oracle-database-administrator-resume-785, total failed: 119
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605235-oracle-dba-resume-1324, total failed: 120
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605248-lead-sme-dba-database-architect-resume-rtp-nc, total failed: 121


Scraping resumes:   7%|▋         | 261/4000 [00:32<04:54, 12.71resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605058-sr-hadoop-administrator-resume-san-jose, total failed: 122
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604949-banner-ods-administrator-oracle-dba-resume-oklahoma-city, total failed: 123
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/605050-oracle-dba-resume-1323, total failed: 124
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604933-oracle-database-administrator-resume-784, total failed: 125


Scraping resumes:   7%|▋         | 263/4000 [00:32<07:15,  8.58resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604905-sr-dba-resume-brisbane-ca-1, total failed: 126


Scraping resumes:   7%|▋         | 273/4000 [00:33<05:03, 12.29resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604275-obia-consultant-architect-resume-1, total failed: 127
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604232-aws-solution-architect-resume-sanjose-ca, total failed: 128
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604205-primary-support-engineer-resume, total failed: 129
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604198-oracle-database-consultant-resume-14, total failed: 130
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/604173-sr-oracle-dba-resume-maryland-3, total failed: 131


Scraping resumes:   7%|▋         | 280/4000 [00:35<09:21,  6.63resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603853-database-administrator-resume-jacksonville-fl-6, total failed: 132
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603829-senior-consultant-db2-dba-codeworks-resume-madison-wi, total failed: 133


Scraping resumes:   7%|▋         | 289/4000 [00:35<06:08, 10.07resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603635-senior-hyperion-consultant-resume-san-francisco, total failed: 134


Scraping resumes:   7%|▋         | 293/4000 [00:36<08:32,  7.23resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603616-database-administrator-resume-atlanta-ga-11, total failed: 135
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603154-oracle-dba-resume-atlanta-ga-11, total failed: 136
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603179-oracle-dba-resume-philadelphia-pa-1, total failed: 137
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603181-oracle-dba-resume-philadelphia-pa-3, total failed: 138
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603247-senior-it-architect-consultant-resume-wa, total failed: 139


Scraping resumes:   7%|▋         | 299/4000 [00:36<04:18, 14.31resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/603153-oracle-dba-resume-atlanta-ga-10, total failed: 140
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602926-jr-oracle-dba-resume-tx, total failed: 141


Scraping resumes:   8%|▊         | 302/4000 [00:37<05:26, 11.32resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602901-technical-architect-oracle-apps-dba-resume-richardson-tx, total failed: 142
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602851-senior-oracle-database-administrator-resume-chicago-il-4, total failed: 143
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602595-mongodb-dba-manager-database-operations-resume-atlanta-ga, total failed: 144


Scraping resumes:   8%|▊         | 307/4000 [00:37<07:34,  8.13resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602739-technical-analyst-resume-200, total failed: 145
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602358-python-developer-resume-1572, total failed: 146
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602546-oracle-dba-resume-irving-texas-4, total failed: 147
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602357-python-developer-resume-1571, total failed: 148
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/601928-vp-dba-resume-newyork-ny, total failed: 149
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/602440-teradata-database-administrator-resume-bellevue-wa, total failed: 150


Scraping resumes:   8%|▊         | 313/4000 [00:38<05:24, 11.37resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/601565-dba-oracle-sr-mysql-ms-sql-server-resume-pleasanton-ca, total failed: 151
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/601180-senior-oracle-dba-with-exadata-architect-resume-buffalo-ny, total failed: 152


Scraping resumes:   8%|▊         | 315/4000 [00:38<07:56,  7.73resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/600821-sr-oracle-dba-resume-731, total failed: 153
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/600686-oracle-applications-dba-resume-jacksonville, total failed: 154


Scraping resumes:   8%|▊         | 319/4000 [00:39<07:17,  8.41resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/600091-data-architect-oracle-dba-resume, total failed: 155
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/600570-senior-operations-professional-oracle-dba-resume, total failed: 156


Scraping resumes:   8%|▊         | 321/4000 [00:39<06:30,  9.41resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599700-party-master-lead-resume-washington-dc, total failed: 157
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599954-infrastructure-project-manager-and-coordinator-resume-jersey-city-nj, total failed: 158
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599780-data-base-architect-sr-oracle-dba-resume-woodbridge-new-jersey, total failed: 159
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599907-sr-oracle-dba-goldengate-sme-paraccel-dba-resume, total failed: 160


Scraping resumes:   8%|▊         | 333/4000 [00:41<08:07,  7.53resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599139-db2-data-base-administrator-resume-columbus-1, total failed: 161
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599177-senior-oracle-dba-resume-ca-9, total failed: 162
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599140-db2-data-base-administrator-resume-columbus-2, total failed: 163
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/599179-senior-oracle-dba-resume-ca-11, total failed: 164


Scraping resumes:   9%|▊         | 341/4000 [00:42<07:31,  8.10resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/598801-db2-data-base-administrator-resume-richmond-va, total failed: 165


Scraping resumes:   9%|▊         | 349/4000 [00:42<05:21, 11.36resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/598364-informix-dba-resume-omaha-ne-2, total failed: 166
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/598393-senior-dba-database-architect-and-performance-engineer-resume-atlanta-ga, total failed: 167


Scraping resumes:   9%|▉         | 351/4000 [00:43<05:52, 10.35resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597990-oracle-dba-resume-1305, total failed: 168


Scraping resumes:   9%|▉         | 353/4000 [00:43<06:52,  8.84resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597949-software-engineer-perl-developer-db2-dba-resume-austin-tx, total failed: 169
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597989-oracle-dba-resume-1304, total failed: 170


Scraping resumes:   9%|▉         | 357/4000 [00:44<09:01,  6.73resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597534-oracle-applications-specialist-resume, total failed: 171


Scraping resumes:   9%|▉         | 365/4000 [00:44<06:21,  9.52resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597496-sr-oracle-cloud-dba-consultant-resume-los-angeles-area-ca-2, total failed: 172
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597495-sr-oracle-dba-system-engineer-resume-chicago-illinois, total failed: 173
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/597493-oracle-dba-production-support-consultant-resume-raleigh-nc, total failed: 174


Scraping resumes:   9%|▉         | 374/4000 [00:46<07:05,  8.52resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596849-managing-developer-owner-resume, total failed: 175
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596782-lead-oracle-apps-database-consultant-resume-o-fallon-mo-5, total failed: 176
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596783-lead-oracle-apps-database-consultant-resume-o-fallon-mo-6, total failed: 177


Scraping resumes:   9%|▉         | 378/4000 [00:46<07:16,  8.29resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596685-lead-oracle-apps-database-consultant-resume-o-fallon-mo-3, total failed: 178
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596683-lead-oracle-apps-database-consultant-resume-o-fallon-mo-1, total failed: 179


Scraping resumes:   9%|▉         | 379/4000 [00:46<07:59,  7.56resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596640-lead-oracle-apps-database-consultant-resume-o-fallon-mo, total failed: 180
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596638-lead-oracle-apps-database-consultant-resume-o-fallon-mo, total failed: 181


Scraping resumes:  10%|▉         | 386/4000 [00:47<05:54, 10.18resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596590-oracle-identity-access-manager-11g-r2-siteminder-administrator-resume-san-francisco-ca, total failed: 182
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/596438-dba-pb-team-lead-resume, total failed: 183


Scraping resumes:  10%|▉         | 392/4000 [00:48<06:43,  8.93resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/595712-oracle-applications-dba-resume-knoxville-tn, total failed: 184
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/595714-senior-oracle-database-administrator-resume-warren-nj, total failed: 185


Scraping resumes:  10%|▉         | 394/4000 [00:49<11:19,  5.31resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/595688-postgresql-cassandra-dba-resume-atlanta-ga, total failed: 186
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/595366-sr-oracle-dba-resume-madison-wi-1, total failed: 187
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594883-oracle-pl-sql-developer-dba-resume-los-angeles-ca, total failed: 188
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/595364-sr-oracle-apps-dba-resume-atlanta-georgia-1, total failed: 189


Scraping resumes:  10%|█         | 401/4000 [00:49<06:09,  9.74resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594673-devops-engineer-and-oracle-dba-resume, total failed: 190


Scraping resumes:  10%|█         | 403/4000 [00:49<06:58,  8.59resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594611-techno-functional-consultant-resume-mountainview-ca-1, total failed: 191


Scraping resumes:  10%|█         | 413/4000 [00:50<05:44, 10.41resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594333-lead-oracle-developer-resume-chicago-il, total failed: 192
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594539-techno-functional-consultant-resume-palo-alto-ca-1, total failed: 193
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594300-python-developer-resume-1564, total failed: 194
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594174-big-data-developer-resume-virginia-1, total failed: 195
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594316-sr-oracle-cloud-dba-consultant-resume-pleasanton-ca-2, total failed: 196
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/594350-senior-oracle-database-administrator-resume-san-mateo-california, total failed: 197
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle

Scraping resumes:  10%|█         | 416/4000 [00:51<06:26,  9.27resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/593974-database-support-service-lead-resume-camden-nj, total failed: 199


Scraping resumes:  10%|█         | 419/4000 [00:52<13:04,  4.56resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/593242-senior-oracle-dba-resume-pa-2, total failed: 200
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592871-oracle-sql-boston-ma-resume, total failed: 201
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/593240-senior-oracle-dba-resume-pa, total failed: 202
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/593556-senior-physical-oracle-dba-resume, total failed: 203
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592767-oracle-senior-dba-resume, total failed: 204
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/593152-senior-soa-consultant-resume-houston-tx-1, total failed: 205


Scraping resumes:  11%|█         | 429/4000 [00:53<09:15,  6.43resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592756-oracle-dba-resume-memphis-tennessee-2, total failed: 206


Scraping resumes:  11%|█         | 431/4000 [00:54<11:12,  5.31resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592598-senior-oracle-dba-resume-ny-manhattan-ny, total failed: 207
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592753-ms-sql-dba-resume-22, total failed: 208
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592625-db2-database-administrator-resume-dallas-tx, total failed: 209
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592623-db2-database-administrator-resume-dallas-tx, total failed: 210


Scraping resumes:  11%|█         | 441/4000 [00:55<06:59,  8.48resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592221-datawarehouse-etl-developer-resume-bethpage-ny, total failed: 211


Scraping resumes:  11%|█         | 445/4000 [00:56<10:31,  5.63resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591657-senior-build-release-engineer-resume-k, total failed: 212
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/592176-lead-oracle-dba-resume-green-bay-wisconsin, total failed: 213


Scraping resumes:  11%|█▏        | 457/4000 [00:57<07:29,  7.89resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591639-oracle-dba-resume-l, total failed: 214
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591523-oracle-developer-resume-mortgage-ia-p4, total failed: 215
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591614-sr-data-modeler-data-analyst-resume-z, total failed: 216
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591645-sr-oracle-administrator-resume-m, total failed: 217
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591452-oracle-database-administrator-resume-dallas-texas-2, total failed: 218


Scraping resumes:  12%|█▏        | 461/4000 [00:58<07:09,  8.24resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591404-sr-oracle-apps-dba-resume-irving-tx, total failed: 219
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591067-odi-consultant-resume-atlanta-1, total failed: 220
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/591075-oracle-database-consultant-resume-12, total failed: 221


Scraping resumes:  12%|█▏        | 470/4000 [00:59<06:32,  8.98resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/590794-oracle-developer-resume-mi-1, total failed: 222
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/590782-senior-oracle-dba-resume-rockford-il, total failed: 223


Scraping resumes:  12%|█▏        | 476/4000 [00:59<05:26, 10.81resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/590376-apps-dba-resume-los-angeles-ca-1, total failed: 224
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/590012-data-modeler-analyst-resume-harrisburg-pa, total failed: 225
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/590375-apps-dba-resume-los-angeles-ca, total failed: 226


Scraping resumes:  12%|█▏        | 480/4000 [01:00<06:27,  9.09resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589907-oracle-dba-resume-1279, total failed: 227
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589906-oracle-dba-resume-1278, total failed: 228
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589799-cloud-architect-resume-houston-tx-4, total failed: 229


Scraping resumes:  12%|█▏        | 482/4000 [01:00<06:58,  8.41resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589739-sr-oracle-dba-resume-washington-dc-10, total failed: 230


Scraping resumes:  12%|█▏        | 488/4000 [01:01<04:47, 12.23resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589540-oracle-cloud-payroll-consultant-resume-palo-alto-ca-1, total failed: 231
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589373-oracle-dba-resume-1276, total failed: 232
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589539-oracle-cloud-hcm-consultant-resume-princeton-nj, total failed: 233
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589371-oracle-dba-resume-1274, total failed: 234


Scraping resumes:  12%|█▏        | 490/4000 [01:01<07:23,  7.91resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589152-oracle-apps-dba-resume-long-beach-2, total failed: 235
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589079-oracle-applications-dba-support-resume-lehi-ut-2, total failed: 236


Scraping resumes:  12%|█▏        | 492/4000 [01:01<07:01,  8.32resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/589077-oracle-applications-dba-ebs-implementation-support-resume-ashburn-va, total failed: 237
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588772-db2-luw-dba-resume-denver-colorado, total failed: 238
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588813-etl-team-lead-developer-resume-new-york-ny, total failed: 239


Scraping resumes:  12%|█▏        | 494/4000 [01:02<06:48,  8.59resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588733-sr-oracle-database-applications-admin-resume-scottsdale-az-1, total failed: 240
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588694-sr-oracle-dba-and-oracle-goldengate-administrator-resume-irving-tx, total failed: 241


Scraping resumes:  12%|█▎        | 500/4000 [01:02<05:41, 10.25resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588526-senior-oracle-application-dba-resume-seattle-wa-1, total failed: 242
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588410-oracle-ebs-financial-functional-consultant-resume-shrewsbury-ma, total failed: 243
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588453-oracle-ebs-financial-functional-consultant-resume-atlanta-ga-4, total failed: 244


Scraping resumes:  13%|█▎        | 505/4000 [01:03<08:42,  6.68resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588301-sr-oracle-dba-resume-atlanta-ga-20, total failed: 245
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/588258-oracle-fusion-middleware-administrator-resume-sacramento-california-1, total failed: 246


Scraping resumes:  13%|█▎        | 507/4000 [01:03<07:41,  7.57resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587850-sr-architect-resume-portland-maine-1, total failed: 247


Scraping resumes:  13%|█▎        | 509/4000 [01:04<07:56,  7.32resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587646-senior-oracle-application-dba-resume-seattle-wa, total failed: 248
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587608-lead-dba-resume-princeton-new-jersey, total failed: 249
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587609-oracle-dba-resume-bentonville-arkansas, total failed: 250


Scraping resumes:  13%|█▎        | 513/4000 [01:04<08:47,  6.61resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587344-sr-dba-resume-sterling-va, total failed: 251
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/587220-enterprise-data-architect-modeler-etl-specialist-resume-whippany-nj, total failed: 252
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586843-oracle-adf-developer-resume-10, total failed: 253


Scraping resumes:  13%|█▎        | 519/4000 [01:05<06:33,  8.84resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586256-oracle-dba-resume-il-25, total failed: 254
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586622-senior-oracle-apps-techno-functional-consultant-resume-11, total failed: 255


Scraping resumes:  13%|█▎        | 521/4000 [01:05<06:53,  8.42resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586243-oracle-dba-resume-il-23, total failed: 256
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586254-oracle-dba-with-rac-golden-gate-resume-il, total failed: 257
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/586253-oracle-dba-resume-il-24, total failed: 258
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585923-sr-oracle-dba-resume-fl-3, total failed: 259


Scraping resumes:  13%|█▎        | 525/4000 [01:06<07:26,  7.79resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585850-lead-oracle-apps-dba-resume-new-york-1, total failed: 260
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585849-lead-oracle-apps-dba-resume-new-york, total failed: 261


Scraping resumes:  13%|█▎        | 529/4000 [01:06<06:04,  9.54resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585745-oracle-database-administrator-resume-plano-tx-5, total failed: 262


Scraping resumes:  13%|█▎        | 532/4000 [01:07<07:15,  7.96resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585270-oracle-dba-and-database-designer-resume-baltimore-md-9, total failed: 263
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585708-oracle-pl-sql-developer-resume-richmond-virginia, total failed: 264
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585269-oracle-dba-and-database-designer-resume-baltimore-md-8, total failed: 265
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585641-oracle-developer-analyst-resume-los-angeles-ca, total failed: 266
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585647-oracle-apps-dba-lead-resume-shelton-ct-1, total failed: 267


Scraping resumes:  14%|█▎        | 540/4000 [01:07<05:09, 11.18resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585268-oracle-dba-and-database-designer-resume-baltimore-md-7, total failed: 268
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585265-oracle-dba-and-database-designer-resume-baltimore-md-4, total failed: 269
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585267-oracle-dba-and-database-designer-resume-baltimore-md-6, total failed: 270
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585266-oracle-dba-and-database-designer-resume-baltimore-md-5, total failed: 271


Scraping resumes:  14%|█▎        | 542/4000 [01:07<05:47,  9.94resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585263-oracle-dba-and-database-designer-resume-baltimore-md-2, total failed: 272
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585262-oracle-dba-and-database-designer-resume-baltimore-md-1, total failed: 273
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585264-oracle-dba-and-database-designer-resume-baltimore-md-3, total failed: 274


Scraping resumes:  14%|█▎        | 546/4000 [01:08<06:11,  9.31resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585260-oracle-dba-and-database-designer-resume, total failed: 275
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585261-oracle-dba-and-database-designer-resume-baltimore-md, total failed: 276
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/585038-sr-postgresql-dba-resume-11, total failed: 277


Scraping resumes:  14%|█▍        | 553/4000 [01:09<05:30, 10.44resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/584775-oracle-database-administrator-resume-ca-2, total failed: 278
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/584465-fusion-cloud-scm-functional-analyst-resume-st-louis-mo-1, total failed: 279
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/584148-senior-financials-functional-analyst-resume-st-louis-mo-1, total failed: 280
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/584371-senior-database-consultant-resume-washington-dc-4, total failed: 281


Scraping resumes:  14%|█▍        | 556/4000 [01:09<05:58,  9.60resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/584103-senior-technical-consultant-resume-winston-salem-nc, total failed: 282


Scraping resumes:  14%|█▍        | 558/4000 [01:09<05:58,  9.61resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/583421-database-administrator-engineering-l2-l3-support-resume-peapack-gladstone-nj-2, total failed: 283
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/583420-database-administrator-engineering-l2-l3-support-resume-peapack-gladstone-nj-1, total failed: 284


Scraping resumes:  14%|█▍        | 565/4000 [01:10<05:19, 10.76resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/583105-oracle-database-administrator-resume-bloomfield-ct, total failed: 285
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/583010-sr-oracle-plsql-developer-resume-oneonta-ny, total failed: 286
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582885-sr-oracle-dba-resume-710, total failed: 287


Scraping resumes:  14%|█▍        | 568/4000 [01:10<05:47,  9.86resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582849-senior-oracle-dba-resume-waltham-ma-2, total failed: 288


Scraping resumes:  14%|█▍        | 570/4000 [01:10<05:52,  9.72resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582751-oracle-applications-techno-functional-consultant-resume-california-2, total failed: 289
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582781-techno-functional-lead-consultant-resume-5, total failed: 290
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582767-oracle-application-database-administrator-resume-san-diego-california, total failed: 291


Scraping resumes:  14%|█▍        | 574/4000 [01:11<06:12,  9.20resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582626-oracle-dba-resume-new-jersey-14, total failed: 292
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582624-oracle-dba-resume-new-jersey-12, total failed: 293


Scraping resumes:  14%|█▍        | 575/4000 [01:11<06:22,  8.95resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582606-senior-oracle-database-administrator-resume-san-francisco-california-4, total failed: 294
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582625-oracle-dba-resume-new-jersey-13, total failed: 295
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582558-senior-oracle-database-administrator-resume-oaks-pennsylvania-8, total failed: 296


Scraping resumes:  14%|█▍        | 579/4000 [01:11<05:20, 10.66resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582556-senior-oracle-database-administrator-resume-oaks-pennsylvania-6, total failed: 297


Scraping resumes:  15%|█▍        | 583/4000 [01:12<05:18, 10.74resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582509-senior-team-lead-powerbuilder-dba-resume-amarillo-tx, total failed: 298
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582391-senior-oracle-database-administrator-resume-tx-4, total failed: 299
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582390-senior-oracle-database-administrator-resume-tx-2, total failed: 300


Scraping resumes:  15%|█▍        | 589/4000 [01:13<06:50,  8.30resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/582246-oracle-ebs-apps-dba-consultant-resume-3, total failed: 301


Scraping resumes:  15%|█▍        | 596/4000 [01:13<03:52, 14.66resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581848-oracle-dba-resume-1263, total failed: 302
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581903-senior-database-developer-oracle-dba-resume-atlanta-ga-1, total failed: 303
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581682-senior-oracle-database-administrator-resume-oaks-pennsylvania-4, total failed: 304
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581847-oracle-dba-resume-1262, total failed: 305
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581722-sr-dba-db2-pure-scale-implementation-dba-resume-sacramento-ca, total failed: 306


Scraping resumes:  15%|█▍        | 598/4000 [01:13<06:03,  9.36resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/581488-oracle-financials-functional-resume-san-jose-ca-1, total failed: 307


Scraping resumes:  15%|█▌        | 602/4000 [01:14<06:05,  9.30resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580895-oracle-dba-resume-1260, total failed: 308
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580865-sr-oracle-apps-dba-resume-atlanta-ga, total failed: 309
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580864-sr-oracle-apps-dba-resume-dubuque-ia, total failed: 310


Scraping resumes:  15%|█▌        | 606/4000 [01:14<05:32, 10.21resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580750-oracle-pl-sql-developer-resume-nyc-ny-4, total failed: 311
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580645-oracle-ebs-apps-dba-consultant-resume-2, total failed: 312
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580489-sr-architect-resume-seattle-wa-2, total failed: 313


Scraping resumes:  15%|█▌        | 609/4000 [01:14<05:52,  9.63resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580474-tech-lead-senior-developer-resume-15, total failed: 314


Scraping resumes:  15%|█▌        | 615/4000 [01:15<05:26, 10.38resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580206-sr-data-warehouse-consultant-lead-resume-seatte-wa-1, total failed: 315
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/580089-oracle-developer-report-writer-resume-plano-tx, total failed: 316


Scraping resumes:  16%|█▌        | 621/4000 [01:16<05:23, 10.43resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/579584-postgresql-dba-developer-resume-chicago-il, total failed: 317
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/579494-oracle-dba-resume-tx-4, total failed: 318


Scraping resumes:  16%|█▌        | 626/4000 [01:16<05:30, 10.20resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/579492-oracle-dba-resume-tx-2, total failed: 319
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/579049-application-developer-resume-926, total failed: 320
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/579311-oracle-dba-resume-tx-1, total failed: 321


Scraping resumes:  16%|█▌        | 628/4000 [01:16<05:26, 10.34resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/578661-oracle-financial-functional-consultant-resume-seal-beach-ca-1, total failed: 322


Scraping resumes:  16%|█▌        | 637/4000 [01:18<06:00,  9.33resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/578005-oracle-developer-resume-freeport-ny, total failed: 323


Scraping resumes:  16%|█▌        | 643/4000 [01:18<05:01, 11.12resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577892-oracle-pl-sql-developer-resume-boston-ma-29, total failed: 324
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577735-sr-oracle-apps-dba-consultant-resume-red-wood-city-ca, total failed: 325
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577737-sr-oracle-apps-dba-consultant-resume-red-wood-city-ca-2, total failed: 326


Scraping resumes:  16%|█▌        | 645/4000 [01:18<05:54,  9.46resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577547-database-administrator-resume-florida, total failed: 327
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577656-sr-technical-consultant-oracle-dba-resume-san-jose-ca, total failed: 328


Scraping resumes:  16%|█▌        | 647/4000 [01:19<06:32,  8.53resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577324-sr-oracle-database-administrator-resume-atlanta-ga-6, total failed: 329


Scraping resumes:  16%|█▋        | 651/4000 [01:19<05:56,  9.40resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577411-oracle-pl-sql-developer-resume-boston-ma-28, total failed: 330
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/577323-sr-oracle-database-administrator-resume-atlanta-ga-5, total failed: 331


Scraping resumes:  16%|█▋        | 656/4000 [01:19<05:08, 10.84resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576991-database-administrator-resume-reston-va-6, total failed: 332


Scraping resumes:  16%|█▋        | 658/4000 [01:20<05:39,  9.83resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576930-oracle-technical-consultant-resume-reston-va-1, total failed: 333


Scraping resumes:  17%|█▋        | 663/4000 [01:20<05:18, 10.49resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576712-oracle-soa-resume-allentown-pa-3, total failed: 334


Scraping resumes:  17%|█▋        | 668/4000 [01:21<05:11, 10.70resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576210-sr-oracle-application-developer-resume-tx-1, total failed: 335
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576275-database-consultant-resume-walnut-creek-ca-1, total failed: 336
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576075-senior-oracle-dba-resume-orlando-fl-1, total failed: 337


Scraping resumes:  17%|█▋        | 670/4000 [01:21<05:25, 10.24resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576074-senior-oracle-dba-resume-orlando-fl, total failed: 338
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/576073-senior-oracle-dba-resume-orlando-fl, total failed: 339


Scraping resumes:  17%|█▋        | 672/4000 [01:21<07:15,  7.64resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575999-oracle-soa-resume-allentown-pa-1, total failed: 340


Scraping resumes:  17%|█▋        | 682/4000 [01:22<05:00, 11.05resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575793-dba-database-developer-resume-bethlehem-pa-1, total failed: 341
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575781-senior-oracle-dba-resume-ny-1, total failed: 342
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575740-oracle-dba-resume-atlanta-chicago, total failed: 343
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575737-oracle-database-administrator-resume-columbus-oh-12, total failed: 344


Scraping resumes:  17%|█▋        | 684/4000 [01:23<07:25,  7.45resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575628-oracle-dba-resume-chicago-il-35, total failed: 345
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575629-oracle-dba-resume-chicago-il-35, total failed: 346


Scraping resumes:  17%|█▋        | 689/4000 [01:23<05:16, 10.45resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575387-oracle-apps-dba-resume-boston-ma-3, total failed: 347
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575386-oracle-apps-dba-resume-boston-ma-2, total failed: 348


Scraping resumes:  17%|█▋        | 693/4000 [01:23<05:45,  9.57resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575318-oracle-microsoft-sql-server-dba-resume-bay-city-mi, total failed: 349
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575320-oracle-microsoft-sql-server-dba-resume-bay-city-mi-2, total failed: 350


Scraping resumes:  17%|█▋        | 698/4000 [01:24<07:05,  7.77resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575041-oracle-dba-resume-1234, total failed: 351
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575042-oracle-dba-resume-1235, total failed: 352
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575055-oracle-pl-sql-developer-resume-oh-12, total failed: 353
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575195-oracle-rac-dba-resume-irving-tx, total failed: 354
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/575120-oracle-database-administrator-resume-herndon-va-1, total failed: 355


Scraping resumes:  18%|█▊        | 710/4000 [01:26<07:16,  7.54resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574480-oracle-apps-dba-resume-north-richland-hills-tx-2, total failed: 356
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574478-oracle-apps-dba-resume-north-richland-hills-tx, total failed: 357
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574479-oracle-apps-dba-resume-north-richland-hills-tx-1, total failed: 358


Scraping resumes:  18%|█▊        | 719/4000 [01:26<05:09, 10.61resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574334-oracle-developer-resume-451, total failed: 359
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574349-oracle-apps-dba-resume-boston-ma-1, total failed: 360
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574255-oracle-apps-dba-resume-sunnyvale-ca-2, total failed: 361


Scraping resumes:  18%|█▊        | 721/4000 [01:27<07:54,  6.90resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574254-oracle-apps-dba-resume-sunnyvale-ca-1, total failed: 362


Scraping resumes:  18%|█▊        | 722/4000 [01:27<11:02,  4.95resume/s]

Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574198-team-leader-resume-nj-1, total failed: 363
Failed to parse https://www.hireitpeople.com/resume-database/78-oracle-dba-resumes/574197-team-leader-resume-nj, total failed: 364


Scraping resumes:  18%|█▊        | 733/4000 [01:29<06:37,  8.22resume/s]


In [3]:
all_resume_links

['https://www.hireitpeople.com/resume-database/77-oracle-resumes/628150-oracle-pl-sql-developer-resume-deerfield-il-1',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/628148-oracle-pl-sql-developer-resume-deerfield-il',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627839-oracle-pl-sql-developer-resume-san-antonio-tx-50',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627837-oracle-pl-sql-developer-resume-san-antonio-tx-48',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627789-oracle-database-administrator-resume-dubuque-iowa-2',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627754-oracle-pl-sql-developer-resume-san-antonio-tx-47',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627582-oracle-pl-sql-developer-resume-san-antonio-tx-42',
 'https://www.hireitpeople.com/resume-database/77-oracle-resumes/627581-oracle-pl-sql-developer-resume-san-antonio-tx-41',
 'https://www.hireitp