In [None]:
import os
from pathlib import Path

# Ensure the notebook runs with the MlServer directory as its working directory.
# Supported launch locations: MlServer itself, the directory that contains
# MlServer, or any directory nested inside MlServer.
repo_root = Path.cwd()
mlserver_dir = repo_root / "MlServer"
if repo_root.name != "MlServer":
    if mlserver_dir.is_dir():
        os.chdir(mlserver_dir)
    else:
        # Launched from somewhere inside MlServer (e.g. a notebooks/ subfolder):
        # walk up to the nearest ancestor named MlServer instead of failing.
        enclosing_dir = next(
            (parent for parent in repo_root.parents if parent.name == "MlServer"),
            None,
        )
        if enclosing_dir is None:
            raise FileNotFoundError("MlServer directory not found relative to current working directory.")
        os.chdir(enclosing_dir)

print(f"cwd set to: {Path.cwd()}")

In [None]:
# Load model
# ModelLoader is project-local (utils package); presumably importable because
# the first cell switches the working directory to MlServer — TODO confirm.
from utils.model_loader import ModelLoader

# NOTE(review): model_loader is never passed to the extractors in the cells
# visible here, so load_model() presumably stores the model in shared state
# that they read from — confirm against utils/model_loader.py.
model_loader = ModelLoader()
model_loader.load_model()

In [None]:
# Enable IPython's autoreload extension so edits to project modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before executing the code in each cell.
%autoreload 2

# Import the parser/extractor modules being worked on and mark them for
# autoreload.
%aimport utils.parsers.parse_pdf
%aimport utils.extractors.contact_extractor
%aimport utils.extractors.work_experience_extractor
%aimport utils.extractors.education_extractor
%aimport utils.extractors.skill_extractor

In [None]:
# Imports
import time
import torch

from utils.parsers import parse_pdf
from utils.extractors import ContactExtractor, WorkExperienceExtractor, EducationExtractor, SkillExtractor

In [None]:
# Relative path: resolved against the current working directory, which the
# first cell switches to MlServer.
data_dir = Path("data")

In [None]:
# Collect every PDF under data/resumes in sorted (deterministic) order and
# list the file names.
resumes_dir = data_dir / "resumes"
resume_files = sorted(resumes_dir.glob("*.pdf"))

if not resume_files:
    # An empty match would otherwise go unnoticed: every later cell loops over
    # these files (or the texts parsed from them) and would silently do nothing.
    print(f"No PDF resumes found in {resumes_dir.resolve()}")

for resume_file in resume_files:
    print(resume_file.name)


In [None]:
# Parse each resume PDF, show a [:100] preview of the parsed text, and keep the
# full text keyed by file stem for the extraction cells that follow.
resume_texts = {}
separator = "=" * 50

for resume_file in resume_files:
    # Banner: blank line, rule, file name, rule.
    print("\n" + separator, resume_file.name, separator, sep="\n")

    resume_text = parse_pdf(resume_file)
    print(resume_text[:100])

    resume_texts[resume_file.stem] = resume_text

#### Contact Information

In [None]:
# Run the contact extractor over every parsed resume and print what it returns.
separator = "=" * 50

for file_name, resume_text in resume_texts.items():
    # Banner: blank line, rule, resume name, rule.
    print("\n" + separator, file_name, separator, sep="\n")

    contact_extractor = ContactExtractor(resume_text)
    contact_info = contact_extractor.extract_contact()
    print(contact_info)
    

#### Work Experience

In [None]:
# Run the work-experience extractor over every parsed resume, printing the
# result and how long the extract() call took.
for file_name, resume_text in resume_texts.items():
    print(f'\n{"=" * 50}')
    print(file_name)
    print("=" * 50)

    work_experience_extractor = WorkExperienceExtractor(resume_text)

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() is wall-clock and can jump if the system clock
    # is adjusted mid-run.
    start = time.perf_counter()
    work_experience = work_experience_extractor.extract()
    end = time.perf_counter()

    print(work_experience)
    print(f"Generation time: {end - start:.2f} seconds")

#### Education

In [None]:
# Run the education extractor over every parsed resume, printing the result
# and how long the extract() call took.
for file_name, resume_text in resume_texts.items():
    print(f'\n{"=" * 50}')
    print(file_name)
    print("=" * 50)

    education_extractor = EducationExtractor(resume_text)

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() is wall-clock and can jump if the system clock
    # is adjusted mid-run.
    start = time.perf_counter()
    education = education_extractor.extract()
    end = time.perf_counter()

    print(education)
    print(f"Generation time: {end - start:.2f} seconds")

#### Skills

In [None]:
# Run the skill extractor over every parsed resume, printing the result and
# how long the extract() call took.
for file_name, resume_text in resume_texts.items():
    print(f'\n{"=" * 50}')
    print(file_name)
    print("=" * 50)

    skill_extractor = SkillExtractor(resume_text)

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() is wall-clock and can jump if the system clock
    # is adjusted mid-run.
    start = time.perf_counter()
    skills = skill_extractor.extract()
    end = time.perf_counter()

    print(skills)
    print(f"Generation time: {end - start:.2f} seconds")