# Resume Chain

Steps:

1. Read in the resume and the job spec.
2. Score the resume for fit against the job spec.
3. Suggest changes that would improve the match score.

In [1]:
import os

from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Take the key from the environment rather than hardcoding it in the notebook:
# a literal here both risks committing a secret and would overwrite a key that
# is already exported. Falls back to '' (the previous value) when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## Load Resume

Precondition Checks:

1. The PDF fits the page limit (the extractor below accepts single-page resumes only).
2. The document is actually a resume.

## Extracting Information From the Resume 

In [39]:
import os

class ResumeExtractor(object):
    """Question-answering wrapper around a single resume PDF.

    The PDF is loaded with ``UnstructuredPDFLoader``, indexed in a Chroma
    store using OpenAI embeddings, and queried through a "stuff" QA chain.
    """

    def __init__(self, path : str) -> None:
        """Load and index the resume found at ``path``.

        Args:
            path: Non-empty string ending in ``.pdf`` (any letter case).

        Raises:
            ValueError: If ``path`` is not a non-empty string, does not end in
                ``.pdf``, or the loader yields zero or more than one document.
        """
        # Precondition checks. isinstance also rejects None and other
        # non-string values before any string method is called on them.
        if not isinstance(path, str) or path == "":
            raise ValueError(f"The path provided: {path} is not a valid path.")
        if not path.lower().endswith(".pdf"):
            raise ValueError(f"The path provided: {path} is not a valid pdf path.")

        self.path = path
        loader = UnstructuredPDFLoader(path)
        # NOTE(review): load_and_split() presumably returns text chunks rather
        # than physical pages, so the length check below may not count real
        # pages - confirm against the loader's documentation.
        self.pages = loader.load_and_split()
        if not self.pages:
            # Without this guard the indexing below fails with an IndexError.
            raise ValueError(f"The resume provided: {path} has no extractable text.")
        if len(self.pages) > 1:
            raise ValueError("The resume provided has more than 1 page. Please send a resume with just one page.")
        embeddings = OpenAIEmbeddings()

        # Just one page resumes accepted, so only the first entry is indexed
        # and the retriever is limited to a single hit.
        self.docsearch = Chroma.from_documents([self.pages[0]], embeddings).as_retriever(search_kwargs={ "k": 1 })
        self.chain = load_qa_chain(OpenAI(temperature=0, max_tokens=3000), chain_type="stuff")

    def ask(self, question : str) -> str:
        """Answer ``question`` using the documents retrieved for it.

        Args:
            question: Free-text question or instruction for the QA chain.

        Returns:
            Whatever ``self.chain.run`` returns for the retrieved documents.
        """
        docs = self.docsearch.get_relevant_documents(question)
        return self.chain.run(input_documents=docs, question=question)

    def extract_details(self) -> str:
        """Ask the chain for a JSON summary of the resume.

        Returns:
            The chain's raw answer. The prompt requests JSON, but the result
            is returned unparsed and unvalidated.
        """
        # NOTE(review): "experience_summary" is listed as a key but has no
        # numbered question describing it - confirm whether that is intended.
        query_to_extract_info = """Using the document, answer the following questions and output valid json with property names enclosed with double quotes with keys: "is_resume", "skills", "years_of_experience", "experience_summary", "achievements", "highest_education", "specialization":

        1. Is this document of a resume? Answer in "True" or "False". The answer should correspond to the "is_resume" key.
        2. What are the candidates skills? The answer should be a json list associated with the "skills" key.
        3. How many years of experience does the candidate have? The answer should correspond to the "years_of_experience" key.
        4. Based on the candidate's experience, extract achievements that are backed by numbers that the candidate has made in the form of a json list associated with the "achievements" key.
        5. What is the candidate's highest education? The answer should either be: High School, Bachelors, Masters, PhD or specify NA if you don't know. This answer should correspond to the "highest_education" key.
        6. What is the candidate's major or field of study? The answer should correspond to the "specialization" key."""
        return self.ask(query_to_extract_info)

In [13]:
# Build one ResumeExtractor per sample resume PDF under ./ResumeExamples.
# Each constructor call loads the PDF and creates embeddings for it.
software_engineer_doc            = ResumeExtractor("./ResumeExamples/SoftwareEngineer.pdf")
investment_banking_associate_doc = ResumeExtractor("./ResumeExamples/InvestmentBankingAssociate.pdf")
business_development_manager_doc = ResumeExtractor("./ResumeExamples/BusinessDevelopmentManager.pdf")

Using embedded DuckDB without persistence: data will be transient
Using embedded DuckDB without persistence: data will be transient
Using embedded DuckDB without persistence: data will be transient


In [14]:
# Show the text of the first entry in the software-engineer extractor's `pages`.
software_engineer_doc.pages[0].page_content

'FIRST LAST Bay Area, California • +1-234-456-789 • professionalemail@resumeworded.com • linkedin.com/in/username\n\nSoftware Engineer with over six years of experience in full-stack development and leading product cycle from conception to completion. Guided a team of 5-15 members through 5+ product launches at a recent experience in a high growth technology startup.\n\nPROFESSIONAL EXPERIENCE\n\nResume Worded, New York, NY Software Engineer\n\n2020 – Present\n\nCreated an invoicing system for subscription services that managed monthly invoices and\n\nprinted invoices to be sent to customers; increased conversion rate by 15%.\n\nCollaborated with internal teams, including graphic design and QA testers to develop and\n\nlaunch a new application in just 6 months, ahead of schedule by 6 months.\n\nWrote reusable unit test documents to ensure quality control and detect bugs by increasing\n\nover 35% efﬁciency rate.\n\nGrowthsi, San Diego, CA Software Engineer\n\n2016 – 2020\n\nAnalyzed inf

In [10]:
# Run the extraction prompt against two of the resumes; each call returns the
# chain's raw string answer.
business_dev_manager_details = business_development_manager_doc.extract_details()
software_engineering_details = software_engineer_doc.extract_details()

In [11]:
# Display the current value of the business development manager details.
business_dev_manager_details

'\n{"is_resume": "True", "skills": ["Microsoft Access", "Excel", "CRM", "D2C ECommerce", "SEO/SEM", "Project Management Professional (PMP)", "ABC Certification", "Certified Business Analysis Professional (2013)"], "years_of_experience": 8, "experience_summary": "Generated 300+ leads per month in a team of 5+ sales and marketing employees. Landed 25 global accounts in 8 months through new account strategy. Cut costs by $1 million annually through redesigned marketing spending. Collaborated with leadership, engineering, and marketing teams to develop new offerings that raised sales $300k. Increased customer retention by 25% through facilitating the addition of new product lines unexplored by competitors. Persuaded 5 partners to invest over $200,000 in other entities. Coordinated and hosted 15+ conferences, events, and trips for the inside sales team of 10. Generated new business and long-term account opportunities through prospecting and cold-calling, resulting in over $60k in closed new

In [12]:
import json

def _parse_details(raw):
    """Return ``raw`` parsed as JSON, or unchanged when it is already a dict."""
    return raw if isinstance(raw, dict) else json.loads(raw)


# The names are rebound from the raw string to the parsed dict. The guard in
# _parse_details keeps this cell re-runnable: json.loads raises TypeError if
# it is handed the dict produced by a previous run.
business_dev_manager_details = _parse_details(business_dev_manager_details)
software_engineering_details = _parse_details(software_engineering_details)

In [16]:
# Print the software engineer resume details.
print(software_engineering_details)

{'is_resume': 'True', 'skills': ['CSS', 'Javascript', 'Python', 'Advanced SAP', 'HTML and XML', 'Scrum Methodology', 'Database management software', 'Software Development Life Cycle'], 'years_of_experience': 6, 'experience_summary': 'Software Engineer with over six years of experience in full-stack development and leading product cycle from conception to completion. Guided a team of 5-15 members through 5+ product launches at a recent experience in a high growth technology startup.', 'achievements': ['increased conversion rate by 15%', 'launched a new application in just 6 months, ahead of schedule by 6 months', 'increased efﬁciency rate by 35%', 'increased mobile trafﬁc by 22%', 'created website leads increase by 15%', 'increased task success rate by 25%'], 'highest_education': 'Bachelors', 'specialization': 'Electrical Engineering'}


In [15]:
# Print the business development manager resume details.
print(business_dev_manager_details)

{'is_resume': 'True', 'skills': ['Microsoft Access', 'Excel', 'CRM', 'D2C ECommerce', 'SEO/SEM', 'Project Management Professional (PMP)', 'ABC Certification', 'Certified Business Analysis Professional (2013)'], 'years_of_experience': 8, 'experience_summary': 'Generated 300+ leads per month in a team of 5+ sales and marketing employees. Landed 25 global accounts in 8 months through new account strategy. Cut costs by $1 million annually through redesigned marketing spending. Collaborated with leadership, engineering, and marketing teams to develop new offerings that raised sales $300k. Increased customer retention by 25% through facilitating the addition of new product lines unexplored by competitors. Persuaded 5 partners to invest over $200,000 in other entities. Coordinated and hosted 15+ conferences, events, and trips for the inside sales team of 10. Generated new business and long-term account opportunities through prospecting and cold-calling, resulting in over $60k in closed new an

## From The Job Description, Get The Details

In [7]:
from langchain.chains import RetrievalQA

class JobDescriptionExtractor(object):
    """Question-answering wrapper around a plain-text job description.

    The file is loaded with ``TextLoader``, split with ``CharacterTextSplitter``,
    indexed in a Chroma store using OpenAI embeddings, and queried through a
    ``RetrievalQA`` "stuff" chain.
    """

    def __init__(self, path : str, k : int = 1) -> None:
        """Load, split and index the job description found at ``path``.

        Args:
            path: Non-empty string path of the job description text file.
            k: Number of chunks the retriever hands to the chain per question.
                Defaults to 1, the value that was previously hard-coded.

        Raises:
            ValueError: If ``path`` is not a non-empty string, ``k`` is not a
                positive integer, or the file yields no text chunks.
        """
        # Precondition checks. isinstance also rejects None and other
        # non-string values before they reach the loader.
        if not isinstance(path, str) or path == "":
            raise ValueError(f"The path provided: {path} is not a valid path.")
        if not isinstance(k, int) or k < 1:
            raise ValueError(f"k must be a positive integer, got: {k}.")

        self.path = path
        self.loader = TextLoader(path)
        documents = self.loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        texts = text_splitter.split_documents(documents)
        if not texts:
            raise ValueError(f"The job description provided: {path} has no text to index.")
        embeddings = OpenAIEmbeddings()
        # With k=1 only one chunk is passed to the chain for each question.
        self.docsearch = Chroma.from_documents(texts, embeddings).as_retriever(search_kwargs = { "k" : k })
        llm = OpenAI(temperature= 0, max_tokens=3000)
        self.chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=self.docsearch)

    def ask(self, question : str) -> str:
        """Run ``question`` through the retrieval chain and return its answer.

        Args:
            question: Free-text question or instruction for the chain.

        Returns:
            Whatever ``self.chain.run`` returns for ``question``.
        """
        return self.chain.run(question)

    def extract_details(self) -> str:
        """Ask the chain for a JSON summary of the job description.

        Returns:
            The chain's raw answer. The prompt requests JSON, but the result
            is returned unparsed and unvalidated.
        """
        query_to_extract_info = """Using the document, answer the following questions and output valid json with property names enclosed with double quotes with keys: "is_job_description", "skills_required", "responsibilities", "qualifications", "preferences":

        1. Is this document of a job description? Answer in "True" or "False". The answer should correspond to the "is_job_description" key.
        2. What are the skills required? The answer should be a json list associated with the "skills_required" key.
        3. What are the responsibilities? The answer should be a json list associated with the "responsibilities" key.
        4. What are the qualifications required? The answer should be in the form of a json list associated with the "qualifications" key.
        5. What are the preferences or preferred qualifications or skills? The answer should be in the form of a json list associated with the preferences key.
        """
        return self.ask(query_to_extract_info)

In [3]:
# Build one JobDescriptionExtractor per sample job description text file.
# Each constructor call loads, splits and embeds the file.
software_engineer_job_description = JobDescriptionExtractor("./JobDescriptions/SoftwareEngineering.txt")
investment_banking_job_description = JobDescriptionExtractor("./JobDescriptions/InvestmentBankingAssociate.txt")

Using embedded DuckDB without persistence: data will be transient
Using embedded DuckDB without persistence: data will be transient


In [9]:
# Run the extraction prompt against both job descriptions; each call returns
# the chain's raw string answer.
investment_banking_job_details = investment_banking_job_description.extract_details()
software_engineer_job_details  = software_engineer_job_description.extract_details()

In [10]:
# Parse each raw answer as JSON and print the resulting object.
# Relies on `json` having been imported by an earlier cell.
print(json.loads(investment_banking_job_details))
print(json.loads(software_engineer_job_details))

{'is_job_description': 'True', 'skills_required': ['Strong leadership skills', 'Ability to assess risks inherent in complex credit transactions and mitigate, structure and negotiate accordingly', 'Excellent problem solving, oral, and written communication skills', 'Extensive knowledge of products and services'], 'responsibilities': ['Champion a culture of innovation and a customer centric mindset', 'Stay up-to-date with industry trends to identify opportunities for innovation or strategic partnerships', 'Find ways to increase efficiency with existing technical infrastructure through automation, while embracing the innovative opportunities offered by new technologies'], 'qualifications': ['3+ years with above average performance results in a similar banking role or credit/lending related experience'], 'preferences': ['Bachelors degree preferred', 'Superior knowledge of the market dynamics and landscape preferred', 'Outstanding professional reputation and integrity']}
{'is_job_descriptio

## PDF Annotations

In [12]:
from pdfminer.layout import LAParams, LTTextBox
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator

# Layout-analysis pipeline: the interpreter renders each page into the
# aggregating device, whose result is then walked for text boxes.
rsrcmgr = PDFResourceManager()
laparams = LAParams()
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device)

x, y = 0, 0
# Maps each text box's text to an (x, y) pair taken from its bounding box.
# A later box with identical text overwrites the earlier entry, and the page
# a box came from is not recorded.
text_to_coordinates = {}

# `with` guarantees the PDF handle is closed even if processing fails; the
# loop has to stay inside because the pages are read from `fp` while iterating.
with open('./ResumeExamples/SoftwareEngineer.pdf', 'rb') as fp:
    pages = PDFPage.get_pages(fp)

    for page in pages:
        print('Processing next page...')
        interpreter.process_page(page)
        layout = device.get_result()
        for lobj in layout:
            if isinstance(lobj, LTTextBox):
                # NOTE(review): presumably bbox is (x0, y0, x1, y1), making
                # this the box's left edge and top edge - confirm.
                x, y, text = lobj.bbox[0], lobj.bbox[3], lobj.get_text()
                text_to_coordinates[text] = x, y
                print('At %r is text: %s' % ((x, y), text))

Processing next page...
At (259.1873025, 772.496003033) is text: FIRST LAST

At (69.09142125, 749.4960016384999) is text: Bay Area, California • +1-234-456-789 • professionalemail@resumeworded.com • linkedin.com/in/username

At (57.148429875, 723.33999356025) is text: Software Engineer with over six years of experience in full-stack development and leading product
cycle from conception to completion. Guided a team of 5-15 members through 5+ product launches

At (157.553787, 696.33999356025) is text: at a recent experience in a high growth technology startup.

At (222.37683, 669.84099435675) is text: PROFESSIONAL EXPERIENCE

At (54.0, 646.1839943475) is text: Resume Worded, New York, NY
Software Engineer

At (476.600415, 646.1839943475) is text: 2020 – Present

At (72.0, 618.2365130778369) is text: ● Created an invoicing system for subscription services that managed monthly invoices and

At (90.0, 604.0900095602501) is text: printed invoices to be sent to customers; increased conversion

In [27]:
from pdf_annotate import PdfAnnotator, Location, Appearance
# Open the sample resume for annotation.
a = PdfAnnotator('./ResumeExamples/SoftwareEngineer.pdf')

# Anchor point recorded for the name line by the coordinate-extraction cell.
x, y = text_to_coordinates['FIRST LAST\n']

# A 30x30 box on the first page, starting at the anchor point.
box = Location(x1=x, y1=y, x2=x + 30, y2=y + 30, page=0)
# Red outline, 5 units wide.
red_outline = Appearance(stroke_color=(1, 0, 0), stroke_width=5)
a.add_annotation('square', box, red_outline)

# Saved to a separate file; overwrite=True is the alternative that replaces
# the input instead.
a.write('./b.pdf')