In [None]:
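# Resume information extraction: pull contact details, a summary, and keywords from a PDF resume using LangChain and OpenAI function calling.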

In [12]:
# Environment setup: imports, module search path, and OpenAI credentials.
import os
import openai
import sys
# Put the directory two levels above the working directory on the module search path.
# NOTE(review): none of the visible cells import anything from there — confirm it is still needed.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file; the return value is bound to `_` and not used

# Direct indexing (not .get) means a missing OPENAI_API_KEY fails here with a
# KeyError instead of later inside an API call.
openai.api_key  = os.environ['OPENAI_API_KEY']

In [19]:
from langchain.document_loaders import PyPDFLoader

# Read the sample resume from the notebook's working directory; `pages` holds
# whatever the loader returns and is consumed by the cells below.
loader = PyPDFLoader(file_path="./sample_resume.pdf")
pages = loader.load()


In [20]:
# Sanity check: how many items loader.load() returned (presumably one per PDF page — the sample yields 1).
len(pages)

1

In [21]:
# Flatten the loaded pages into one string (single space between pages) so the
# whole resume can be sent to the model as a single input.
txt = ' '.join(doc.page_content for doc in pages)

In [22]:
from typing import List, Optional

from pydantic import BaseModel, Field
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

In [23]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

In [24]:
class Overview(BaseModel):
    """Overview of a resume text."""
    # This class is converted to an OpenAI function schema further down, so the
    # docstring above, the field names, and the descriptions are all part of
    # what the model sees. Field names (including the capitalised `Linkedin`)
    # are also the keys of the extracted result, so they are kept unchanged.

    # Contact details: a resume may legitimately omit any of these. They are
    # optional so the model is not forced to invent a value for a required
    # field, in line with the "do not guess" instruction in the system prompt.
    email: Optional[str] = Field(default=None, description="Provide the email address of candidate if it is mentioned in content.")
    phone_number: Optional[str] = Field(default=None, description="Provide the phone number of candidate if it is mentioned in content.")
    webpage: Optional[str] = Field(default=None, description="Provide webpage(s) that are relevant to candidate if it is mentioned in content.")
    Linkedin: Optional[str] = Field(default=None, description="Provide a LinkedIn webpage or URL of candidate if it is mentioned in content.")
    github: Optional[str] = Field(default=None, description="Provide a github webpage or github URL of candidate if it is mentioned in content.")

    # Content-level fields: derivable from any resume text, so they stay required.
    summary: str = Field(description="Provide a concise summary of the content.")
    language: str = Field(description="Provide the language that the content is written in.")
    technical_tools: str = Field(description="Provide technical tools or technical skills that are related to content")
    keywords: str = Field(description="Provide keywords related to the content.")

In [25]:
# Two-message chat prompt: a fixed system instruction plus the resume text,
# which is substituted for the {input} placeholder at invocation time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Extract the relevant information from a candidate's resume, "
            "if information is not explicitly provided do not guess. "
            "Extract partial info",
        ),
        ("human", "{input}"),
    ]
)

In [26]:
# temperature=0 keeps the extraction as repeatable as the API allows.
model = ChatOpenAI(
    temperature=0,
)

In [27]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

In [28]:
# The Overview schema is the only function offered to the model.
overview_tagging_function = [convert_pydantic_to_openai_function(Overview)]

# Naming the function in `function_call` makes the model always answer by
# calling "Overview" instead of replying with free text.
extracting_model = model.bind(
    function_call={"name": "Overview"},
    functions=overview_tagging_function,
)

# Pipeline: fill the prompt -> call the bound model -> parse the function-call
# arguments as JSON.
extracting_chain = (
    prompt
    | extracting_model
    | JsonOutputFunctionsParser()
)

In [29]:
# Run the extraction chain on the joined resume text; the parsed result is displayed as the cell output.
extracting_chain.invoke({"input": txt})

{'email': 'first.last@selu.edu',
 'phone_number': '555-555-5555',
 'webpage': 'issuu.com/eengineersample',
 'Linkedin': '',
 'github': '',
 'summary': 'Experienced in designing solutions for environmental problems that require integration of environmental and human health constraints compatible with economic growth, sustainable development, and ethical standards. Proficient with a variety of modeling and drafting software including AutoCAD and ALGOR. Efficiently manage projects and collaborate with teams to accelerate the overarching goals of the company.',
 'language': 'English',
 'technical_tools': 'AutoCAD, ALGOR',
 'keywords': 'environmental problems, design solutions, modeling software, drafting software, project management, collaboration'}