In [8]:
import os
import json
from dotenv import load_dotenv

In [9]:
# Load variables from a .env file into the process environment before any
# client is constructed.
# NOTE(review): presumably this is where OPENAI_API_KEY (and the Pinecone
# credentials used further down) come from — confirm against the .env file.
load_dotenv()

True

In [10]:
# Debug aid — keep commented out in the saved notebook; running it echoes the secret:
#os.getenv('OPENAI_API_KEY')

### Load Data

In [11]:
from langchain.document_loaders import PyPDFDirectoryLoader

# Build a loader over the PDFs in data/resumes (path is relative to the
# notebook's working directory) and read them into Document objects.
loader = PyPDFDirectoryLoader("data/resumes")
documents = loader.load()
# NOTE(review): the loader appears to yield one Document per PDF page, so this
# count is likely pages rather than resume files — confirm.
len(documents)

24

### Split Data

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk the loaded documents with a target size of 1000 characters and a
# 200-character overlap between neighbouring chunks.
# NOTE: `documents` is rebound from the loaded documents to their chunks here,
# so every later cell sees chunks, not the raw loader output.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(documents)
len(documents)

69

### Create Pinecone Database (Vector Database)

In [13]:
from langchain import embeddings
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index that holds the embedded resume chunks.
pinecone_index = "langchain-practice"

# Embedding model shared by the indexing step below and by query-time lookups,
# so stored vectors and query vectors come from the same model.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")

# One-time indexing step, kept behind an explicit flag instead of commented-out
# code. Leave it False for normal runs, which read the existing index; set it to
# True only when the index needs to be populated from `documents`.
# NOTE(review): running this against an already-populated index presumably
# uploads the chunks a second time — confirm before enabling.
POPULATE_INDEX = False
if POPULATE_INDEX:
    docsearch = PineconeVectorStore.from_documents(
        documents,
        embedding,
        index_name=pinecone_index,
    )

### Load Vector Data for Usage

In [15]:
# Connect to the existing index rather than re-indexing the documents; the same
# embedding model is passed so queries are embedded consistently with the index.
vector_db = PineconeVectorStore.from_existing_index(
    index_name=pinecone_index,
    embedding=embedding,
)

### Data Retrieval For Similarity Search

In [16]:
# Wrap the vector store as a retriever using its default search settings
# (no search arguments are passed).
retriever = vector_db.as_retriever()
# Smoke-test retrieval on its own: the cell output is the list of matched
# Document chunks, before any LLM is involved.
# NOTE(review): the displayed chunks contain personal details from the resumes —
# consider clearing this output before sharing the notebook.
retriever.invoke("list top skills mentioned in the resumes?")

[Document(page_content='Contact\nwww.linkedin.com/in/jericka-henry\n(LinkedIn)\nTop Skills\nCustomer Satisfaction\nRelationship Building\nBusiness Development\nCertifications\nOccupational Studies of Health and\nScience\nTech Sales Career Track\nHonors-Awards\nI Can AfricanJericka Henry\nBDR | SDR | SaaS| CRM | B2B\nNew York City Metropolitan Area\nSummary\nGrowing up in New York City brings out every natural gift that sleeps\ninside of you. I learned early on having effective communication\nskills is the key to building, learning and growing your brand\n(you). Tech Sales is definitely a place I can expand this skill.\nAt Springboard I conducted effective qualification calls and\nimplemented objection handling strategies. I also executed on\ncritical tech sales fundamentals including pipeline development and\nprospect research and as a result I successfully passed over 20\ncourse assessments to demonstrate understanding and delivery of\nimpactful tech sales practices.\nAs a massage the

### Feed Retrieved Documents to LLM to get Exact Answer

In [17]:
from langchain_openai import OpenAI
from langchain.chains import RetrievalQA

# Retrieval-augmented QA: the retriever fetches the relevant resume chunks and
# the "stuff" chain type passes them to the LLM together with the question.
# temperature=0 replaces the library's default sampling temperature so that
# re-running the notebook gives (near-)repeatable answers instead of a fresh
# random sample each time.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0),
    chain_type="stuff",
    retriever=retriever,
    # Only the query and the answer are returned; the source chunks are not.
    return_source_documents=False,
)
chain.invoke("list top skills mentioned in the resumes?")

{'query': 'list top skills mentioned in the resumes?',
 'result': ' Customer Satisfaction, Relationship Building, Business Development, Team Leadership, Teamwork, Problem Solving, Written Communication, Social Perceptiveness'}

In [18]:
# Ask a second question and print only the answer text; strip() removes the
# leading whitespace the model puts in front of its completion.
print(
    chain.invoke(
        "can you give me the top resumes for python based on provided documents?"
    )["result"].strip()
)

From the provided documents, the top resumes for python would be Amanze Emeziem and Gus Medina. Both individuals have Python listed as one of their top skills and have experience working with it in their previous roles.
