In [1]:
import os
import json
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the environment (python-dotenv).
# Presumably this is where OPENAI_API_KEY and the Pinecone credentials come
# from - confirm against the project's .env. The value shown below the cell is
# the call's return value.
load_dotenv()

True

In [7]:
# Debugging aid - keep it commented out: as the last expression of a cell it
# would display the API key in the notebook's saved output.
#os.getenv('OPENAI_API_KEY')

### Load Data

In [15]:
from langchain.document_loaders import PyPDFDirectoryLoader

# Load the PDF resumes from the "resumes" directory (resolved relative to the
# kernel's working directory).
loader = PyPDFDirectoryLoader("resumes")
documents = loader.load()

# Fail fast: an empty result would otherwise flow silently into the splitting
# and indexing steps and only surface later as empty or misleading answers.
assert documents, (
    "No documents were loaded from 'resumes' - check the working directory "
    "and that the folder contains PDF files"
)

# Number of Document objects returned by the loader.
len(documents)

64

### Split Data

In [28]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk size and overlap handed to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# NOTE: `documents` is rebound to the split chunks, so running this cell again
# without first re-running the load cell splits the already-split chunks.
documents = text_splitter.split_documents(documents)

# Number of chunks after splitting.
len(documents)

110

### Create Pinecone Database (Vector Database)

In [33]:
from langchain import embeddings
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index used for the resume chunks.
pinecone_index = "langchain-practice"

# Embedding model shared by the indexing step below and by the query-time
# vector store created from the existing index.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")

# One-time indexing step: embeds `documents` and writes them to the index.
# It is guarded by a flag instead of being commented out so the full pipeline
# stays visible and runnable; leave it False once the index is populated
# (running it again presumably uploads the same chunks a second time - verify
# before re-enabling).
REBUILD_INDEX = False
if REBUILD_INDEX:
    docsearch = PineconeVectorStore.from_documents(
        documents, embedding, index_name=pinecone_index
    )

### Load Vector Data for Usage

In [34]:
# Connect to the existing Pinecone index by name, using the same embedding
# model that is configured for indexing.
vector_db = PineconeVectorStore.from_existing_index(
    index_name=pinecone_index,
    embedding=embedding,
)

### Data Retrieval For Similarity Search

In [38]:
# Question used to sanity-check similarity search against the index.
skills_query = "list top skills mentioned in the resumes?"

# Retriever built with the vector store's default settings.
retriever = vector_db.as_retriever()

# Display the raw Document objects returned for the query.
retriever.invoke(skills_query)

[Document(page_content='Contact\nwww.linkedin.com/in/jericka-henry\n(LinkedIn)\nTop Skills\nCustomer Satisfaction\nRelationship Building\nBusiness Development\nCertifications\nOccupational Studies of Health and\nScience\nTech Sales Career Track\nHonors-Awards\nI Can AfricanJericka Henry\nBDR | SDR | SaaS| CRM | B2B\nNew York City Metropolitan Area\nSummary\nGrowing up in New York City brings out every natural gift that sleeps\ninside of you. I learned early on having effective communication\nskills is the key to building, learning and growing your brand\n(you). Tech Sales is definitely a place I can expand this skill.\nAt Springboard I conducted effective qualification calls and\nimplemented objection handling strategies. I also executed on\ncritical tech sales fundamentals including pipeline development and\nprospect research and as a result I successfully passed over 20\ncourse assessments to demonstrate understanding and delivery of\nimpactful tech sales practices.\nAs a massage the

### Feed Retrieved Documents to LLM to get Exact Answer

In [37]:
from langchain_openai import OpenAI
from langchain.chains import RetrievalQA

# temperature=0 makes the completion as repeatable as the API allows, so the
# answers recorded in this notebook do not drift between re-runs.
llm = OpenAI(temperature=0)

# Question-answering chain: documents fetched by `retriever` are passed to the
# LLM together with the question. With return_source_documents=False the
# result dict carries only the query and the generated answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)
chain.invoke("list top skills mentioned in the resumes?")

{'query': 'list top skills mentioned in the resumes?',
 'result': '\n1. Customer Satisfaction\n2. Relationship Building\n3. Business Development\n4. Occupational Studies of Health and Science\n5. Tech Sales Career Track\n6. BDR (Business Development Representative)\n7. SDR (Sales Development Representative)\n8. SaaS (Software as a Service)\n9. CRM (Customer Relationship Management)\n10. B2B (Business to Business)\n11. Team Leadership\n12. Teamwork\n13. Problem Solving\n14. Python (Programming Language)\n15. C (Programming Language)\n16. Cascading Style Sheets (CSS)\n17. Financial Coaching\n18. Credit Counseling\n19. Financial Literacy'}

In [45]:
# Ask the chain for Python-focused resumes, then print only the answer text
# with surrounding whitespace removed.
python_answer = chain.invoke(
    "can you give me the top resumes for python based on provided documents?"
)
print(python_answer["result"].strip())

Based on the provided documents, there are three individuals who have listed Python as a top skill: Amanze Emeziem, Gustavo Medina, and Gaspar Jaen Maisonet. These individuals have varying levels of experience and backgrounds, but all have listed Python as a top skill on their LinkedIn profiles.
