In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import WebBaseLoader,JSONLoader
from langchain.chains import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_text_splitters import RecursiveCharacterTextSplitter
# from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import requests
from bs4 import BeautifulSoup
# Read the service credentials from the process environment (load_dotenv() was
# called earlier in this cell). Each name is None when its variable is unset.
langchain_api_key = os.environ.get("LANGCHAIN_API_KEY")
groq_api_key = os.environ.get("GROQ_API_KEY")


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Loaders for the knowledge-base sources: three local PDFs and one JSON file.
# NOTE(review): the first PDF appears to be a local copy of
# https://nscpolteksby.ac.id/ebook/files/Ebook/Business%20Administration/ARMSTRONGS%20HANDBOOK%20OF%20HUMAN%20RESOURCE%20MANAGEMENT%20PRACTICE/26%20-%20Job-Role-%20Competency%20and%20Skills%20Analysis.pdf
# (previously loaded via WebBaseLoader) - confirm before relying on it.
ploader = PyPDFLoader("26 - Job-Role- Competency and Skills Analysis.pdf")
qloader = PyPDFLoader("job-description-compendium-by-workable.pdf")
rloader = PyPDFLoader("resume-and-cover-letter-examples.pdf")

# jq_schema "." selects the whole JSON document; text_content=False means the
# selected content is not required to be a plain string.
sloader = JSONLoader(
    "job_roles.json",
    jq_schema=".",
    text_content=False,
)


In [3]:
# Run every loader in turn (the three PDFs, then the JSON file) and pool all
# resulting documents into a single list; the cell displays the total count.
loads = [
    doc
    for source in (ploader, qloader, rloader, sloader)
    for doc in source.load()
]
len(loads)

236

In [4]:
# Split the loaded documents into overlapping chunks for embedding:
# chunk_size=500 with chunk_overlap=200 (measured in characters with the
# splitter's default length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
chunks = text_splitter.split_documents(loads)

# Preview only the first few chunks: echoing the entire list floods the
# notebook with output and bloats the saved file.
chunks[:3]

[Document(metadata={'producer': '3-Heights(TM) PDF Producer 4.5.5.0 (http://www.pdf-tools.com)', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2015-04-08T09:04:27+00:00', 'source': '26 - Job-Role- Competency and Skills Analysis.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}, page_content='26\nJob, Role, Competency\nand Skills Analysis\nKey concepts and terms\nAccountablity proﬁ le •\nCompetency analysis •\nCompetency modelling •\nCritical-incident technique •\nDescriptor •\nFaults analysis •\nFunctional analysis •\nGeneric role •\nJob •\nJob analysis •\nJob breakdown •\nJob description •\nJob learning analysis •\nKSA (knowledge, skills and abilities  •\nstatement)\nLearning speciﬁ cation •\nManual skills analysis •\nPerson speciﬁ cation •\nPersonal construct •\nRepertory grid analysis •\nRole •'),
 Document(metadata={'producer': '3-Heights(TM) PDF Producer 4.5.5.0 (http://www.pdf-tools.com)', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2015-04-08T09:04:27+00:00', 

In [5]:
# Embed every chunk with the BAAI/bge-small-en sentence-embedding model and
# build an in-memory FAISS index over the embeddings.
#
# FAISS is the class imported in the first cell (langchain_community); no
# additional import is needed here.
#
# `chunks` was produced by `text_splitter` and therefore already respects its
# chunk_size, so no second splitting pass is required. The Document objects are
# indexed directly (rather than bare strings) so that each chunk's metadata
# (source file, page, ...) stays attached to whatever the retriever returns.
embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
vectordb = FAISS.from_documents(chunks, embedder)


  embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
2025-11-12 23:52:17.770448: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Chat model that generates the answers: the "llama-3.1-8b-instant" model served
# through Groq, authenticated with the key read from the environment.
llm = ChatGroq(api_key=groq_api_key, model="llama-3.1-8b-instant")

In [7]:
# Retrieval-augmented QA pipeline:
#   1. the retriever fetches the 3 most similar chunks from the vector store,
#   2. the stuff-documents chain places them in the prompt's {context} slot,
#   3. the LLM answers the question supplied as {input}.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human",
            "Answer the following Question based on the following {context} Question:{input}",
        ),
    ]
)

doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
qa_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=doc_chain)


In [8]:

# Smoke-test the chain with a sample question and print only the generated
# answer (the result dict's "answer" entry).
question = "what is the purpose of job description?"
result = qa_chain.invoke({"input": question})
print(result["answer"])

According to the text, the purpose of a job description is to attract applications. However, it should also serve another purpose, which is to convey a broad picture of the job, distinguish it from other jobs, and establish the role of the job holder and their contribution towards achieving the company's and their unit's objectives.


In [9]:

# Persist the FAISS index to the local "faiss_index1" folder so it can be
# loaded again later instead of being rebuilt from the documents.
vectordb.save_local(folder_path="faiss_index1")
