#### Simple RAG Chatbot Based on My Resume

In [None]:
# import all dependencies
import os

from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

In [11]:
# Load credentials and settings from the .env file.
# load_dotenv() populates os.environ, so the values do not have to be copied
# back into os.environ by hand; this cell only verifies that they are present.
load_dotenv()

# Every variable the rest of the notebook (or the client libraries) relies on.
required_env_vars = (
    "OPENAI_API_KEY",
    "LANGSMITH_API_KEY",
    "PINECONE_API_KEY",
    "LANGSMITH_TRACING",
    "LANGSMITH_ENDPOINT",
    "LANGSMITH_PROJECT",
    "PINECONE_INDEX_NAME",
)

# Fail fast with a readable message. The earlier
# `os.environ[name] = os.getenv(name)` pattern crashed with
# "TypeError: str expected, not NoneType" whenever a variable was absent,
# without saying which one.
missing_env_vars = [name for name in required_env_vars if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_env_vars)
    )

In [12]:
# Read the resume PDF from the working directory.
# `resume` is the list of Document objects returned by the loader.
resume_pdf_path = "Ankit_Chitrakar_Updated.pdf"
loader = PyMuPDFLoader(resume_pdf_path)
resume = loader.load()

resume

[Document(metadata={'producer': 'pdfTeX-1.40.24', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-10-28T06:45:05+00:00', 'source': 'Ankit_Chitrakar_Updated.pdf', 'file_path': 'Ankit_Chitrakar_Updated.pdf', 'total_pages': 1, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-28T06:45:05+00:00', 'trapped': '', 'modDate': 'D:20241028064505Z', 'creationDate': 'D:20241028064505Z', 'page': 0}, page_content='ANKIT CHITRAKAR\n+91-6289258039 â‹„chitrakarankit2003@gmail.com â‹„Sonarpur, Kolkata-700149(West Bengal), India\nLinkedIn â‹„GitHub â‹„Portfolio Website\nEDUCATION\n- B.Tech in ECE, Narula Institute of Technology (MAKAUT) 8.80 CGPA\n2020 - 2024\n- Higher Secondary(PCMC), WBCHSE board. 88.8%\n2019 - 2020\n- Secondary 10th, WBBSE board. 87.85%\n2017 - 2018\nFREELANCING & WORK EXPERIENCE\n1. Junior Programmer at CBNITS India Pvt Ltd (Link)\nJuly 2024 - Present\nâ€¢ Achieved significant contributions as a backend developer for a US-based

In [15]:
# Cut the loaded resume into smaller, overlapping chunks
# (chunk_size=700, chunk_overlap=100) before embedding.
splitter_settings = {"chunk_size": 700, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
all_resume_chunks = text_splitter.split_documents(resume)
all_resume_chunks

[Document(metadata={'producer': 'pdfTeX-1.40.24', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-10-28T06:45:05+00:00', 'source': 'Ankit_Chitrakar_Updated.pdf', 'file_path': 'Ankit_Chitrakar_Updated.pdf', 'total_pages': 1, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-28T06:45:05+00:00', 'trapped': '', 'modDate': 'D:20241028064505Z', 'creationDate': 'D:20241028064505Z', 'page': 0}, page_content='ANKIT CHITRAKAR\n+91-6289258039 â‹„chitrakarankit2003@gmail.com â‹„Sonarpur, Kolkata-700149(West Bengal), India\nLinkedIn â‹„GitHub â‹„Portfolio Website\nEDUCATION\n- B.Tech in ECE, Narula Institute of Technology (MAKAUT) 8.80 CGPA\n2020 - 2024\n- Higher Secondary(PCMC), WBCHSE board. 88.8%\n2019 - 2020\n- Secondary 10th, WBBSE board. 87.85%\n2017 - 2018\nFREELANCING & WORK EXPERIENCE\n1. Junior Programmer at CBNITS India Pvt Ltd (Link)\nJuly 2024 - Present\nâ€¢ Achieved significant contributions as a backend developer for a US-based

In [None]:
# Convert the resume chunks into vectors and store them in the vector DB.

# Embedding model used to vectorise each chunk
embedding = OpenAIEmbeddings(model="text-embedding-3-large")

# Stable, position-based IDs for the chunks. Without explicit IDs, re-running
# this cell upserts the same chunks again under fresh IDs, so the namespace
# accumulates duplicate vectors; with fixed IDs a re-run overwrites the
# existing records instead.
resume_chunk_ids = [
    f"resume-chunk-{position}" for position in range(len(all_resume_chunks))
]

# Vector store: embeds the chunks and writes them to the Pinecone index named
# by PINECONE_INDEX_NAME, inside the "Resume_Chat_QnA" namespace.
vector_store = PineconeVectorStore.from_documents(
    documents=all_resume_chunks,
    embedding=embedding,
    ids=resume_chunk_ids,
    pinecone_api_key=os.environ.get("PINECONE_API_KEY"),
    index_name=os.environ.get("PINECONE_INDEX_NAME"),
    namespace="Resume_Chat_QnA",
)
vector_store

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1adec173150>

In [21]:
# Prompt pieces: a fixed system instruction plus a human message that carries
# the retrieved context and the user's question.
system_instruction = "You are a helpful AI assistant that helps people find information."
human_message_text = """
    Use the following context to answer the question: {context}
    Question: {question}
    Answer in a concise manner.
    """

system_prompt = SystemMessagePromptTemplate.from_template(system_instruction)
human_prompt = HumanMessagePromptTemplate.from_template(human_message_text)

# NOTE: the name `promt_template` (sic) is kept as-is because the chain cell
# further down refers to it by this spelling.
promt_template = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
promt_template

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful AI assistant that helps people find information.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Use the following context to answer the question: {context}\n    Question: {question}\n    Answer in a concise manner.\n    '), additional_kwargs={})])

In [28]:
# Build a retriever on top of the vector store, configured for
# score-threshold similarity search with k=5 and score_threshold=0.5.
retrieval_options = {"k": 5, "score_threshold": 0.5}
retriver = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retrieval_options,
)

# Retrieved documents are flattened into a single text block before being
# placed into the prompt for the LLM.
def format_docs(docs) -> str:
    """Join the ``page_content`` of every document, separated by a blank line."""
    page_texts = (document.page_content for document in docs)
    separator = "\n\n"
    return separator.join(page_texts)

In [30]:
# Chat model that generates the final answer
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.3,
    max_completion_tokens=2000,
)
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001ADEF317450>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001ADF0815650>, root_client=<openai.OpenAI object at 0x000001ADF3CECBD0>, root_async_client=<openai.AsyncOpenAI object at 0x000001ADEF317910>, model_name='gpt-4o', temperature=0.3, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True, max_tokens=2000)

In [31]:
# Wire everything into one pipeline. The incoming question goes to the
# retriever (whose documents are flattened by format_docs) and is also passed
# through unchanged; both values fill the prompt, the prompt is sent to the
# model, and the model output is parsed by StrOutputParser.
prompt_inputs = {
    "context": retriver | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | promt_template | llm | StrOutputParser()

rag_chain

{
  context: VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x000001ADEC173150>, search_type='similarity_score_threshold', search_kwargs={'k': 5, 'score_threshold': 0.5})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful AI assistant that helps people find information.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Use the following context to answer the question: {context}\n    Question: {question}\n    Answer in a concise manner.\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resourc

In [35]:
# Stream the answer instead of using invoke so text appears as it is generated
def ask_question(question: str) -> None:
    """Print a question and stream the RAG chain's answer to stdout.

    Args:
        question: Text handed to ``rag_chain.stream``.

    Returns:
        None. The answer is only printed, chunk by chunk.
    """
    print(f"ðŸ¤” Question: {question}\n")
    print("ðŸ¤– Answer: ", end="", flush=True)

    # Emit each chunk as soon as it arrives. The previous ``full_response``
    # accumulator was built up but never read or returned, so it is removed.
    for chunk in rag_chain.stream(question):
        print(chunk, end="", flush=True)

    # Terminate the streamed line so any later output starts on a fresh line.
    print()

In [41]:
# Example query run against the resume chatbot
sample_question = "what do you rate this resume as a fresher out of 10?"
ask_question(question=sample_question)

ðŸ¤” Question: what do you rate this resume as a fresher out of 10?

ðŸ¤– Answer: I would rate this resume as a fresher 8 out of 10. It effectively showcases relevant education, technical skills, and practical experience through freelancing and projects, which are valuable for a fresher. However, it could be improved by organizing the content more clearly and ensuring all sections are complete and concise.