In [1]:
%%capture
# Core LangChain (0.3.x ecosystem)
!pip install --user "langchain==0.3.*" 
!pip install --user "langchain-core==0.3.*"
# Groq integration
!pip install --user "langchain-groq==0.3.7"
# HuggingFace + Transformers (for free local embeddings)
!pip install --user "transformers==4.41.2"
!pip install --user "huggingface-hub==0.23.4"
!pip install --user "sentence-transformers==2.5.1"
# Vector store
!pip install --user "chromadb"
# Utilities
!pip install --user "wget==3.2"
# Torch (CPU build)
!pip install --user --upgrade torch --index-url https://download.pytorch.org/whl/cpu



In [52]:
#import all the modules and libraries needed
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

from langchain_groq import ChatGroq
import os

In [53]:
#set the grop api as an enviornment variable
os.environ["GROQ_API_KEY"]="your-api-key"

In [63]:
#get the path of the pdf as input and check if the file exists locally or not
pdf_path = input("Enter the path of the pdf(must exist locally):").strip()
if not os.path.exists(pdf_path):
    raise FileNotFoundError("The path is incorrect/The file does not exist.")

Enter the path of the pdf(must exist locally): C:\Users\Ashwa\Downloads\resumeeeee.pdf


In [64]:
#load the file, extract the text
loader = PyPDFLoader(pdf_path)
text = loader.load()

In [75]:
splitter = RecursiveCharacterTextSplitter(chunk_size=800,
                                          chunk_overlap = 100,
                                          separators=["\n\n", "\n", " ", ""])
chunks = splitter.split_documents(text)

In [76]:
print(len(chunks))

4


In [77]:
#vector embedding the document
embedder = HuggingFaceEmbeddings()
docsearch = Chroma.from_documents(chunks,embedder)

In [78]:
#create an llm instance and a retriever
retriever = docsearch.as_retriever()
llm = ChatGroq(model='llama3-8b-8192',temperature=0)

In [82]:
#building a retrieval q&a chain
qa = RetrievalQA.from_chain_type(llm=llm,chain_type="stuff",retriever=retriever,return_source_documents=False)
qa.invoke("What are the projects mentioned in the resume?")["result"]

'The projects mentioned in the resume are:\n\n1. AI Learning Website\n2. User Churn Prediction\n3. Stock Price Predictor\n4. Movie Recommendation Engine'

In [83]:
#to add memory and a prompt template
template = """Use the document and relevant past history to answer the question below. If you do not know the answer or are not sure, say I do not know. DO NOT HALLUCINATE.
                {context}
            Question: {question}
            Answer: """

prompt = PromptTemplate.from_template(template)
args = {"prompt":prompt}
memory = ConversationBufferMemory(memory_key="chat_history",return_messages=True)
QA = ConversationalRetrievalChain.from_llm(llm = llm,
                                          memory = memory,
                                          retriever=retriever,
                                          return_source_documents=False,
                                          chain_type="stuff",
                                            get_chat_history = lambda h:h,
                                          combine_docs_chain_kwargs=args)

QA.invoke({"question":"What are the certifications listed on the resume?"})["answer"]

'According to the provided document, the certifications listed on the resume are:\n\n1. Google Advanced Data Analytics Professional Certification\n2. IBM AI Developer Professional Certification'

In [84]:
#creating an agent

def QA_BOT():
    memory = ConversationBufferMemory(memory_key="chat_history",return_messages=True)
    QA = ConversationalRetrievalChain.from_llm(llm = llm,
                                          memory = memory,
                                          retriever=retriever,
                                          return_source_documents=False,
                                          chain_type="stuff",
                                            get_chat_history = lambda h:h,
                                          combine_docs_chain_kwargs=args)
    while True:
        query = input("Question:")
        if query.lower() in ('exit','quit','bye'):
            break

        result = QA.invoke({"question":query})
        print(result["answer"])


In [85]:
QA_BOT()

Question: Give me a summary of the projects and skills listed on the resume


Based on the provided document, here is a summary of the projects and skills listed on the resume:

**Projects:**

1. AI Learning Website: A platform with a chatbot, PDF/YouTube summarizers, and flashcard generation, built using Flask and SQLite.
2. User Churn Prediction: A machine learning model that predicted user churn with 98% accuracy using 10K+ user records, featuring feature engineering, exploratory data analysis, and model comparison.
3. Stock Price Predictor: A time-series model that forecasted trends using LSTM with real-time yFinance data, showcasing deep learning applications in financial domains.
4. Movie Recommendation Engine: A hybrid recommender that combined content-based filtering with simple collaborative logic, handling sparse data and cold-start cases.

**Skills:**

1. Programming languages: Python
2. Deep Learning frameworks: TensorFlow
3. Data Science libraries: Pandas, NumPy, Matplotlib, Seaborn, Scikit-learn
4. Web development: Flask, Streamlit, Gradio
5. Natur

Question: exit


In [86]:
print(memory.buffer)

[HumanMessage(content='What are the certifications listed on the resume?', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the provided document, the certifications listed on the resume are:\n\n1. Google Advanced Data Analytics Professional Certification\n2. IBM AI Developer Professional Certification', additional_kwargs={}, response_metadata={})]
