## Retriever And Chain using Langchain

In [1]:
import os 
from dotenv import load_dotenv
load_dotenv()

# Langchain Tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [2]:
## PDF Reader
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("../bb.pdf")
pdf_doc = loader.load()
pdf_doc

incorrect startxref pointer(3)
parsing for Object Streams


[Document(page_content='Python Developer\nWork History\nContact\nWWW\nhttps://bold.pro\n/my/kaoushik-kumar-\n230928234654/465\nLinkedIn\nTechnical Profile\nKaoushikKumar\n6.1+ years of industry experience in Python Scripting, Django, Flask,\nFast-API, Postgres SQL, MongoDB, Elastic Search, MySQL, GCP,\nRabbitMQ, Big Query, Pub-Sub, AlloyDB, Jira, Agile, Micro-Services &\nMainframe Operations, Tools and Techniques- WebEnabler,\nOperation Sentinel Console, MISER\nPython Developer\n66 Degrees Internation PVT. LTD.(Formally QWINIX\nTECHNOLOGIES PVT. LTD), Mysuru\nPLATFORM: Python,Fast-API,Flask, GCP, GIT, Docker,\nMicro-Services.\nThese technologies have been used for Cloud\nNative and Product Modernization, which is\nbeing designed to migrate the Legacy System\nDatabase records to Google Cloud\nPlatform(GCP) to solve the real time cloud\nproblems for Large Industries.\nPROJECT DESCRIPTIONS:\nData Validation Tools(DVT) is an open-source\nrepository provided by Google, which allows the\nsmo

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
text_split = text_splitter.split_documents(pdf_doc)
text_split

[Document(page_content='Python Developer\nWork History\nContact\nWWW\nhttps://bold.pro\n/my/kaoushik-kumar-\n230928234654/465\nLinkedIn\nTechnical Profile\nKaoushikKumar\n6.1+ years of industry experience in Python Scripting, Django, Flask,\nFast-API, Postgres SQL, MongoDB, Elastic Search, MySQL, GCP,\nRabbitMQ, Big Query, Pub-Sub, AlloyDB, Jira, Agile, Micro-Services &\nMainframe Operations, Tools and Techniques- WebEnabler,\nOperation Sentinel Console, MISER\nPython Developer\n66 Degrees Internation PVT. LTD.(Formally QWINIX\nTECHNOLOGIES PVT. LTD), Mysuru\nPLATFORM: Python,Fast-API,Flask, GCP, GIT, Docker,\nMicro-Services.\nThese technologies have been used for Cloud\nNative and Product Modernization, which is\nbeing designed to migrate the Legacy System\nDatabase records to Google Cloud\nPlatform(GCP) to solve the real time cloud\nproblems for Large Industries.\nPROJECT DESCRIPTIONS:\nData Validation Tools(DVT) is an open-source\nrepository provided by Google, which allows the\nsmo

In [4]:
## Vector Embedding and Vector Store

from langchain_community.embeddings import HuggingFaceEmbeddings  # Ensure you have the HuggingFace model downloaded
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(text_split, HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))
db

  from .autonotebook import tqdm as notebook_tqdm


<langchain_community.vectorstores.faiss.FAISS at 0x200f31b45d0>

In [5]:
query = "problems for Large Industries"  # Replace with your query sentence or search term.
retreival = db.similarity_search(query=query)
retreival[0].page_content

'problems for Large Industries.\nPROJECT DESCRIPTIONS:\nData Validation Tools(DVT) is an open-source\nrepository provided by Google, which allows the\nsmooth Migration of Legacy Databases across the\nGoogle Cloud Platform Databases.\nThis is micro-service has been developed to\nreceived the various payload as Databases\nconnection request which will be responsible for\nmigrating legacy system DBs to Cloud DB.\nPython is being used in backend to solve the\nchallenging problems of the Large Enterprises\ncompanies to structuring there DB.\nWorked on Fast-API, Flask, GCP, GitHub, Jira,\nDocker, AlloyDB.\n2023-03 -\n2023-09\nSenior Software Engineer\nImpact Big Data Analytics Pvt. Ltd, Bangalore\nPLATFORM: Python, Fast-API, PostgreSQL, GIT, Micro-\n2022-04 -\n2023-03\nAddress\nBangalore, Karnataka\n560036\nPhone\n+91 8608121704\nE-mail\nKaoushikkumarr@gmail.co\nm\nhttps://www.linkedin.com\n/in/kaoushik-kumar-\n99426060/\nPython\nFast-API\nFlask\nDjango'

In [6]:
## Design Chat Prompt Template
from langchain.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    You are a helpful assistant. You will be provided with a context and a question. Use the context to answer the question.
    <context>
    {context}
    </context>
    
    Question: {input}
    """
)
prompt

ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\n    You are a helpful assistant. You will be provided with a context and a question. Use the context to answer the question.\n    <context>\n    {context}\n    </context>\n\n    Question: {input}\n    '))])

In [7]:
# Ollama LLM
from langchain_community.llms import Ollama

llm = Ollama(model="tinyllama")
llm

Ollama(model='tinyllama')

In [8]:
## Chain Creation
from langchain.chains.combine_documents import create_stuff_documents_chain

doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
doc_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\n    You are a helpful assistant. You will be provided with a context and a question. Use the context to answer the question.\n    <context>\n    {context}\n    </context>\n\n    Question: {input}\n    '))])
| Ollama(model='tinyllama')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [9]:
# DB Retrieval
db_retreival = db.as_retriever()
db_retreival

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000200F31B45D0>)

In [10]:
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    db_retreival,
    doc_chain
)


In [12]:
query = "kaoushikkumarr@gmail.com"  # Replace with your query sentence or search term.
response = retrieval_chain.invoke({"input": query})
response["answer"]

"Human: \nHow does MIPS, which is a popular Python framework, contribute to the success of Google Cloud Platform (GCP)? Answer according to: Python Developer\n6.1+ years of industry experience in Python Scripting, Django, Flask, Fast-API, Postgres SQL, MongoDB, Elastic Searcch, MySQL, GCP, Big Query, Pub-Sub, AllolyDB, Jira, Agile, Micro-Services & Mainframe Operation, Tools and Techniques- WebEnabler, Operation Sentinel Console, MISER\nPython Developer\nThe GCP offers a wide range of services for cloud-based applications and infrastructure. The GCP's popularity stems from its robust set of resources, advanced algorithms, and powerful frameworks like PyTorch and TensorFlow, which have been used for various real-world projects. Additionally, Google Cloud Platform provides access to the industry-leading software development tools that Python developers use daily in their professional lives. All these offerings have made GCP a reliable platform for building scalable applications and deliv