# Retriever & Chain with LangChain

In [1]:
from langchain_community.document_loaders import PyPDFLoader
# Read European_policy.pdf into `docs` using the PyPDF-based loader.
loader = PyPDFLoader("European_policy.pdf")
docs = loader.load()
# NOTE(review): a bare `docs` renders every loaded Document in the cell output;
# the metadata reports total_pages=684, so consider `docs[:2]` or `len(docs)` instead.
docs

 Document(metadata={'producer': 'PDFlib+PDI 7.0.4p1 (.NET/Win32)', 'creator': 'PyPDF', 'creationdate': '2012-02-24T21:46:37+03:00', 'moddate': '2012-02-24T21:46:37+03:00', 'source': 'European_policy.pdf', 'total_pages': 684, 'page': 1, 'page_label': '2'}, page_content='2 EN Official Journal ofthe European Communities 1.6.96\nDECIDES :\nArticle 1\nThe inventory ofingredients used incosmetic products provided for inArticle5aofDirective\n76/768/EECand reproduced intheAnnex isadopted.\nArticle 2\nThe INCI (International Nomenclature Cosmetic Ingredient)namescontained intheinventory\nshall constitute the common nomenclature for the purposes of Article7 (2) of Directive\n76/768/EEC.\nDone inBrussels,8May 1996.\nFor the Commission\nEmma BONINO\nMember of the Commission'),
 Document(metadata={'producer': 'PDFlib+PDI 7.0.4p1 (.NET/Win32)', 'creator': 'PyPDF', 'creationdate': '2012-02-24T21:46:37+03:00', 'moddate': '2012-02-24T21:46:37+03:00', 'source': 'European_policy.pdf', 'total_pages': 684,

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used for the rest of the notebook: chunk_size=1000 with chunk_overlap=20
# (presumably measured in characters, the splitter's default — confirm against the docs).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)

# Sanity check: split the loaded pages and display only the first five chunks.
text_splitter.split_documents(docs)[:5]

[Document(metadata={'producer': 'PDFlib+PDI 7.0.4p1 (.NET/Win32)', 'creator': 'PyPDF', 'creationdate': '2012-02-24T21:46:37+03:00', 'moddate': '2012-02-24T21:46:37+03:00', 'source': 'European_policy.pdf', 'total_pages': 684, 'page': 0, 'page_label': '1'}, page_content='EN1.6.96 Official Journal of the European Communities No L132/1\nII\n(Actswhose publication isnot obligatory)\nCOMMISSION\nCOMMISSION DECISION\nof8May 1996\nestablishing aninventory and acommon nomenclature ofingredients employed in\ncosmetic products\n(Text with EEArelevance)\n(96/335/EC)\nTHE COMMISSION OF THE EUROPEAN COMMUNI \xad\nTIES,\nhaving regard to the Treaty establishing the European\nCommunity ,\nHaving regard to Council Directive 76/768/EEC of\n27July 1976on the approximation of the laws of the\nMember States relating to cosmetic products 0 , as\namended byCommission Directive 95/34/EC(2),and in\nparticular Article 5aand Article7(2)thereof,\nAfter consultation of the Scientific Committee on\nCosmetology,\nWh

In [7]:
# Split all loaded pages into chunks and keep the full list for the indexing step.
documents = text_splitter.split_documents(docs)
# NOTE(review): a bare `documents` renders every chunk in the cell output;
# consider `documents[:5]` or `len(documents)` to keep the notebook readable.
documents

[Document(metadata={'producer': 'PDFlib+PDI 7.0.4p1 (.NET/Win32)', 'creator': 'PyPDF', 'creationdate': '2012-02-24T21:46:37+03:00', 'moddate': '2012-02-24T21:46:37+03:00', 'source': 'European_policy.pdf', 'total_pages': 684, 'page': 0, 'page_label': '1'}, page_content='EN1.6.96 Official Journal of the European Communities No L132/1\nII\n(Actswhose publication isnot obligatory)\nCOMMISSION\nCOMMISSION DECISION\nof8May 1996\nestablishing aninventory and acommon nomenclature ofingredients employed in\ncosmetic products\n(Text with EEArelevance)\n(96/335/EC)\nTHE COMMISSION OF THE EUROPEAN COMMUNI \xad\nTIES,\nhaving regard to the Treaty establishing the European\nCommunity ,\nHaving regard to Council Directive 76/768/EEC of\n27July 1976on the approximation of the laws of the\nMember States relating to cosmetic products 0 , as\namended byCommission Directive 95/34/EC(2),and in\nparticular Article 5aand Article7(2)thereof,\nAfter consultation of the Scientific Committee on\nCosmetology,\nWh

In [8]:
# The author considers OpenAIEmbeddings better than OllamaEmbeddings, but it needs a
# paid OpenAI subscription, so OllamaEmbeddings is used here instead.
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Embed the chunks with OllamaEmbeddings (default settings) and build a FAISS vector store.
# Only the first 20 chunks are indexed, so every later search can return text from that
# slice only. NOTE(review): presumably a cap to keep embedding time short — confirm.
db = FAISS.from_documents(documents[:20], OllamaEmbeddings())


  db = FAISS.from_documents(documents[:20], OllamaEmbeddings())


In [9]:
# Display the vector store object to confirm it was created.
db

<langchain_community.vectorstores.faiss.FAISS at 0x164271350>

In [None]:
# Natural-language question used to probe the vector store directly (no LLM involved yet).
query = "Please explain which regulations cosmetics companies have to keep in Europe."
# Similarity search with the library's default settings (no result count is passed here).
result = db.similarity_search(query)
# Show the text of the first returned hit only.
result[0].page_content

'refers to anew terminology developed by Colipa to take into account the need for atruly\ninternational approach . It should be noted that Article 5a of the cosmetic products\nDirective refers to CTFA,which has been replaced byINCI asthe correct designation for\nthe nomenclature .An INCI name may cover several chemical entities .\nFor cosmetic colorants ,the colour index (CI)number or the name listed inAnnex IVhas\nto beused for ingredient labelling ,asindicated inArticle 6(1)(g)ofthe cosmetic products\nDirective .The CInumber therefore becomes the INCI name for these ingredients .\n2. INN name\nThis abbreviation refers to the International non-proprietary name recommended bythe\nWorld Health Organization .Itislisted where applicable .\n3. Ph. Eur.name\nThis abbreviation refers to the name in the European pharmacopoeia .It is listed where\napplicable .\n4. CAS number\nThis abbreviation refers tothe code number developed bythe Chemical Abstracts Service.'

In [11]:
from langchain_community.llms import Ollama
# LLM used by the chains later in the notebook: the "llama2" model via the Ollama wrapper.
# NOTE(review): presumably requires a local Ollama server with llama2 available — confirm.
llm = Ollama(model="llama2")
# Display the configured LLM object.
llm

  llm = Ollama(model="llama2")


Ollama()

In [12]:
## Design ChatPrompt Template

from langchain_core.prompts import ChatPromptTemplate
# Prompt that restricts the model to the supplied context. The template exposes two
# variables: {context} (presumably filled by the document chain with the retrieved
# documents — confirm) and {input} (the user's question).
# Adjacent string literals are concatenated by Python into a single template string.
prompt = ChatPromptTemplate.from_template(
    "\n"
    "Answer the following question based only on the provided context.\n"
    "Think step by step before providing a detailed answer.\n"
    "I will tip you $1000 if the user finds the answer helpful.\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Question: {input}"
)

In [None]:
## Chain Introduction
## Create Stuff Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a "stuff documents" chain, which is later
# handed to the retrieval chain as its document-combining step.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [14]:
    """
    Retriever: A retriever is an interface that returns documents given an unstructured query.
    It is more general than a vector store.
    A retriever does not need to be able to store documents, only to return (or retrieve) them.
    Vector stores can be used as the backbone of a retriever, but there are other types of retrievers as well.
    https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/
    """

    # Wrap the FAISS vector store in a retriever using default settings
    # (no search arguments are passed).
    retriever = db.as_retriever()
    # Display the retriever object to inspect its configuration.
    retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x164271350>, search_kwargs={})

In [16]:
    """
    Retrieval chain (create_retrieval_chain): This chain takes in a user inquiry,
    which is then passed to the retriever to fetch relevant documents.
    Those documents (and original inputs) are then passed to an LLM to generate a response.
    https://python.langchain.com/v0.1/docs/modules/chains/
    """

    from langchain.chains import create_retrieval_chain
    # Wire the retriever to the document chain: the retriever fetches documents for the
    # incoming question and the document chain turns them into an answer.
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=document_chain,
    )

In [26]:
# Run the full retrieval chain on a sample question. The question is passed under the
# "input" key, matching the {input} variable of the prompt template.
# NOTE(review): the vector store behind the retriever was built from documents[:20] only,
# so the answer can draw on that slice of the PDF at most.
response = retrieval_chain.invoke(
    {"input": "What happened on 1 January 1997?"}
)

In [27]:
# Show only the generated answer text from the chain's response.
# NOTE(review): the answer quotes the context verbatim — verify the quote against the
# retrieved documents before relying on it.
response["answer"]

'Based on the provided context, it seems that 1 January 1997 was the date when certain changes regarding the classification of cosmetic substances took effect. The context mentions that "On 1 January 1997, Directive 96/32/EC came into force, which introduced new requirements for the classification of cosmetic substances."\n\nTherefore, the answer to your question is: On 1 January 1997, the new requirements for the classification of cosmetic substances introduced by Directive 96/32/EC came into effect.'