In [26]:
import getpass
import os

import pinecone
from langchain import PromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain, RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore

In [2]:
# Never commit the Pinecone API key to the notebook: take it from the
# PINECONE_API_KEY environment variable, or prompt for it (input hidden) when
# the variable is not set.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# and should be revoked/rotated in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass.getpass("Pinecone API key: ")
if not PINECONE_API_KEY:
    # Fail here with a clear message instead of a confusing auth error later.
    raise ValueError("A Pinecone API key is required (set PINECONE_API_KEY).")

In [3]:
def data_loader(data):
    """
    Load every PDF file matching ``*.pdf`` in the given directory.

    Args:
        data: directory path handed to ``DirectoryLoader``.

    Requires: from langchain.document_loaders import PyPDFLoader, DirectoryLoader

    Returns:
        The result of ``DirectoryLoader.load()`` for that directory, with each
        matching file read through ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader).load()

In [4]:
# Load the PDFs found in the local "Data/" folder via data_loader.
extracted_text = data_loader("Data/")

In [5]:
# Sanity check: display the text content of the first loaded document.
extracted_text[0].page_content

"Laboratory Name :ENVIROCARE LABS PVT. LTD., A-7, A-8 ENVIRO HOUSE, MIDC, WAGLE ESTATE MAIN ROAD,\nTHANE, MAHARASHTRA, INDIA\nAccreditation Standard ISO/IEC 17025:2017\nCertiﬁcate Number TC-8284 Page No 1 of 1279\nValidity 23/12/2023 to 22/12/2025 Last Amended on 14/02/2024\nThis is annexure to 'Certiﬁcate of Accreditation' and does not require any signature.S.No Discipline / Group Materials or Products testedComponent, parameter or\ncharacteristic tested /\nSpeciﬁc Test Performed /\nTests or type of tests\nperformedTest Method Speciﬁcation\nagainst which tests are\nperformed and / or the\ntechniques / equipment\nused\nPermanent Facility\n1BIOLOGICAL- ANIMAL\nFOOD & FEEDPet FoodsAerobic microbial count at\n/Total Viable count /Total Plate\ncount /standard Plate Count\n/Total bacterial count/Aerobic\nbacterial count/Aerobic Plate\nCountBAM Chap. 3,8th edition\n2BIOLOGICAL- ANIMAL\nFOOD & FEEDPet FoodsAerobic microbial count at\n/Total Viable count /Total Plate\ncount /standard Plate Cou

In [6]:
def text_split(extracted_data, size = 500, overlap = 50):
    """
    Break the loaded documents into overlapping text chunks.

    Args:
        extracted_data: documents to split (passed to ``split_documents``).
        size: used as the splitter's ``chunk_size`` (default 500).
        overlap: used as the splitter's ``chunk_overlap`` (default 50).

    Requires: from langchain.text_splitter import RecursiveCharacterTextSplitter

    Returns:
        The chunks produced by ``split_documents`` for ``extracted_data``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=size,
        chunk_overlap=overlap,
    ).split_documents(extracted_data)

In [7]:
# Chunk the loaded documents using text_split's defaults (size=500, overlap=50).
text_chunk = text_split(extracted_text)

In [8]:
# Report how many chunks the splitter produced (len() of the chunk list).
print("len of the chunk size: ", len(text_chunk))

len of the chunk size:  8537


In [9]:
def download_embedding(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """
    Build the HuggingFace embedding model used to vectorise text.

    Args:
        model_name: identifier passed to ``HuggingFaceEmbeddings`` as
            ``model_name``. Defaults to the MiniLM model this notebook was
            written against, so existing ``download_embedding()`` calls behave
            exactly as before.

    Requires: from langchain.embeddings import HuggingFaceEmbeddings

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.

    NOTE(review): a different model may produce vectors of a different
    dimension; presumably the Pinecone index must match - confirm before
    switching models.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [10]:
# Instantiate the embedding model once; it is reused when building the vector store.
embedding = download_embedding()

  warn_deprecated(


In [11]:
# Initialising Pinecone: export the key through the environment (presumably
# where the Pinecone integration looks for it) and name the target index.
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
index = 'envirochatbot'
# Index the chunk Document objects themselves rather than only their
# page_content strings, so whatever metadata the loader attached to each chunk
# is stored alongside its vector. The QA chain is built with
# return_source_documents=True, and vectors created from bare strings come
# back as source documents with no metadata at all.
# NOTE(review): every run of this cell embeds and upserts all chunks again;
# confirm whether duplicate vectors in the index matter before re-running.
docsearch = PineconeVectorStore.from_documents(text_chunk, embedding=embedding, index_name=index)

In [12]:
# Prompt text with two placeholders, {context} and {question}. It tells the
# model to admit when it does not know the answer and to return only the answer.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
# Wrap the text in a PromptTemplate declaring both placeholders as inputs.
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# Extra kwargs forwarded to the QA chain so it uses this prompt.
chain_type_kwargs={"prompt": PROMPT}

In [34]:
# Local Llama-2 chat model loaded from a GGML file through ctransformers.
# context_length is set explicitly: without it ctransformers falls back to its
# default window, which for LLaMA models is reportedly small enough that
# prompt + retrieved context + max_new_tokens (512) overflows it. The answer
# recorded in this notebook degenerating into repeated tokens is consistent
# with that overflow.
# NOTE(review): `device` is not among the fields LangChain's CTransformers
# wrapper documents; GPU offload in ctransformers is normally requested with
# config['gpu_layers'] - confirm whether this argument has any effect.
llm=CTransformers(model="Model/llama-2-7b-chat.ggmlv3.q8_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8,
                          'context_length':2048},device = 'gpu')

In [35]:
# Assemble the retrieval QA chain: the two best-matching chunks (k=2) from the
# vector store are placed into the custom prompt ("stuff" chain type), and the
# retrieved documents are returned together with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [36]:
# Read a question interactively and run it through the QA chain.
user_input = input("Input Prompt:")
# Calling the chain object directly (qa({...})) is deprecated in LangChain;
# invoke() takes the same input dict and returns the same output dict
# (keys: "query", "result", "source_documents").
result = qa.invoke({"query": user_input})
print("query : ", result["query"])
print("\nResponse : ", result["result"])
print("\nsource_documents : ", result["source_documents"])

Response :  For fruits, the following tests are typically performed:
Total Ash: This test is performed to determine the amount of ash present in the fruit pulp or puree.
Fruit Bar/ Toffee: This test is used to check if the fruit bar or toffee has been produced as per the specified standards.
Fruit Cereal cereal cereal cereal Flakes and Other products (Cerealysis/Pulp/ Cereal Cereal cereal cereal Bar/ Veget /Cerealimentals and vegetables: This test for cereals Bar/ Vina & Cereal Cereal Cereal Cereal Cereal cereal cereal cerealysis: This test: This test cereal cere and cere and cere/ cere/Cereal Cereal Cereal Cereal Cereal Cereal cereal Flakesh cereal Cereal Cereal Cereal Cereal Cereal cereal cereals/ Concentrate: This test cereal Cereal Cereal cerealysis/ Cereal Cereal Cerealised Flakes (Cereal Cereinanagerates Concentrex cereal cereal cereal and other products including puree cereal Cereal Cerealysis: This test cerealdeh Cereal Cereal/Pulp or Pulp / cerealyzmealge-Cereal Cerealdeh cere

In [37]:
# Display the raw result dict returned by the chain.
result

{'query': 'test performed for fruits',
 'result': 'For fruits, the following tests are typically performed:\nTotal Ash: This test is performed to determine the amount of ash present in the fruit pulp or puree.\nFruit Bar/ Toffee: This test is used to check if the fruit bar or toffee has been produced as per the specified standards.\nFruit Cereal cereal cereal cereal Flakes and Other products (Cerealysis/Pulp/ Cereal Cereal cereal cereal Bar/ Veget /Cerealimentals and vegetables: This test for cereals Bar/ Vina & Cereal Cereal Cereal Cereal Cereal cereal cereal cerealysis: This test: This test cereal cere and cere and cere/ cere/Cereal Cereal Cereal Cereal Cereal Cereal cereal Flakesh cereal Cereal Cereal Cereal Cereal Cereal cereal cereals/ Concentrate: This test cereal Cereal Cereal cerealysis/ Cereal Cereal Cerealised Flakes (Cereal Cereinanagerates Concentrex cereal cereal cereal and other products including puree cereal Cereal Cerealysis: This test cerealdeh Cereal Cereal/Pulp or P

In [38]:
# Print the query, the generated answer and the retrieved source documents,
# one labelled section each.
for label, key in (
    ("query : ", "query"),
    ("\nResponse : ", "result"),
    ("\nsource_documents : ", "source_documents"),
):
    print(label, result[key])

query :  test performed for fruits

Response :  For fruits, the following tests are typically performed:
Total Ash: This test is performed to determine the amount of ash present in the fruit pulp or puree.
Fruit Bar/ Toffee: This test is used to check if the fruit bar or toffee has been produced as per the specified standards.
Fruit Cereal cereal cereal cereal Flakes and Other products (Cerealysis/Pulp/ Cereal Cereal cereal cereal Bar/ Veget /Cerealimentals and vegetables: This test for cereals Bar/ Vina & Cereal Cereal Cereal Cereal Cereal cereal cereal cerealysis: This test: This test cereal cere and cere and cere/ cere/Cereal Cereal Cereal Cereal Cereal Cereal cereal Flakesh cereal Cereal Cereal Cereal Cereal Cereal cereal cereals/ Concentrate: This test cereal Cereal Cereal cerealysis/ Cereal Cereal Cerealised Flakes (Cereal Cereinanagerates Concentrex cereal cereal cereal and other products including puree cereal Cereal Cerealysis: This test cerealdeh Cereal Cereal/Pulp or Pulp / 