In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [3]:
PINECONE_API_KEY = "05ab418b-9272-4645-b940-6d5681e195e4"
PINECONE_API_ENV= "gcp-starter"

In [4]:
#Extract data from PDF
def load_pdf(data):
    loader = DirectoryLoader(data,  
                    glob ="*.pdf",
                    loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [5]:
extracted_data = load_pdf("Data/")

In [6]:
#Creating Text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks


In [7]:
text_chunks = text_split(extracted_data)
print(len(text_chunks))


254


In [8]:
def download_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_huggingface_embeddings()

In [10]:
#Initializing Pinecone
pinecone.init(api_key=PINECONE_API_KEY,
              environment= PINECONE_API_ENV)
index_name = "agribot"
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)


In [11]:
docsearch = Pinecone.from_existing_index(index_name, embeddings)

query = "What are rabi crops"

docs = docsearch.similarity_search(query, k=3)
print("Result", docs)

Result [Document(page_content='hoe, cono weeder are common on Indian farms.', metadata={}), Document(page_content='hoe, cono weeder are common on Indian farms.', metadata={}), Document(page_content='hoe, cono weeder are common on Indian farms.', metadata={})]


In [13]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context : {context}
Question: {question}

Only return hte helpful answer below and nothing else.
Helpful answer:
"""

In [15]:
PROMPT = PromptTemplate(template = prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [18]:
llm = CTransformers(model = "model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config = {'max_new_tokens':512,
                              "temperature":0.8})

In [19]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents = True,
    chain_type_kwargs=chain_type_kwargs)


In [None]:
while True:
    user_input=input(f"Input Prompt:")
    result = qa({"query": user_input})
    print("Response :" , result["result"])