In [30]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_pinecone import PineconeVectorStore
import pinecone
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate

In [8]:
def data_reader(Data):
    """
    docment loader function.
    package reeuire: from langchain.document_loaders import PyPDFLoader, DirectoryLoader
    """
    loader = DirectoryLoader(Data,
                    glob= '*.pdf',
                    loader_cls= PyPDFLoader)
    doc = loader.load()
    return doc

In [9]:
extracted_text = data_reader("data/")

In [10]:
len(extracted_text)

1431

In [11]:
def text_split(extracted_data, size = 500, overlap = 50):
    """
    Splitting the data into text chunks
    default Chunk Size = 500, chunk_overlap = 50
    Package required: from langchain.text_splitter import RecursiveCharacterTextSplitter 
    Returns: text_chunks
    """
    text_spilter = RecursiveCharacterTextSplitter(chunk_size = size, chunk_overlap = overlap)
    text_chunks = text_spilter.split_documents(extracted_data)
    return text_chunks

In [12]:
text_chunk = text_split(extracted_data=extracted_text)

In [13]:
print("Number of chunks: ",len(text_chunk))

Number of chunks:  8269


In [38]:
def download_embedding():
    """
    Downloading embedding model from HuggingFace
    Package required: from langchain.embeddings import HuggingFaceBgeEmbeddings
    """

    embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs= {'device' : 'cpu'})
    return embedding

In [39]:
embedding = download_embedding()

In [14]:
#intializing pinecone and storing the data in vector DB
load_dotenv()
PINE_CONE_API = os.environ.get('PINE_CONE_API')
index = os.environ.get('PINE_CONE_INDEX')

os.environ['PINECONE_API_KEY'] = PINE_CONE_API
index = index
docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunk], embedding=embedding, index_name = index)

In [15]:
def prompt_creator():
    """
    Creating Prompt for the llm
    Package Required: from langchain import PromptTemplate
    return: dict
    """

    prompt_template="""
    Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    Question: {question}

    Only return the helpful answer below and nothing else.
    Helpful answer:
    """
    PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain_type_kwargs={"prompt": PROMPT}
    return chain_type_kwargs

In [16]:
prompt = prompt_creator()

In [17]:
llm=CTransformers(model="Model/llama-2-7b-chat.ggmlv3.q8_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8},device = 'gpu')

In [19]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=prompt)

In [29]:
user_input=input(f"Input Prompt:")
result=qa({"query": user_input})
print("query : ", result["query"])
print("\nResponse : ", result["result"])
print("\nsource_documents : ", result["source_documents"])

query :  what is lok sabha election

Response :   Lok Sabha elections are held every five years to elect members of the Lok Sabha, which is the lower house of the Indian Parliament. The elections are conducted by the Election Commission of India, and voters cast their ballots for candidates representing various political parties or independents. The election is a direct poll, where people vote directly for their representatives in the Lok Sabha.

source_documents :  [Document(page_content='of Houses and its  members. \uf020\n \n\uf0b7 Lok Sabha is also called House of people or Lower House, Rajya Sabha is also  \ncalled as Upper House  or Council of  states. Lok Sabha represents  people of  \nIndia as whole and Rajya Sabha represents  the States and Union territories. \uf020\n \n\uf0b7 After Rajya Sabha passes such resolution with absolute majority then resolution  \nhas to be passed  by Lok Sabha  also (Article  67).\uf020'), Document(page_content='Lok Sabha. Nicobar, Dadra  & \n\uf0b