In [4]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate

  from tqdm.autonotebook import tqdm


In [8]:
def load_data(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files are
            selected with the ``'*.pdf'`` glob and each one is read with
            ``PyPDFLoader`` (passed as ``loader_cls``).

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return pdf_loader.load()

# Read all PDFs from the local 'Data/' folder (path is relative to the
# notebook's working directory).
extracted_data = load_data('Data/')

In [9]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults
            to 20, the value that used to be hard-coded here.

    Returns:
        The chunks produced by ``split_documents``.
    """
    # The defaults reproduce the original behaviour; callers can now tune
    # chunking without editing this function.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [10]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(len(text_chunks))

4570


In [14]:
def get_embeddings():
    """Create the embedding model wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance constructed with
        ``model_name='sentence-transformers/all-MiniLM-L6-v2'``.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [15]:
# Instantiate the embedding model once; the bare name on the last line
# makes the notebook display its repr.
embeddings = get_embeddings()
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [21]:
import os
from getpass import getpass

# SECURITY: the Pinecone API key must not be stored in the notebook.
# Use the value already present in the environment; if it is missing,
# prompt for it interactively so it never lands in the saved file.
# NOTE(review): a key was previously committed in this cell -- treat it as
# compromised and rotate it in the Pinecone console.
if not os.environ.get('PINECONE_API_KEY'):
    os.environ['PINECONE_API_KEY'] = getpass('Pinecone API key: ')

# Name of the Pinecone index used by the vector-store cells below.
index_name = 'bot'

In [23]:
# Embed the chunks and store them in the Pinecone index named above.
# NOTE(review): re-running this cell presumably uploads the same chunks
# again -- confirm before doing a full Restart & Run All.
docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embedding=embeddings,
    index_name=index_name,
)

In [24]:
# Open a handle on the existing index (no documents are passed here),
# using the same embedding model for queries.
doc_search = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [26]:
# Sanity-check retrieval with a sample question.
query = 'What is an Operating System?'
sim_search = doc_search.similarity_search(query)

In [27]:
# Display the first document returned by the similarity search above.
sim_search[0]

Document(page_content='at the history and general characteristics of the two operating systems that serve as\nexamples throughout this book. All of the material in this chapter is covered in\ngreater depth later in the book.\n2.1 OPERATING SYSTEM OBJECTIVES AND FUNCTIONS\nAn OS is a program that controls the execution of application programs and acts as\nan interface between applications and the computer hardware. It can be thought of\nas having three objectives:\n•Convenience: An OS makes a computer more convenient to use.', metadata={'page': 45.0, 'source': 'Data\\Operating Systems_William Stalling.pdf'})

In [46]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template = """
Answer the question asked by the user below. If the question is not answerable reply with 'I do not know the answer', don't try to make up an answer.
Context: {context}
Question: {question}
Provide only relevant, helpful, harmless and honest answers.
Useful answer:
"""

In [47]:
# Wrap the raw template text in a PromptTemplate and package it in the
# dict later passed as chain_type_kwargs.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
chain_t_kwargs = {"prompt": PROMPT}

In [48]:
# Local Llama-2 chat model loaded through CTransformers.
# The path is written with a forward slash: the earlier "model\llama..."
# literal relied on the invalid escape sequence "\l" (a SyntaxWarning on
# recent Python versions) and only resolved on Windows.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={'max_new_tokens': 512, 'temperature': 0.8},
)

In [49]:

# Build the retrieval QA chain: the retriever is asked for k=2 documents
# per query, and source documents are returned alongside the answer.
questionanswer = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_t_kwargs,
)

In [52]:
# Ask the chain a sample question and print only the generated answer
# (the "result" entry of the returned mapping).
user_input = "What is a memory module?"
result = questionanswer.invoke({"query": user_input})
print("Response : ", result["result"])

Response :  A memory module is a component that stores data in the form of bit patterns. It consists of a set of locations defined by sequentially numbered addresses, and each location can hold either an instruction or data. The I/O module acts as a bridge between external devices, the processor, and memory, transferring data between them.
