In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Pinecone as PC
from pinecone import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers


  from tqdm.autonotebook import tqdm


In [2]:
PINECONE_API_KEY="8273f958-c1a5-4497-8f8c-99fff6730a73"
PINECONE_API_ENV="us-east-1-aws"

In [3]:
def load_pdf(data):
    loader=DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    document=loader.load()
    return document

In [4]:
extracted_data=load_pdf("data/")

Create text chunks

In [5]:
def text_split(extracted_data):
    text_spliter=RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    text_chunks=text_spliter.split_documents(extracted_data)
    return text_chunks

In [6]:
text_chunks=text_split(extracted_data)
print("Length of my chunks: ",len(text_chunks))

Length of my chunks:  5460


In [7]:
# Download embedding model
def download_huggingface_embedding():
    embedding = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedding

In [8]:
embedding=download_huggingface_embedding()



In [9]:
from pinecone.grpc import PineconeGRPC
pc = PineconeGRPC(api_key=PINECONE_API_KEY)

pc.list_indexes()

{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 384,
              'host': 'maths-chatbot-8m5l21j.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'maths-chatbot',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [13]:
import os
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

In [11]:
# #Initializing the Pinecone
# # pinecone.init(api_key=PINECONE_API_KEY,
# #               environment=PINECONE_API_ENV)
# pc=Pinecone(api_key=PINECONE_API_KEY)
# index_name="maths-chatbot"
# docsearch=PC.from_texts([t.page_content for t in text_chunks], embedding, index_name=index_name)

In [14]:
index_name="maths-chatbot"
docsearch=PC.from_existing_index(index_name, embedding)
query="What is machine learning"
result=docsearch.similarity_search(query, k=3)
print("result: ", result)


result:  [Document(metadata={}, page_content='As mentioned in Chapter 1, there are two different senses in which we\nuse the phrase “machine learning algorithm”: training and prediction. We'), Document(metadata={}, page_content='there are three major components of a machine learning system: data,\nmodels, and learning. The main question of machine learning is “What do'), Document(metadata={}, page_content='to figure out the underlying process that explains the observations. In this\nsense, machine learning is close to statistics in its goals to construct a')]


In [15]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [16]:
prompt=PromptTemplate(template=prompt_template, input_variables=["context","question"])
chain_type_kwargs={"prompt":prompt}

In [17]:
llm=CTransformers(model="model/llama-2-7b.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':600,
                          'temperature':0.8})

In [18]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [19]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response: ", result["result"])

  result=qa({"query": user_input})


Response:  Ronaldo is a famous soccer player that plays for Juventus.




KeyboardInterrupt: 