In [1]:
import os

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [2]:
# Pinecone credentials. Read them from the environment so a real secret never
# has to be typed into (and saved with) the notebook. The placeholder fallbacks
# keep both names defined when the variables are not exported; the key
# placeholder previously read "YOUR_OPENAI_KEY", which named the wrong service.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "YOUR_PINECONE_API_KEY")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "YOUR_ENV")

In [3]:
# Extract data from the PDF files
def load_pdf(data):
    """Load every ``*.pdf`` file found in the directory ``data``.

    Args:
        data: Path of the directory to scan; it is handed to
            ``DirectoryLoader`` together with the ``*.pdf`` glob.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields for the matched files,
        each file being parsed with ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [5]:
# Read all PDFs that live in the local Data/ folder.
extracted_data = load_pdf(data="Data/")

In [6]:
# Number of documents returned by load_pdf (the recorded run shows 10794).
len(extracted_data)

10794

In [7]:
# Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the result of ``load_pdf``).
        chunk_size: Maximum chunk size passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            this notebook originally hard-coded.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 20, the original hard-coded value.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [8]:
# Chunk the loaded documents, then show how many chunks were produced
# (the recorded run shows 101380).
text_chunks = text_split(extracted_data=extracted_data)
len(text_chunks)

101380

In [9]:
# Download embedding model
def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Build the HuggingFace embedding wrapper used for indexing and querying.

    Args:
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``, the model
            this notebook originally hard-coded.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [None]:
# Installs sentence-transformers into the kernel's environment.
# NOTE(review): the version is unpinned and this cell has no execution count;
# consider pinning it and moving the install to the top of the notebook.
%pip install sentence-transformers

Note: you may need to restart the kernel to use updated packages.


In [10]:
# Instantiate the embedding model once and display its configuration.
embeddings = download_hugging_face_embeddings()
embeddings

  from .autonotebook import tqdm as notebook_tqdm


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
# Sanity check: embed a short sample string and report the vector length
# (the recorded run prints 384, matching the word_embedding_dimension in the
# model summary displayed by the previous cell).
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [None]:
%pip install langchain-pinecone

Collecting langchain-pinecone
  Downloading langchain_pinecone-0.0.3-py3-none-any.whl.metadata (1.4 kB)
Collecting pinecone-client<4,>=3 (from langchain-pinecone)
  Using cached pinecone_client-3.1.0-py3-none-any.whl.metadata (14 kB)
Downloading langchain_pinecone-0.0.3-py3-none-any.whl (8.3 kB)
Using cached pinecone_client-3.1.0-py3-none-any.whl (210 kB)
Installing collected packages: pinecone-client, langchain-pinecone
Successfully installed langchain-pinecone-0.0.3 pinecone-client-3.1.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os

# NOTE: this rebinds the name `Pinecone`, which the first cell imported from
# langchain.vectorstores; from here on it refers to the langchain_pinecone class.
from langchain_pinecone import Pinecone

# Expose the API key through the environment and name the target index.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
index_name = "test"

# Push every text chunk, embedded with `embeddings`, into the Pinecone index.
# NOTE(review): re-running this cell presumably uploads the chunks again;
# confirm before executing it twice against the same index.
vector_database_index = Pinecone.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
)

 

In [14]:
# Attach to an index that has already been populated instead of rebuilding it.
# NOTE(review): `Pinecone` is whichever import ran last (langchain.vectorstores
# from the first cell, or langchain_pinecone from the indexing cell).
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

# Smoke-test retrieval: fetch the three closest chunks for a sample question.
query = "Suggest me a Smartphone"
docs = docsearch.similarity_search(query=query, k=3)

print("Result", docs)

Result [Document(page_content="thanks you daraz realme C53 (6/128) If you are looking for Smart Phones, I have the perfect suggestion for you. \nIt's realme C53 (6/128), a product that has received Positive feedback \nfrom other customers. This product belongs to the Smart Phones category, \nand it has a Positive reputation. It can make you feel Love when you use \nit. You can find it in Daraz, where you can also read more reviews and \nratings.\nĺকনার  আেগ ĺদেখ আসুন  িভিডও।। সাচ ক˙ন  PRB786 িলেখ youtube,facebook,", metadata={'page': 12.0, 'source': 'Data\\data.pdf'}), Document(page_content="Mobile Phone If you are looking for Smart Phones, I have the perfect suggestion for you. \nIt's Symphony Z45 Smartphone Mobile Phone, a product that has received \nPositive feedback from other customers. This product belongs to the \nSmart Phones category, and it has a Positive reputation. It can make you \nfeel Love when you use it. You can find it in Daraz, where you can also \nread more reviews 

In [15]:
# Raw prompt text for the QA chain. The {context} and {question} placeholders
# are the two input variables declared when this string is wrapped in a
# PromptTemplate in the next cell.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [16]:
# Wrap the raw template text and declare the two variables it expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments handed to the QA chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Installs the package behind langchain's CTransformers LLM wrapper into the
# kernel's environment.
# NOTE(review): the version is unpinned, and the wrapper is already imported in
# the first cell, so this install should run before that import; confirm and pin.
%pip install CTransformers

Note: you may need to restart the kernel to use updated packages.


In [23]:
# Load the local model file from Model/ through the CTransformers wrapper,
# allowing up to 1024 new tokens per answer at temperature 0.8.
llm = CTransformers(
    model_type="llama",
    model="Model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    config=dict(max_new_tokens=1024, temperature=0.8),
)

In [25]:
# Assemble the retrieval-QA chain: the retriever returns the 2 nearest chunks,
# the "stuff" chain type is used with the custom prompt, and the source
# documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question loop. Submit a blank line, "exit" or "quit" to stop;
# the original loop had no exit path and could only be ended by interrupting
# the kernel.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or a kernel interrupt at the prompt ends the session
        # without a traceback.
        break

    if user_input.strip().lower() in {"", "exit", "quit"}:
        break

    # The chain takes the question under the "query" key and returns the
    # generated answer under "result".
    result = qa({"query": user_input})
    print("Response : ", result["result"])