In [1]:
from langchain.document_loaders import PyMuPDFLoader , DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA



In [3]:
def load_pdf(data_folder):
    """Load the ``*.pdf`` files found in ``data_folder`` and drop empty documents.

    Args:
        data_folder: Directory handed to ``DirectoryLoader``; files matching
            ``*.pdf`` are parsed with ``PyMuPDFLoader``.

    Returns:
        A list of the loaded documents whose ``page_content`` is truthy
        (documents with empty or missing text are skipped).
    """
    pdf_loader = DirectoryLoader(data_folder, loader_cls=PyMuPDFLoader, glob="*.pdf")
    docs_with_text = []
    for loaded_doc in pdf_loader.load():
        # Skip documents that carry no extractable text.
        if loaded_doc.page_content:
            docs_with_text.append(loaded_doc)
    return docs_with_text


In [4]:
# Load the PDFs from the local "data/" folder, keeping only documents that contain text.
extracted_data = load_pdf("data/")

In [5]:
def text_split(text_documents, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping chunks suitable for embedding.

    Args:
        text_documents: Iterable of documents to split.
        chunk_size: Maximum size of each chunk, as measured by the splitter
            (defaults to 500, the value previously hard-coded).
        chunk_overlap: Amount of overlap between consecutive chunks
            (defaults to 20, the value previously hard-coded).

    Returns:
        A list with the chunks of every input document, in input order.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # split_documents already processes the documents one by one and
    # concatenates their chunks, so no manual loop/extend is required.
    return list(text_splitter.split_documents(text_documents))


In [6]:
# Split the loaded documents into chunks and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 1033


In [7]:
def download_hugging_face_embeddings():
    """Create the Hugging Face embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    # NOTE(review): the model files are presumably downloaded on first use — confirm.
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [8]:
# Instantiate the embedding model once; it is reused when building the vector index.
embeddings = download_hugging_face_embeddings()


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Index every chunk together with its metadata. Passing only the raw text would
# discard it, and the documents handed back by the retriever (the QA chain is
# configured to return its source documents) could no longer be traced to their origin.
texts = [t.page_content for t in text_chunks]
metadatas = [t.metadata for t in text_chunks]
faiss_index = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

In [13]:
# Prompt text for the QA chain: {context} and {question} are the two placeholders
# declared as input variables below.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

# Wrap the raw text in a PromptTemplate and expose it through the chain kwargs.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

# Local Llama model loaded from a file on disk through the CTransformers wrapper;
# generation is capped at 512 new tokens and samples with temperature 0.8.
llm = CTransformers(
    model_type="llama",
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    config=dict(max_new_tokens=512, temperature=0.8),
)

# Retrieval QA chain of type "stuff": the FAISS retriever is asked for k=2 chunks,
# the custom prompt is supplied via chain_type_kwargs, and the source documents
# are returned together with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=faiss_index.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [14]:
# Interactive question loop. Enter "Q" (any case, surrounding spaces ignored) to stop.
while True:
    user_input = input("Input Prompt: ").strip()
    if user_input.upper() == 'Q':
        break
    if not user_input:
        # Nothing was typed: ask again instead of running an empty query through the LLM.
        continue
    # Use invoke(); calling the chain object directly is deprecated in LangChain.
    result = qa.invoke({"query": user_input})
    print("Response:", result["result"])


  warn_deprecated(


Response: A queue is a data structure that stores elements in first-in-first-out (FIFO) order.
Response: A list in Python is a collection of items that can be of any data type, including other lists. Lists are mutable, meaning their elements can be modified after they are created. They are also sequences, which means they support indexing and slicing.
Response: A list and an array are similar in that they both can store collections of data. However, there are some key differences between the two. Lists are more flexible and can contain nested structures, while arrays are fixed-length and cannot be modified once created. In other words, lists are mutable, while arrays are immutable.
Response: Fibonacci is a sequence of numbers where each number is the sum of the two preceding numbers, starting from 0 and 1. The sequence begins like this: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, ... (sequence starts at 0 and 1)
The Fibonacci sequence is a classic example of a recursive algorithm.
