## Imports

In [None]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from huggingface_hub import hf_hub_download
import textwrap
import glob

## Constants

In [None]:
import os

# Hugging Face Hub API token, read from the environment so that no secret is
# stored in the notebook. Export HUGGINGFACEHUB_API_TOKEN before starting the
# kernel; the value is None when the variable is not set.
# NOTE(review): the token that used to be hard-coded on this line has been
# exposed in the source history and should be revoked in the Hugging Face
# account settings.
HUGGING_FACE_API_KEY = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

In [None]:
# Scratch list of Hugging Face repo ids. These are bare string expressions:
# nothing is assigned, so the cell has no effect beyond a notebook echoing
# the last value.
# NOTE(review): presumably alternative models that were considered for
# `repo_id` — confirm or remove.
'openai-gpt'
'stabilityai/stablelm-tuned-alpha-3b'
'facebook/mbart-large-50'
'google/flan-t5-xl'

In [None]:
# LLM handle for the `google/flan-t5-large` repo on the Hugging Face Hub.
# `model_kwargs` carries the generation settings (temperature, max_length).
model = HuggingFaceHub(
    huggingfacehub_api_token=HUGGING_FACE_API_KEY,
    repo_id="google/flan-t5-large",
    model_kwargs=dict(temperature=0.1, max_length=1024),
)


### Testing the model

In [None]:
# Prompt used to smoke-test the model; `{question}` is the only placeholder
# and is filled in when the chain runs.
template = "Question : {question}\n\nAnswer: Let's think step by step."

prompt = PromptTemplate(input_variables=["question"], template=template)

In [None]:
# Smoke test: push one question through the prompt + model chain and print
# whatever the model returns.
question = "Where is Cameroon found?"

test_chain = LLMChain(llm=model, prompt=prompt)

print(test_chain.run(question))

## Loading and splitting the PDF

In [None]:
# Load the PDF and cut it into chunks of at most 700 characters, no overlap.
#
# `load()` is used rather than `load_and_split()`: the latter already runs a
# default text splitter over the document, so following it with a second
# splitter chunked the text twice and left `pages` holding pre-split
# fragments instead of the loader's per-page documents.
loader = PyPDFLoader("../data/12_rules.pdf")
pages = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=0)
texts = splitter.split_documents(pages)

In [None]:
# Show the first five chunks as a quick sanity check of the split.
texts[:5]

In [None]:
# print(pages[200].page_content)

## Embedding and similarity search

In [None]:
# Embed every chunk with a sentence-transformers model and store the vectors
# in a Chroma vector store.
# NOTE(review): despite its name, `faiss_index` is a Chroma store, not a
# FAISS index; the name is kept because later cells refer to it.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
faiss_index = Chroma.from_documents(documents=texts, embedding=hf_embeddings)
print("Done")

In [None]:
# Fetch the two chunks closest to the query and print each one prefixed with
# the page number recorded in its metadata, followed by a blank line.
results = faiss_index.similarity_search("What is chaos?", k=2)

for result in results:
    print(f"{result.metadata['page']}: {result.page_content}\n")

## Question answering

In [None]:
# Answer the query from the retrieved chunks using a QA-with-sources chain
# built with the "refine" chain type.
query = "What is chaos?"
documents = faiss_index.similarity_search(query)

chain = load_qa_with_sources_chain(llm=model, chain_type="refine")
result = chain({"input_documents": documents, "question": query})
result

In [None]:
# Pass-through prompt: the template is just the `{question}` placeholder, so
# the question text is the whole prompt.
prompt_template = "{question}"

llm_chain = LLMChain(
    prompt=PromptTemplate.from_template(prompt_template),
    llm=model,
)

In [None]:
# Ask the model a free-form question through the pass-through chain; no
# retrieved context is involved in this call.
llm_chain.predict(question="What is a good name for a boy that starts with B?")

In [None]:
from langchain.chains import RetrievalQA

# Retrieval QA over the vector store using the "stuff" chain type, with the
# chain's input key set to "question".
# NOTE(review): the question below is about the GPT4All technical report,
# while the index in this notebook is built from 12_rules.pdf — it looks like
# a leftover from another example; confirm the intended query.
chain = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=faiss_index.as_retriever(),
    input_key="question",
)

chain.run('Who are the authors of GPT4all technical report?')

In [None]:
# Conversational retrieval chain: the retriever is configured with k=3 and
# the chain is asked to return the source documents with its answer.
# The conversation starts with an empty history.
chat_history = []
query = "You are my assistant. Answer the question with as much accuracy as possible given the context of the book extracts. What are the 12 rules of life? Answer in english"

qa2 = ConversationalRetrievalChain.from_llm(
    llm=model,
    retriever=faiss_index.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
)
result = qa2({"question": query, "chat_history": chat_history})
result