# 01. Indexes
In this notebook we will cover indexes: their creation, usage, and maintenance.

In [None]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
import langchain as lc

import warnings
# Silence all warnings so they do not clutter the cell outputs.
warnings.filterwarnings(action='ignore')

# Locate the .env file and load it, then pass the key on to the openai
# module. A missing OPENAI_API_KEY raises KeyError right here.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
openai.api_key = os.environ["OPENAI_API_KEY"]

#### 1.1 Loaders
To use our own dataset with an LLM, we first have to load the documents; the following steps then put them into a vector database.

In [None]:
# Let's assume we have a folder of FAQ documents that the AI should
# draw on when someone asks it a question.
faq_loader_cls = lc.document_loaders.TextLoader
loader = lc.document_loaders.DirectoryLoader(
    "./FAQ",
    glob="**/*.txt",  # every .txt file under ./FAQ, sub-folders included
    loader_cls=faq_loader_cls,
    show_progress=True,
)
docs = loader.load()  # Expected to pick up 3 files.

#### 1.2 Text Splitter
Next we split the loaded text into smaller, overlapping chunks, so that each piece can be embedded and retrieved on its own.

In [None]:
# Split the loaded documents into chunks. chunk_size caps the length of a
# chunk and chunk_overlap is how much neighbouring chunks may share
# (NOTE(review): presumably measured in characters -- confirm).
text_splitter = lc.text_splitter.RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100)

documents = text_splitter.split_documents(docs)

# Show the chunk count and a short preview instead of dumping every chunk
# into the cell output.
print(f"{len(documents)} chunks")
documents[:3]

#### 1.3 Embeddings
Now it is time to convert our text into embeddings, here using OpenAI's embedding model (any other LLM embedding would work as well).

In [None]:
# Embedding wrapper created with its default settings; it is handed to the
# vector store below so the chunks can be turned into vectors.
# NOTE(review): presumably picks up the OpenAI key configured earlier -- confirm.
embeddings = lc.embeddings.OpenAIEmbeddings()

#### 1.4 Loading Text Embeddings (Vectors) into a VectorDB (FAISS)

In [None]:
!pip install faiss-cpu

In [None]:
import pickle

# Embed every chunk and index the resulting vectors in a FAISS store.
vectorstore = lc.vectorstores.FAISS.from_documents(documents, embeddings)

# Serialize the store to disk so it can be restored later without
# rebuilding it.
with open("vectorstore.pkl", "wb") as pkl_file:
    pkl_file.write(pickle.dumps(vectorstore))

#### 1.5 Loading the Database

In [None]:
# The store is still in memory from the previous cell; reloading it here
# shows how to restore it from disk.
# NOTE(review): pickle.load can execute arbitrary code, so only unpickle
# files you created yourself. LangChain's FAISS wrapper also offers
# save_local / load_local -- consider switching (confirm against your version).
with open("vectorstore.pkl", "rb") as pkl_file:
    vectorstore = pickle.loads(pkl_file.read())

#### 1.6 Prompts
We can also define the starting point of the conversation with a prompt template.

In [None]:
# Template text sent to the model: a fixed role line, then the {context}
# placeholder, then the user's {question}.
prompt_template = (
    "You are a helpful assistant for our restaurant.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer here:"
)

# Wrap the text in a PromptTemplate that declares both placeholders.
PROMPT = lc.PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

#### 1.6 Chains
With chains we can manipulate the I/O of the LLM

In [None]:
# Language model with default settings.
llm = lc.OpenAI()

# Question-answering chain: the vector store acts as the retriever and our
# custom PROMPT is passed through to the underlying "stuff" chain.
qa = lc.chains.RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": PROMPT},
)

# Ask a single, stand-alone question; the cell displays the returned answer.
query = "When does the restaurant open?"
qa.run(query)

#### 1.7 Memory
In the example just shown, each request stands alone. A great strength of an LLM, however, is that it can take the entire chat history into account when responding. For this, however, a chat history must be built up from the different questions and answers. With different memory classes this is very easy in Langchain.

In [None]:
# Buffer memory for the chat history, stored under 'chat_history'.
# NOTE(review): presumably output_key selects which chain output is
# recorded in the history -- confirm.
memory = lc.memory.ConversationBufferMemory(
    output_key="answer",
    memory_key="chat_history",
    return_messages=True,
)

#### 1.8. Use Memory in Chains
The memory class can now easily be used in a chain. You can see it working when a follow-up question refers to "it": the bot understands that "it" means the vegan food from the previous question.

In [None]:
# Memory cannot be used with every type of chain, so we use
# ConversationalRetrievalChain here.

qa = lc.chains.ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=vectorstore.as_retriever(),
    combine_docs_chain_kwargs={"prompt": PROMPT},
)


# A cell only renders its last expression, so the first result is shown
# explicitly with display(); otherwise the answer to the first question
# would be silently discarded.
query = "Do you offer vegan food?"
display(qa({"question": query}))

# Follow-up question: "it" only makes sense with the chat history that the
# memory carries over from the first question.
qa({"question": "How much does it cost?"})