In [None]:
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

## Setting the LLM

In [None]:
# Read the OpenAI API key from a local text file (kept out of the notebook itself).
# The key is stripped because text files normally end with a newline, and a key
# carrying stray whitespace is sent to the API verbatim and fails authentication.
with open("openai_api.txt", "r", encoding="utf-8") as f:
    OPENAI_API = f.read().strip()

# Completion model used to generate the answers.
llm = OpenAI(
    model_name = "gpt-3.5-turbo-instruct",
    openai_api_key = OPENAI_API
)

# Embedding model used to vectorize documents (and queries) for retrieval.
embedding_llm = OpenAIEmbeddings(
    model = "text-embedding-ada-002",
    openai_api_key = OPENAI_API
)

## Retrieval Question/Answering

This chain helps us answer questions over a knowledge base.

Vectorstore Retriever Options:
* There are two main ways to retrieve documents relevant to a query - **Similarity Search** and **Max Marginal Relevance Search** (MMR Search). Similarity Search is the default, but you can use MMR by adding the `search_type` parameter: _docsearch.as_retriever(search_type="mmr")_

* You can also modify the search by passing specific search arguments through the retriever to the search function, using the `search_kwargs` keyword argument.
    * `k` defines how many documents are returned; defaults to 4.
    * `score_threshold` allows you to set a minimum relevance for documents returned by the retriever, if you are using the "similarity_score_threshold" search type.
    * `fetch_k` determines the number of documents to pass to the MMR algorithm; defaults to 20.
    * `lambda_mult` controls the diversity of results returned by the MMR algorithm, with 1 being minimum diversity and 0 being maximum. Defaults to 0.5.
    * `filter` allows you to define a filter on what documents should be retrieved, based on the documents' metadata. This has no effect if the Vectorstore doesn't store any metadata.


In [None]:
## Setting the Docsearch Object
# Pipeline: load the text file -> split it into chunks -> embed and index the chunks.

# 1) Load the source text as LangChain documents.
loader = TextLoader("state_of_the_union.txt")
docs = loader.load()

# 2) Split the documents (chunk_size=500, chunk_overlap=0).
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=500)
text = text_splitter.split_documents(docs)

# 3) Embed the chunks with `embedding_llm` and store them in a Chroma vector store.
docsearch = Chroma.from_documents(text, embedding_llm)

In [None]:
## Creating the QA Chain
# Wire the LLM to the vector store through a retriever. `as_retriever()` is called
# without arguments, so the retriever runs with its default search settings.
qa = RetrievalQA.from_chain_type(
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

In [None]:
# Question sent to the retrieval QA chain.
# NOTE(review): "Advengers" looks like a typo for "Avengers" — confirm whether it is
# intentional (e.g. probing how the chain handles a question outside the document).
query = "What Advengers do?"
# Last expression of the cell: the chain's answer is displayed as the cell output.
qa.run(query)