## Prepare environment

Open a terminal and run the following commands:
```
mkdir -p data/001-alice
```

```
curl https://www.gutenberg.org/cache/epub/11/pg11.txt > data/001-alice/alice.txt
```

In [None]:
!pip install python-dotenv

In [None]:
from dotenv import load_dotenv
# Read key/value pairs from a local .env file (if one is found) into the
# process environment; a later cell reads OPENAI_API_KEY from os.environ.
load_dotenv()

## Load data

In [None]:
!pip install langchain
!pip install unstructured

In [None]:
from langchain.document_loaders import DirectoryLoader

def load_docs(directory):
    """Load the documents in *directory* with LangChain's ``DirectoryLoader``.

    Args:
        directory: Path of the folder handed to ``DirectoryLoader``; the
            loader is created with its default settings.

    Returns:
        The value returned by ``DirectoryLoader(directory).load()``.
    """
    return DirectoryLoader(directory).load()

In [None]:
# Folder holding the downloaded book text, relative to the notebook's working directory.
# NOTE(review): the setup commands download into data/001-alice, while this path
# starts with ../ — confirm the notebook is run from a subdirectory of the project root.
dirToLoad = '../data/001-alice'
documents = load_docs(dirToLoad)
# Last expression in the cell, so the notebook displays the number of loaded documents.
len(documents)

## Split text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split *documents* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split; passed to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The value returned by the splitter's ``split_documents`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)


In [None]:
# Chunk the loaded documents using split_docs' default chunk_size and chunk_overlap.
texts = split_docs(documents)
# Report how many chunks were produced.
print(len(texts))

## Get embeddings

In [None]:
!pip install openai

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings
# Embeddings object later passed to Chroma.from_documents. The API key is read
# from the environment; a missing OPENAI_API_KEY raises KeyError on this line.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

## Init ChromaDB

In [None]:
!pip install chromadb

In [None]:
from langchain.vectorstores import Chroma

# Build a Chroma vector store from the text chunks using the embeddings object.
# No persist_directory is passed here.
vectordb = Chroma.from_documents(texts, embeddings)

## Create the chain

In [None]:
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Build a question-answering chain over the vector store.
# RetrievalQA replaces the deprecated VectorDBQA: it takes a retriever rather
# than the raw vector store, so the store is wrapped with as_retriever().
# The LLM and chain_type="stuff" are the same as before.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)

In [None]:
# Question to ask about the loaded text.
query = "What did Alice found in the little glass box?"
# Run the chain on the question; as the cell's last expression, the result is displayed.
qa.run(query)