# Literature

This notebook demonstrates:

1. Downloading two books from Project Gutenberg
2. Chunking them
3. Storing in a vector database
4. Using the query to find a similar chunk in the vector database, which forms the context for an LLM call (Retrieval-Augmented Generation, aka "RAG")


In [None]:
# Fetch the Proscenium source; a later cell installs it from this checkout.
!git clone https://github.com/The-AI-Alliance/proscenium.git

In [None]:
# Switch into the cloned repository: the `pip install .` below installs from the
# current directory (and the `demo` package imported later presumably lives here).
%cd proscenium

In [None]:
# Install Proscenium from the current directory (the cloned checkout).
# `%pip` installs into the environment of the running kernel, whereas
# `!python -m pip` uses whichever `python` happens to be first on the shell PATH.
%pip install .

In [None]:
import os
from google.colab import userdata

# Read the Together API key via Colab's `userdata` and publish it to the
# process environment under the same name, keeping a copy in `api_key`.
api_key = os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')

In [None]:
from rich import print
from rich.panel import Panel
from rich.prompt import Prompt

import asyncio

import os

# Set TOKENIZERS_PARALLELISM to "false" for this process
# (presumably to stop the tokenizer library from using parallel workers — confirm).
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})


In [None]:
# Identifier of the chat model chosen for this notebook run.
# NOTE(review): the "together:" prefix presumably selects the Together provider,
# matching the TOGETHER_API_KEY exported above — confirm.
model_id = "together:meta-llama/Llama-3-70b-chat-hf"

# Echo the choice so it is recorded in the notebook output.
print(model_id)

# Prepare Vector Database from Document Chunks

In [None]:
import demo.domains.literature as domain
from proscenium.verbs.read import url_to_file

import nest_asyncio
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop, which would otherwise make the asyncio.run() calls below fail — confirm.
nest_asyncio.apply()

# Download each book listed by the domain module to its local data file.
for book in domain.books:
    print("Book:", book.title)
    # asyncio.run blocks until this book's url_to_file coroutine has finished.
    asyncio.run(url_to_file(book.url, book.data_file))
    print("Local copy to chunk:", book.data_file)


In [None]:
from proscenium.verbs.vector_database import embedding_function

# Create the embedding function from the model id configured in the domain module.
# It is reused later both when building the collection and when answering the question.
embedding_fn = embedding_function(domain.embedding_model_id)
print("Embedding model", domain.embedding_model_id)

In [None]:
from proscenium.verbs.vector_database import vector_db

# NOTE(review): the path component of this URI is "/milvus.db", i.e. anchored at the
# filesystem root rather than the working directory — confirm this is intended.
milvus_uri = "file:/milvus.db"

# overwrite=True is passed, so presumably any existing database at this URI is
# replaced on each run — TODO confirm against vector_db.
vector_db_client = vector_db(milvus_uri, overwrite=True)
print("Vector db at uri", milvus_uri)

In [None]:
from proscenium.scripts.chunk_space import build_vector_db

# Collection name for the stored chunks; the same name is passed to
# answer_question later in the notebook.
collection_name = "literature_chunks"

# Build the collection from every book's local data file, using the client and
# embedding function created in the previous cells.
build_vector_db([book.data_file for book in domain.books], vector_db_client, embedding_fn, collection_name)

# Do not close the client here: it is passed to answer_question in a later cell.

# Answer User Question

In [None]:
# The user question; it is passed to answer_question in the next cell.
question = "What did Hermes say to Prometheus about giving fire to humans?"

In [None]:
from proscenium.scripts.rag import answer_question

# Answer the question with the notebook's own `model_id` (the value defined and
# printed near the top) rather than `domain.model_id`, so the model that is shown
# to the reader is the one that is actually called. The vector database client,
# embedding function and collection name prepared earlier supply the retrieval side.
# NOTE(review): the meaning of the final positional `True` is not visible here —
# presumably a verbose flag; confirm against answer_question's signature.
answer = answer_question(
    question, model_id, vector_db_client, embedding_fn, collection_name, True
)

# Render the answer in a titled rich panel.
print(Panel(answer, title="Assistant"))
